| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct ShapiroWilkTestVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using swilk_test_v = ShapiroWilkTestVisitor<T, I>; |
This functor class calculates the Shapiro-Wilk test. The Shapiro-Wilk test is a statistical test used to determine whether a sample of data comes from a normally distributed population. It assesses how closely the data distribution matches a normal distribution with the same mean and standard deviation. A low p-value (typically less than 0.05) suggests the data is likely not normally distributed. The test is particularly useful when you have a small to moderate sample size (although it can handle larger samples as well) and before using parametric statistical methods that assume normality. get_result() returns the test statistic (W). This value ranges from 0 to 1, with values closer to 1 suggesting a better fit to a normal distribution. The get_p_value() member function returns the p-value. This value represents the probability of observing data as extreme as, or more extreme than, the data you have, assuming the data is actually normally distributed.
ShapiroWilkTestVisitor();
|
T: Column data type. I: Index type. |
static void test_ShapiroWilkTestVisitor() { std::cout << "\nTesting ShapiroWilkTestVisitor{ } ..." << std::endl; StrDataFrame ibm; try { ibm.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } const auto col_s = ibm.get_index().size(); RandGenParams<double> p1 { .min_value = 99, .max_value = 200, .seed = 123 }; ibm.load_column("uniform", gen_uniform_real_dist<double>(col_s, p1)); ibm.load_column("exponential", gen_exponential_dist<double>(col_s, p1)); ibm.load_column("lognormal", gen_lognormal_dist<double>(col_s, p1)); ibm.load_column("normal", gen_normal_dist<double>(col_s, p1)); RandGenParams<double> p2 { .seed = 123, .mean = 0, .std = 1.0 }; ibm.load_column("std_normal", gen_normal_dist<double>(col_s, p2)); ShapiroWilkTestVisitor<double, std::string> swt; ibm.single_act_visit<double>("IBM_Close", swt); assert((std::fabs(swt.get_result() - 0.953874) < 0.000001)); assert((std::fabs(swt.get_p_value() - 3.56659e-37) < 0.0000000000001)); ibm.single_act_visit<double>("uniform", swt); assert((std::fabs(swt.get_result() - 0.954293) < 0.000001)); assert((std::fabs(swt.get_p_value() - 4.8411e-37) < 0.0000000000001)); ibm.single_act_visit<double>("exponential", swt); assert((std::fabs(swt.get_result() - 0.819813) < 0.000001)); assert((std::fabs(swt.get_p_value() - 9.28713e-60) < 0.0000000000001)); ibm.single_act_visit<double>("lognormal", swt); assert((std::fabs(swt.get_result() - 0.524102) < 0.000001)); assert(swt.get_p_value() == 0.0); ibm.single_act_visit<double>("normal", swt); assert((std::fabs(swt.get_result() - 0.99974) < 0.00001)); assert((std::fabs(swt.get_p_value() - 0.816855) < 0.000001)); ibm.single_act_visit<double>("std_normal", swt); assert((std::fabs(swt.get_result() - 0.99974) < 0.00001)); assert((std::fabs(swt.get_p_value() - 0.816855) < 0.000001)); }