← Back to Documentations

Signature	Description	Parameters
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct MannWhitneyUTestVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using mwu_test_v = MannWhitneyUTestVisitor<T, I>;	This functor class calculates the Mann-Whitney U test. The Mann-Whitney U test is a non-parametric statistical test used to compare two independent groups, particularly when data are not normally distributed, by ranking all observations and analyzing the differences in their ranks. The primary purpose of the Mann-Whitney U test is to determine if there's a statistically significant difference between the distributions of two independent groups. It's often used as an alternative to the independent samples t-test when the assumptions of normality or equal variances are violated. The null hypothesis (H0) is that the two populations are equal. The alternative hypothesis (H1) is that the two populations are not equal. The following methods return the results: get_result(): It returns the minimum of the two U statistics. The U statistic represents the number of times observations from one group precede observations from another group in the ranking order. Essentially, it measures the degree of overlap between two independent samples. The smaller the U value, the greater the difference between the two groups, indicating less overlap. get_u1(): It returns the U1 statistic. get_u2(): It returns the U2 statistic. get_zscore(): It returns the z-score. The z-score represents the standardized value of the U statistic. It allows you to determine how far the observed U value is from the expected U value under the null hypothesis, in terms of standard deviations. get_pvalue(): It returns the p-value. The p-value represents the probability of observing the data, or something more extreme, under the null hypothesis. The null hypothesis typically states that there is no difference between the two groups being compared. 
A small p-value (commonly less than 0.05) suggests that the observed difference between the groups is unlikely to have occurred by chance, leading to the rejection of the null hypothesis. MannWhitneyUTestVisitor();	T: Column data type. I: Index type.

static void test_MannWhitneyUTestVisitor()  {

    std::cout << "\nTesting MannWhitneyUTestVisitor{ } ..." << std::endl;

    StrDataFrame    ibm;

    try  {
        ibm.read("IBM.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    MinVisitor<double, std::string> min_val;
    MaxVisitor<double, std::string> max_val;

    ibm.single_act_visit<double>("IBM_Close", min_val);
    ibm.single_act_visit<double>("IBM_Close", max_val);

    const auto              col_s = ibm.get_index().size();
    RandGenParams<double>   p { .min_value = min_val.get_result(), .max_value = max_val.get_result(), .seed = 123 };

    ibm.load_column("uniform", gen_uniform_real_dist<double>(col_s, p));
    ibm.load_column("exponential", gen_exponential_dist<double>(col_s, p));
    ibm.load_column("lognormal", gen_lognormal_dist<double>(col_s, p));

    mwu_test_v<double, std::string> mwu_test;

    ibm.single_act_visit<double, double>("IBM_Close", "IBM_Open", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 12643394.5) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 12667566.5) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 12643394.5) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -0.083) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.9339) < 0.0001));

    ibm.single_act_visit<double, double>("IBM_Low", "IBM_High", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 12213043.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 12213043.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 13097918.0) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -3.0369) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.0024) < 0.0001));

    ibm.single_act_visit<double, double>("IBM_Close", "uniform", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 11666858.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 11666858.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 13644103.0) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -6.7858) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.0) < 0.0001));

    ibm.single_act_visit<double, double>("IBM_Close", "lognormal", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 30.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 25310931.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 30.0) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -86.8661) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.0) < 0.0001));

    ibm.single_act_visit<double, double>("uniform", "exponential", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 0.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 25310961.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 0.0) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -86.8663) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.0) < 0.0001));

    ibm.single_act_visit<double, double>("exponential", "lognormal", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 9727387.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 9727387.0) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 15583574.0) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - -20.0982) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 0.0) < 0.0001));

    ibm.single_act_visit<double, double>("IBM_Close", "IBM_Close", mwu_test);
    assert((std::fabs(mwu_test.get_result() - 12655480.5) < 0.0001));
    assert((std::fabs(mwu_test.get_u1() - 12655480.5) < 0.0001));
    assert((std::fabs(mwu_test.get_u2() - 12655480.5) < 0.0001));
    assert((std::fabs(mwu_test.get_zscore() - 0.0) < 0.001));
    assert((std::fabs(mwu_test.get_pvalue() - 1.0) < 0.0001));
}