← Back to Documentations

Signature	Description	Parameters
#include <DataFrame/DataFrameStatsVisitors.h> template<arithmetic T, typename I = unsigned long> struct ChiSquaredTestVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using chis_test_v = ChiSquaredTestVisitor<T, I>;	This functor class calculates the Chi Squared Test. A chi-squared (χ²) test is a statistical hypothesis test that determines whether there is a significant difference between expected and observed frequencies in one or more categories. It is used to test for a relationship between two categorical variables (test of independence) or to see whether the observed distribution of a single categorical variable matches a specific distribution (goodness-of-fit test). The basic idea is to compare actual and expected values; if the calculated χ² value is greater than the critical value, you reject the null hypothesis that there is no difference. In this implementation, the test takes two time-series in this order: observed and expected. The get_result() member function returns the test statistic, the so-called χ². The get_p_value(size_type degree_of_freedom) member function returns the p-value, which represents the probability of obtaining a test statistic as extreme as, or more extreme than, the one calculated from the data, assuming the null hypothesis is true. It is used to determine the statistical significance of the result: if the p-value is less than a pre-determined significance level (commonly 0.05), the null hypothesis is rejected. ChiSquaredTestVisitor();	T: Column data type. I: Index type.

static void test_ChiSquaredTestVisitor()  {

    std::cout << "\nTesting ChiSquaredTestVisitor{ } ..." << std::endl;

    std::vector<unsigned long>  idx = { 123450, 123451, 123452, 123453, 123454 };
    std::vector<double>         ob1 = { 1, 2, 3, 4, 5 };
    std::vector<double>         ex1 = { 8, 9, 10, 11, 12 };
    std::vector<double>         ob2 = { 5, 18, 42, 27, 8 };
    std::vector<double>         ex2 = { 8, 20, 36, 24, 12 };
    std::vector<double>         ob3 = { 2, 5, 6, 8, 4 };
    std::vector<double>         ex3 = { 5, 5, 5, 5, 5 };
    MyDataFrame                 df;

    df.load_data(std::move(idx),
                 std::make_pair("observation 1", ob1),
                 std::make_pair("expected 1", ex1),
                 std::make_pair("observation 2", ob2),
                 std::make_pair("expected 2", ex2),
                 std::make_pair("observation 3", ob3),
                 std::make_pair("expected 3", ex3));

    ChiSquaredTestVisitor<double>   chi;

    df.single_act_visit<double, double>("observation 1", "expected 1", chi);
    assert(std::fabs(chi.get_result() - 25.0073) < 0.0001);
    assert(chi.get_p_value(4) < 0.0000000001);

    df.single_act_visit<double, double>("observation 2", "expected 2", chi);
    assert(std::fabs(chi.get_result() - 4.0333) < 0.0001);
    assert(std::fabs(chi.get_p_value(4) - 0.4953) < 0.0001);

    df.single_act_visit<double, double>("observation 3", "expected 3", chi);
    assert(std::fabs(chi.get_result() - 4.0) < 0.0001);
    assert(std::fabs(chi.get_p_value(4) - 0.5) < 0.0001);
}