Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor that calculates the auto-correlations of a column.
//   T: Column data type; must be an arithmetic-enabled type
//   I: Index type
//   A: Memory alignment boundary for vectors; defaults to the system
//      default alignment
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct AutoCorrVisitor;

// -------------------------------------

// Shorthand alias for AutoCorrVisitor with the same template parameters
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using acf_v = AutoCorrVisitor<T, I, A>;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class calculates the autocorrelation of a given column. The result is a vector of auto-correlations for lags 0 through max_lag - 1 (max_lag entries in total); the index into the result vector is the lag. result[0], the lag-0 entry, is always 1.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets.

  explicit
  AutoCorrVisitor (size_type max_lag);

  max_lag: Lag periods requested
get_result(): Returns the auto-correlations. In case of a scalar data column, result is a vector of auto correlations. In case of a multidimensional data column, result is a vector of vectors of component-wise auto-correlations.
T: Column data type. T must be an arithmetic-enabled type
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor that calculates the auto-correlations of a column at one fixed lag.
//   T: Column data type; must be an arithmetic-enabled type
//   I: Index type
//   A: Memory alignment boundary for vectors; defaults to the system
//      default alignment
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct FixedAutoCorrVisitor;

// -------------------------------------

// Shorthand alias for FixedAutoCorrVisitor with the same template parameters
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using facf_v = FixedAutoCorrVisitor<T, I, A>;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class calculates the autocorrelation of a given column. The calculation lag is fixed with the given lag parameter. See roll_policy for how the calculations are done.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets.

  FixedAutoCorrVisitor (size_type lag_period, roll_policy rp);

  lag_period: Period to offset the correlation calculations
  rp: How to roll over the input with the lagged period. See roll_policy
get_result(): Returns the fixed-auto-correlations. In case of a scalar data column, result is a vector of auto correlations. In case of a multidimensional data column, result is a vector of vectors of component-wise auto-correlations.
T: Column data type. T must be an arithmetic-enabled type
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor that calculates the partial auto-correlations of a column.
//   T: Column data type; must be an arithmetic-enabled type
//   I: Index type
//   A: Memory alignment boundary for vectors; defaults to the system
//      default alignment
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct PartialAutoCorrVisitor;

// -------------------------------------

// Shorthand alias for PartialAutoCorrVisitor with the same template parameters
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using pacf_v = PartialAutoCorrVisitor<T, I, A>;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class calculates the partial autocorrelation of a given column. In time series analysis, the partial autocorrelation function (PACF) gives the partial correlation of a stationary time series with its own lagged values, after regressing out the values of the time series at all shorter lags. It contrasts with the autocorrelation function, which does not control for other lags.
This calculation requires intensive matrix arithmetic and is time consuming. For that reason, the max lag is limited to 375 periods. It is recommended to enable multithreading for higher lag periods and bigger time series.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. In case of multidimensional datasets, you are dealing with Partial Cross-Correlation or, more formally, the Partial Autocorrelation Matrix (sometimes called the PACF matrix or derived from the Partial Autocorrelation Function of a VAR model).

  explicit
  PartialAutoCorrVisitor (size_type max_lag);

  max_lag: Lag periods requested
get_result(): Returns the partial-auto-correlations. In case of a scalar data column, result is a vector of partial auto-correlations. In case of a multidimensional data column, result is a vector of square DxD matrices of partial auto-correlations, where D is the data dimension.
T: Column data type. T must be an arithmetic-enabled type
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment
// Exercises AutoCorrVisitor with a max lag of 15:
//   1. asynchronously on scalar column "col_1",
//   2. synchronously on scalar column "col_2",
//   3. synchronously on "col_2" through a const copy of the DataFrame,
//   4. synchronously on a randomly generated column of 3-element arrays.
// Every check is an assert(), so a failure aborts the test.
//
static void test_auto_correlation()  {

    std::cout << "\nTesting Auto Correlation ..." << std::endl;

    StlVecType<unsigned long>  idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466, 123467, 123468, 123469, 123470, 123471, 123472, 123473 };
    StlVecType<double>         d1 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0, 0.387, 0.123, 1.06, 0.65, 2.03, 0.4, 1.0, 0.007 };
    StlVecType<double>         d2 = { 1.23, 1.22, 1.21, 1.20, 1.19, 1.185, 1.181, 1.19, 1.195, 1.189, 1.185, 1.18, 1.181, 1.186, 1.189, 1.19, 1.194, 1.198, 1.199, 1.197, 1.193 };
    // col_3 holds fewer values than the index has entries; it is loaded but
    // never visited in this test
    //
    StlVecType<int>            i1 = { 22, 23, 24, 25, 99 };
    MyDataFrame                df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", i1));

    // Asynchronous visit of col_1; fut.get() blocks until the visit is done
    //
    AutoCorrVisitor<double> auto_corr { 15 };
    auto                    fut = df.single_act_visit_async<double>("col_1", auto_corr);
    const auto              &result = fut.get().get_result();

    assert(result.size() == 15);
    assert(result[0] == 1.0);
    assert(std::fabs(result[1] - 0.562001) < 0.00001);
    assert(std::fabs(result[6] - 0.388131) < 0.00001);
    assert(std::fabs(result[10] - 0.125514) < 0.00001);

    // Synchronous visit of col_2, reusing the same visitor instance
    //
    const auto  &result2 = df.single_act_visit<double>("col_2", auto_corr).get_result();

    assert(result2.size() == 15);
    assert(result2[0] == 1.0);
    assert(std::fabs(result2[1] - 0.903754) < 0.00001);
    assert(std::fabs(result2[6] - -0.263385) < 0.00001);
    assert(std::fabs(result2[10] - -0.712274) < 0.00001);

    // The same visit through a const DataFrame must give the same numbers
    //
    const MyDataFrame   df_c = df;

    const auto  &result3 = df_c.single_act_visit<double>("col_2", auto_corr).get_result();

    assert(result3.size() == 15);
    assert(result3[0] == 1.0);
    assert(std::fabs(result3[1] - 0.903754) < 0.00001);
    assert(std::fabs(result3[6] - -0.263385) < 0.00001);
    assert(std::fabs(result3[10] - -0.712274) < 0.00001);

    // Now multidimensional data
    //
    RandGenParams<double>   p;

    p.seed = 123;
    p.min_value = -5.0;
    p.max_value = 5.0;

    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;

    // Generate and load a random column
    //
    auto                    rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * dim, p);
    StlVecType<ary_col_t>   array_col(df.get_index().size());

    // Pack the flat random vector into rows of dim values each; j walks
    // rand_vec while i walks the rows of array_col
    //
    for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i)  {
        for (std::size_t d { 0 }; d < dim; ++d)
            array_col[i][d] = rand_vec[j++];
    }
    df.load_column<ary_col_t>("md_array_col", std::move(array_col));

    AutoCorrVisitor<ary_col_t>  md_auto_corr { 15 };

    df.single_act_visit<ary_col_t>("md_array_col", md_auto_corr);

    const auto  &md_result = md_auto_corr.get_result();

    // One entry per lag, each holding one correlation per dimension
    //
    assert(md_result.size() == 15);
    for (const auto &vec : md_result)
        assert(vec.size() == dim);
    assert(std::fabs(md_result[0][0] - 1.0) < 0.000001);
    assert(std::fabs(md_result[0][1] - 1.0) < 0.000001);
    assert(std::fabs(md_result[0][2] - 1.0) < 0.000001);
    assert(std::fabs(md_result[7][0] - -0.256401) < 0.000001);
    assert(std::fabs(md_result[7][2] - -0.071576) < 0.000001);
    assert(std::fabs(md_result[14][1] - -0.348612) < 0.000001);
    assert(std::fabs(md_result[14][2] - 0.050936) < 0.000001);
}

// -----------------------------------------------------------------------------

static void test_FixedAutoCorrVisitor()  {

    std::cout << "\nTesting FixedAutoCorrVisitor{  } ..." << std::endl;

    StrDataFrame    df;

    try  {
        df.read("IBM.csv", io_format::csv2);

        FixedAutoCorrVisitor<double, std::string>   fac { 31, roll_policy::blocks };

        df.single_act_visit<double> ("IBM_Close", fac);

        assert(fac.get_result().size() == 162);
        assert(std::abs(fac.get_result()[0] - -0.5436) < 0.0001);
        assert(std::abs(fac.get_result()[12] - 0.1328) < 0.001);
        assert(std::abs(fac.get_result()[14] - -0.594) < 0.0001);
        assert(std::abs(fac.get_result()[161] - 0.3364) < 0.0001);
        assert(std::abs(fac.get_result()[160] - -0.231) < 0.0001);
        assert(std::abs(fac.get_result()[159] - 0.075) < 0.001);

        FixedAutoCorrVisitor<double, std::string> fac2 { 31, roll_policy::continuous };

        df.single_act_visit<double> ("IBM_Close", fac2);

        assert(fac2.get_result().size() == 5000);
        assert(std::abs(fac2.get_result()[0] - -0.5436) < 0.0001);
        assert(std::abs(fac2.get_result()[12] - -0.7213) < 0.001);
        assert(std::abs(fac2.get_result()[14] - -0.6657) < 0.0001);
        assert(std::isnan(fac2.get_result()[4999]));
        assert(std::abs(fac2.get_result()[4998] - -1.0) < 0.001);
        assert(std::abs(fac2.get_result()[4997] - -0.9617) < 0.0001);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    // Now multidimensional data
    //
    RandGenParams<double>   p;

    p.seed = 123;
    p.min_value = -5.0;
    p.max_value = 5.0;

    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;

    // Generate and load 3 random columns
    //
    auto    rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * dim, p);

    StlVecType<ary_col_t>   array_col(df.get_index().size());

    for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i)  {
        for (std::size_t d { 0 }; d < dim; ++d)
            array_col[i][d] = rand_vec[j++];
    }
    df.load_column<ary_col_t>("md_array_col", std::move(array_col));

    FixedAutoCorrVisitor<ary_col_t, std::string>   md_b_fac { 31, roll_policy::blocks };

    df.single_act_visit<ary_col_t>("md_array_col", md_b_fac);

    const auto  &md_result1 = md_b_fac.get_result();

    assert(md_result1.size() == 162);
    for (const auto &vec : md_result1)
        assert(vec.size() == dim);
    assert(std::abs(md_result1[0][0] - -0.093056) < 0.000001);
    assert(std::abs(md_result1[10][0] - 0.00392099) < 0.00000001);
    assert(std::abs(md_result1[134][1] - -0.0776815) < 0.0000001);
    assert(std::abs(md_result1[161][2] - -0.608483) < 0.000001);

    FixedAutoCorrVisitor<ary_col_t, std::string> md_c_fac { 31, roll_policy::continuous };

    df.single_act_visit<ary_col_t>("md_array_col", md_c_fac);

    const auto  &md_result2 = md_c_fac.get_result();

    assert(md_result2.size() == 4998);
    for (const auto &vec : md_result2)
        assert(vec.size() == dim);
    assert(std::abs(md_result2[0][0] - -0.093056) < 0.000001);
    assert(std::abs(md_result2[1000][0] - -0.080953) < 0.000001);
    assert(std::abs(md_result2[1000][2] - 0.160131) < 0.000001);
    assert(std::abs(md_result2[4997][0] - -0.930264) < 0.000001);
    assert(std::abs(md_result2[4997][2] - 0.175084) < 0.000001);
}

// ----------------------------------------------------------------------------

// Checks PartialAutoCorrVisitor with a max lag of 50, first on the scalar
// "IBM_Close" column read from IBM.csv and then on a randomly generated
// column of 3-element arrays, where each lag yields a dim x dim matrix.
// Every check is an assert(), so a failure aborts the test.
//
static void test_PartialAutoCorrVisitor()  {

    std::cout << "\nTesting PartialAutoCorrVisitor{  } ..." << std::endl;

    StrDataFrame    df;

    try  {
        df.read("IBM.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    PartialAutoCorrVisitor<double, std::string> pacf { 50 };

    df.single_act_visit<double> ("IBM_Close", pacf);

    assert(pacf.get_result().size() == 50);
    assert(std::fabs(pacf.get_result()[0] - 1.0) < 0.000001);
    assert(std::fabs(pacf.get_result()[1] - 0.999915) < 0.000001);
    assert(std::fabs(pacf.get_result()[10] - 0.982959) < 0.000001);
    assert(std::fabs(pacf.get_result()[30] - 0.983226) < 0.000001);
    assert(std::fabs(pacf.get_result()[48] - 0.98751) < 0.000001);
    assert(std::fabs(pacf.get_result()[49] - 0.987886) < 0.000001);

    // Now multidimensional data
    //
    RandGenParams<double>   p;

    p.seed = 123;
    p.min_value = -3.0;
    p.max_value = 3.0;

    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;

    // Generate and load one random column of dim-element arrays
    //
    auto    rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * dim, p);

    std::vector<ary_col_t>   array_col(df.get_index().size());

    // Pack the flat random vector into rows of dim values each; j walks
    // rand_vec while i walks the rows of array_col
    //
    for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i)  {
        for (std::size_t d { 0 }; d < dim; ++d)
            array_col[i][d] = rand_vec[j++];
    }
    df.load_column<ary_col_t>("md_array_col", std::move(array_col));

    PartialAutoCorrVisitor<ary_col_t, std::string>  md_pacf { 50 };

    df.single_act_visit<ary_col_t>("md_array_col", md_pacf);

    const auto  &md_result = md_pacf.get_result();

    // One square dim x dim matrix per lag: check both dimensions
    //
    assert(md_result.size() == 50);
    for (const auto &mtx : md_result)  {
        assert(mtx.rows() == dim);
        assert(mtx.cols() == dim);
    }
    // First one must be the identity matrix
    //
    for (long r = 0; r < md_result[0].rows(); ++r)  {
        for (long c = 0; c < md_result[0].cols(); ++c)  {
            if (r == c)
                assert(md_result[0](r, c) == 1.0);
            else
                assert(md_result[0](r, c) == 0.0);
        }
    }
    assert(std::fabs(md_result[1](0, 0) - 0.0121899) < 0.000001);
    assert(std::fabs(md_result[1](1, 2) - 0.0032037) < 0.000001);
    assert(std::fabs(md_result[31](1, 1) - -0.0123611) < 0.000001);
    assert(std::fabs(md_result[31](2, 0) - -0.0122399) < 0.000001);
    assert(std::fabs(md_result[49](1, 0) - 0.00041) < 0.00001);
    assert(std::fabs(md_result[49](2, 2) - -0.00258) < 0.00001);
}


C++ DataFrame