Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct StdVisitor;
This functor class calculates the standard deviation of a given column.
This works with both scalar and multidimensional (i.e. vector and arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface.
          explicit
          StdVisitor(bool bias = false,
                     bool skipnan = false,
                     bool stable_algo = false);
        
bias: If true it divides by n - 1, otherwise by n.
skipnan: If true, it skips over NaN values as if they didn't exist.
stable_algo: If true, it uses a version of Kahan summation that is numerically stable for data with very large values. Kahan summation is slower than regular summation, so use it only if your data contains very large values.

There are also the following member functions:
get_result(): Returns the standard deviation.
              In case of scalar dataset, the standard deviation is a single number. In case of multidimensional
              dataset, the standard deviation is a square matrix of data dimension rows and columns.
get_count(): Returns the number of valid datapoints (non-NaN)
get_mean(): Returns the mean of the time-series. In case of scalar dataset,
            the mean is a single number. In case of multidimensional dataset, the mean is
            a vector of size data dimension.
        
T: Column data type.
I: Index type.

// Exercises VarVisitor and StdVisitor on multidimensional columns.
//
// Builds a data frame with 1024 rows, loads one std::vector<double> column
// and two std::array<double, 3> columns filled with seeded uniform random
// values, runs the visitors through single_act_visit(), and checks the
// results against previously recorded reference numbers.
//
void test_md_stats()  {

    std::cout << "\nTesting get_md_stats( ) ..." << std::endl;

    constexpr std::size_t   item_cnt = 1024;
    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;
    using vec_col_t = std::vector<double>;

    ULDataFrame df;

    df.load_index(ULDataFrame::gen_sequence_index(0, item_cnt, 1));

    const auto  row_cnt = df.get_index().size();

    // Parameters of the uniform distribution the samples are drawn from
    //
    RandGenParams<double>   p;

    p.seed = 123;
    p.min_value = 0.5;
    p.max_value = 2.0;

    // First batch of samples: dim values per row. The same values go into
    // both the vector column and the first array column, so the two visitors
    // below see identical data.
    //
    auto    rand_vec = gen_uniform_real_dist<double>(row_cnt * dim, p);

    std::vector<ary_col_t>  array_col(row_cnt);
    std::vector<vec_col_t>  vector_col(row_cnt);

    {
        std::size_t row { 0 };
        std::size_t src { 0 };

        while (src < rand_vec.size())  {
            auto    &vec_row = vector_col[row];
            auto    &ary_row = array_col[row];

            vec_row.resize(dim);
            for (std::size_t d { 0 }; d < dim; ++d)  {
                vec_row[d] = rand_vec[src++];
                ary_row[d] = vec_row[d];
            }
            ++row;
        }
    }
    df.load_column<ary_col_t>("array_col", std::move(array_col));
    df.load_column<vec_col_t>("vector_col", std::move(vector_col));

    // Second batch, drawn with a different seed, fills the second
    // array column.
    //
    std::vector<ary_col_t>  array_col2(row_cnt);

    p.seed = 1024;
    rand_vec = gen_uniform_real_dist<double>(row_cnt * dim, p);
    {
        std::size_t row { 0 };
        std::size_t src { 0 };

        while (src < rand_vec.size())  {
            auto    &ary_row = array_col2[row];

            for (std::size_t d { 0 }; d < dim; ++d)
                ary_row[d] = rand_vec[src++];
            ++row;
        }
    }
    df.load_column<ary_col_t>("array_col2", std::move(array_col2));

    // True if actual is within tolerance (exclusive) of expected
    //
    const auto  is_close =
        [](const auto &actual, double expected, double tolerance) -> bool  {
            return (std::fabs(actual - expected) < tolerance);
        };

    // Standard Deviation / Variance
    //
    VarVisitor<vec_col_t>   var;
    StdVisitor<ary_col_t>   stdev;

    df.single_act_visit<vec_col_t>("vector_col", var);
    df.single_act_visit<ary_col_t>("array_col", stdev);

    // The variance result is addressed as a dim x dim matrix
    //
    const auto  &var_result = var.get_result();

    assert(var_result.rows() == dim);
    assert(var_result.cols() == dim);
    assert(is_close(var_result(0, 0), 0.188617, 0.000001));
    assert(is_close(var_result(1, 2), 0.004326, 0.000001));
    assert(is_close(var_result(2, 1), 0.004326, 0.000001));

    const auto  &var_mean = var.get_mean();

    assert(var_mean.size() == dim);
    assert(is_close(var_mean[1], 1.2646, 0.0001));
    assert(is_close(var_mean[2], 1.24829, 0.00001));

    // The standard deviation result is addressed per dimension
    //
    const auto  &std_result = stdev.get_result();

    assert(std_result.size() == dim);
    assert(is_close(std_result[1], 0.428274, 0.000001));
    assert(is_close(std_result[2], 0.431151, 0.000001));

    // Both visitors saw the same data, so the means must agree
    //
    const auto  &std_mean = stdev.get_mean();

    assert(std_mean.size() == dim);
    assert(is_close(std_mean[1], 1.2646, 0.0001));
    assert(is_close(std_mean[2], 1.24829, 0.00001));
}

C++ DataFrame