Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct MeanVisitor;
This functor class calculates the mean of a given column.
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface.

get_result() returns the scalar mean in case of a scalar column. In case of a multidimensional column, it returns a vector of means containing mean of each channel (dimension).
T: Column data type
I: Index type
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct StableMeanVisitor;
This functor class calculates the running mean using Welford's algorithm for numerical stability:
$\text{mean}_t = \text{mean}_{t-1} + (X_t - \text{mean}_{t-1}) / \text{count}$
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.
T: Column data type
I: Index type
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct WeightedMeanVisitor;
This functor class calculates the weighted mean of a given column. It favors more recent data.
The numerator is $V_0 \cdot 1 + V_1 \cdot 2 + \dots + V_{n-1} \cdot n$. The denominator is $n (n + 1) / 2$.
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.
T: Column data type
I: Index type
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct GeometricMeanVisitor;
This functor class calculates the geometric mean of a given column.
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface.

get_result() returns the scalar mean in case of a scalar column. In case of a multidimensional column, it returns a vector of means containing mean of each channel (dimension).
T: Column data type.
I: Index type.
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct HarmonicMeanVisitor;
This functor class calculates the harmonic mean of a given column.
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.
T: Column data type.
I: Index type.
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct QuadraticMeanVisitor;
This functor class calculates the quadratic mean, also known as the root mean square. It gives a greater weight to larger items and is always equal to or greater than the "regular" arithmetic mean.
The mean is calculated as $\sqrt{(X_0^2 + X_1^2 + \dots + X_{n-1}^2) / n}$
The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is true.

In addition to get_result(), this visitor also has get_euclidean_norm() that returns $\sqrt{X_0^2 + X_1^2 + \dots + X_{n-1}^2}$
T: Column data type
I: Index type
static void test_BiasVisitor()  {

    std::cout << "\nTesting BiasVisitor{  } ..." << std::endl;

    StrDataFrame    df;

    try  {
        df.read("SHORT_IBM.csv", io_format::csv2);
        df.remove_data_by_loc<double, long>({ 0, 1500 });

        using avg1 = MeanVisitor<double, std::string>;

        avg1                                     avg1_v;
        bias_v<avg1, double, std::string, 256>   bias1 (avg1_v);

        df.single_act_visit<double>("IBM_Close", bias1);

        assert(bias1.get_result().size() == 221);
        assert(std::isnan(bias1.get_result()[0]));
        assert(std::isnan(bias1.get_result()[24]));
        assert(std::abs(bias1.get_result()[25] - 0.0309) < 0.0001);
        assert(std::abs(bias1.get_result()[30] - 0.0477) < 0.0001);
        assert(std::abs(bias1.get_result()[35] - 0.0907) < 0.0001);
        assert(std::abs(bias1.get_result()[220] - -0.0698) < 0.0001);
        assert(std::abs(bias1.get_result()[215] - -0.049) < 0.0001);
        assert(std::abs(bias1.get_result()[210] - 0.0242) < 0.0001);

        using s_avg1 = StableMeanVisitor<double, std::string>;

        s_avg1                                   s_avg1_v;
        bias_v<s_avg1, double, std::string, 256> s_bias1 (s_avg1_v);

        df.single_act_visit<double>("IBM_Close", s_bias1);

        assert(s_bias1.get_result().size() == 221);
        assert(std::isnan(s_bias1.get_result()[0]));
        assert(std::isnan(s_bias1.get_result()[24]));
        assert(std::abs(s_bias1.get_result()[25] - 0.0309) < 0.0001);
        assert(std::abs(s_bias1.get_result()[30] - 0.0477) < 0.0001);
        assert(std::abs(s_bias1.get_result()[35] - 0.0907) < 0.0001);
        assert(std::abs(s_bias1.get_result()[220] - -0.0698) < 0.0001);
        assert(std::abs(s_bias1.get_result()[215] - -0.049) < 0.0001);
        assert(std::abs(s_bias1.get_result()[210] - 0.0242) < 0.0001);

        using avg2 = WeightedMeanVisitor<double, std::string>;

        avg2                                     avg2_v;
        bias_v<avg2, double, std::string, 256>   bias2 (avg2_v);

        df.single_act_visit<double>("IBM_Close", bias2);

        assert(bias2.get_result().size() == 221);
        assert(std::isnan(bias2.get_result()[0]));
        assert(std::isnan(bias2.get_result()[24]));
        assert(std::abs(bias2.get_result()[25] - 0.0224) < 0.0001);
        assert(std::abs(bias2.get_result()[30] - 0.0381) < 0.0001);
        assert(std::abs(bias2.get_result()[35] - 0.068) < 0.0001);
        assert(std::abs(bias2.get_result()[220] - -0.0532) < 0.0001);
        assert(std::abs(bias2.get_result()[215] - -0.0496) < 0.0001);
        assert(std::abs(bias2.get_result()[210] - 0.0168) < 0.0001);

        using avg3 = GeometricMeanVisitor<double, std::string>;

        avg3                                     avg3_v;
        bias_v<avg3, double, std::string, 256>   bias3 (avg3_v);

        df.single_act_visit<double>("IBM_Close", bias3);

        assert(bias3.get_result().size() == 221);
        assert(std::isnan(bias3.get_result()[0]));
        assert(std::isnan(bias3.get_result()[24]));
        assert(std::abs(bias3.get_result()[25] - 0.0311) < 0.0001);
        assert(std::abs(bias3.get_result()[30] - 0.0479) < 0.0001);
        assert(std::abs(bias3.get_result()[35] - 0.0919) < 0.0001);
        assert(std::abs(bias3.get_result()[220] - -0.0685) < 0.0001);
        assert(std::abs(bias3.get_result()[215] - -0.0485) < 0.0001);
        assert(std::abs(bias3.get_result()[210] - 0.0245) < 0.0001);

        using avg4 = HarmonicMeanVisitor<double, std::string>;

        avg4                                     avg4_v;
        bias_v<avg4, double, std::string, 256>   bias4 (avg4_v);

        df.single_act_visit<double>("IBM_Close", bias4);

        assert(bias4.get_result().size() == 221);
        assert(std::isnan(bias4.get_result()[0]));
        assert(std::isnan(bias4.get_result()[24]));
        assert(std::abs(bias4.get_result()[25] - 0.0313) < 0.0001);
        assert(std::abs(bias4.get_result()[30] - 0.0481) < 0.0001);
        assert(std::abs(bias4.get_result()[35] - 0.093) < 0.0001);
        assert(std::abs(bias4.get_result()[220] - -0.0672) < 0.0001);
        assert(std::abs(bias4.get_result()[215] - -0.048) < 0.0001);
        assert(std::abs(bias4.get_result()[210] - 0.0248) < 0.0001);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    // Now multidimensional data
    //
    RandGenParams<double>   p;

    p.seed = 123;
    p.min_value = 1;
    p.max_value = 10.0;

    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;
    using vec_col_t = std::vector<double>;

    // Generate and load 3 random columns
    //
    auto    rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * dim, p);

    StlVecType<ary_col_t>   array_col(df.get_index().size());
    StlVecType<vec_col_t>   vector_col(df.get_index().size());

    for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i)  {
        vector_col[i].resize(dim);
        for (std::size_t d { 0 }; d < dim; ++d)
            array_col[i][d] = vector_col[i][d] = rand_vec[j++];
    }
    df.load_column<ary_col_t>("array_col", std::move(array_col));
    df.load_column<vec_col_t>("vector_col", std::move(vector_col));

    MeanVisitor<ary_col_t, std::string>             ary_mean_v;
    MeanVisitor<vec_col_t, std::string>             vec_mean_v;
    GeometricMeanVisitor<ary_col_t, std::string>    ary_geom_v;
    GeometricMeanVisitor<vec_col_t, std::string>    vec_geom_v;

    const auto  &ary_mean_res = df.single_act_visit<ary_col_t>("array_col", ary_mean_v).get_result();
    const auto  &vec_mean_res = df.single_act_visit<vec_col_t>("vector_col", vec_mean_v).get_result();
    const auto  &ary_geom_res = df.single_act_visit<ary_col_t>("array_col", ary_geom_v).get_result();
    const auto  &vec_geom_res = df.single_act_visit<vec_col_t>("vector_col", vec_geom_v).get_result();

    assert(ary_mean_res.size() == 3);
    assert(vec_mean_res.size() == 3);
    assert(std::abs(ary_mean_res[0] - 5.47712) < 0.00001);
    assert(std::abs(ary_mean_res[2] - 5.67667) < 0.00001);
    assert(std::abs(vec_mean_res[0] - 5.47712) < 0.00001);
    assert(std::abs(vec_mean_res[2] - 5.67667) < 0.00001);

    assert(ary_geom_res.size() == 3);
    assert(vec_geom_res.size() == 3);
    assert(std::abs(ary_geom_res[0] - 4.80128) < 0.00001);
    assert(std::abs(ary_geom_res[2] - 4.97216) < 0.00001);
    assert(std::abs(vec_geom_res[0] - 4.80128) < 0.00001);
    assert(std::abs(vec_geom_res[2] - 4.97216) < 0.00001);
}

C++ DataFrame