| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct MeanVisitor; |
This functor class calculates the mean of a given column. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface. get_result() returns the scalar mean in case of a scalar column. In case of a multidimensional column, it returns a vector of means containing the mean of each channel (dimension). |
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct StableMeanVisitor; |
This functor class calculates the running mean using Welford's algorithm for numerical stability: $\text{mean}_t = \text{mean}_{t-1} + (X_t - \text{mean}_{t-1}) / \text{count}$. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. |
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct WeightedMeanVisitor; |
This functor class calculates the weighted mean of a given column. It favors more recent data. The numerator is $V_0 \cdot 1 + V_1 \cdot 2 + \dots + V_{n-1} \cdot n$. The denominator is $n (n + 1) / 2$. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. |
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct GeometricMeanVisitor; |
This functor class calculates the geometric mean of a given column. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface. get_result() returns the scalar mean in case of a scalar column. In case of a multidimensional column, it returns a vector of means containing the mean of each channel (dimension). |
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct HarmonicMeanVisitor; |
This functor class calculates the harmonic mean of a given column. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. |
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct QuadraticMeanVisitor; |
This functor class calculates the quadratic mean, also known as the root mean square. It gives a greater weight to larger items and is always equal to or greater than the "regular" arithmetic mean. The mean is calculated as $\sqrt{(X_0^2 + X_1^2 + \dots + X_{n-1}^2) / n}$. The constructor takes a single optional Boolean argument that specifies whether to skip NaN values. The default is True. In addition to get_result(), this visitor also has get_euclidean_norm(), which returns $\sqrt{X_0^2 + X_1^2 + \dots + X_{n-1}^2}$. |
T: Column data type. I: Index type. |
static void test_BiasVisitor() { std::cout << "\nTesting BiasVisitor{ } ..." << std::endl; StrDataFrame df; try { df.read("SHORT_IBM.csv", io_format::csv2); df.remove_data_by_loc<double, long>({ 0, 1500 }); using avg1 = MeanVisitor<double, std::string>; avg1 avg1_v; bias_v<avg1, double, std::string, 256> bias1 (avg1_v); df.single_act_visit<double>("IBM_Close", bias1); assert(bias1.get_result().size() == 221); assert(std::isnan(bias1.get_result()[0])); assert(std::isnan(bias1.get_result()[24])); assert(std::abs(bias1.get_result()[25] - 0.0309) < 0.0001); assert(std::abs(bias1.get_result()[30] - 0.0477) < 0.0001); assert(std::abs(bias1.get_result()[35] - 0.0907) < 0.0001); assert(std::abs(bias1.get_result()[220] - -0.0698) < 0.0001); assert(std::abs(bias1.get_result()[215] - -0.049) < 0.0001); assert(std::abs(bias1.get_result()[210] - 0.0242) < 0.0001); using s_avg1 = StableMeanVisitor<double, std::string>; s_avg1 s_avg1_v; bias_v<s_avg1, double, std::string, 256> s_bias1 (s_avg1_v); df.single_act_visit<double>("IBM_Close", s_bias1); assert(s_bias1.get_result().size() == 221); assert(std::isnan(s_bias1.get_result()[0])); assert(std::isnan(s_bias1.get_result()[24])); assert(std::abs(s_bias1.get_result()[25] - 0.0309) < 0.0001); assert(std::abs(s_bias1.get_result()[30] - 0.0477) < 0.0001); assert(std::abs(s_bias1.get_result()[35] - 0.0907) < 0.0001); assert(std::abs(s_bias1.get_result()[220] - -0.0698) < 0.0001); assert(std::abs(s_bias1.get_result()[215] - -0.049) < 0.0001); assert(std::abs(s_bias1.get_result()[210] - 0.0242) < 0.0001); using avg2 = WeightedMeanVisitor<double, std::string>; avg2 avg2_v; bias_v<avg2, double, std::string, 256> bias2 (avg2_v); df.single_act_visit<double>("IBM_Close", bias2); assert(bias2.get_result().size() == 221); assert(std::isnan(bias2.get_result()[0])); assert(std::isnan(bias2.get_result()[24])); assert(std::abs(bias2.get_result()[25] - 0.0224) < 0.0001); assert(std::abs(bias2.get_result()[30] - 0.0381) < 0.0001); 
assert(std::abs(bias2.get_result()[35] - 0.068) < 0.0001); assert(std::abs(bias2.get_result()[220] - -0.0532) < 0.0001); assert(std::abs(bias2.get_result()[215] - -0.0496) < 0.0001); assert(std::abs(bias2.get_result()[210] - 0.0168) < 0.0001); using avg3 = GeometricMeanVisitor<double, std::string>; avg3 avg3_v; bias_v<avg3, double, std::string, 256> bias3 (avg3_v); df.single_act_visit<double>("IBM_Close", bias3); assert(bias3.get_result().size() == 221); assert(std::isnan(bias3.get_result()[0])); assert(std::isnan(bias3.get_result()[24])); assert(std::abs(bias3.get_result()[25] - 0.0311) < 0.0001); assert(std::abs(bias3.get_result()[30] - 0.0479) < 0.0001); assert(std::abs(bias3.get_result()[35] - 0.0919) < 0.0001); assert(std::abs(bias3.get_result()[220] - -0.0685) < 0.0001); assert(std::abs(bias3.get_result()[215] - -0.0485) < 0.0001); assert(std::abs(bias3.get_result()[210] - 0.0245) < 0.0001); using avg4 = HarmonicMeanVisitor<double, std::string>; avg4 avg4_v; bias_v<avg4, double, std::string, 256> bias4 (avg4_v); df.single_act_visit<double>("IBM_Close", bias4); assert(bias4.get_result().size() == 221); assert(std::isnan(bias4.get_result()[0])); assert(std::isnan(bias4.get_result()[24])); assert(std::abs(bias4.get_result()[25] - 0.0313) < 0.0001); assert(std::abs(bias4.get_result()[30] - 0.0481) < 0.0001); assert(std::abs(bias4.get_result()[35] - 0.093) < 0.0001); assert(std::abs(bias4.get_result()[220] - -0.0672) < 0.0001); assert(std::abs(bias4.get_result()[215] - -0.048) < 0.0001); assert(std::abs(bias4.get_result()[210] - 0.0248) < 0.0001); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } // Now multidimensional data // RandGenParams<double> p; p.seed = 123; p.min_value = 1; p.max_value = 10.0; constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; // Generate and load 3 random columns // auto rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * 
dim, p); StlVecType<ary_col_t> array_col(df.get_index().size()); StlVecType<vec_col_t> vector_col(df.get_index().size()); for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i) { vector_col[i].resize(dim); for (std::size_t d { 0 }; d < dim; ++d) array_col[i][d] = vector_col[i][d] = rand_vec[j++]; } df.load_column<ary_col_t>("array_col", std::move(array_col)); df.load_column<vec_col_t>("vector_col", std::move(vector_col)); MeanVisitor<ary_col_t, std::string> ary_mean_v; MeanVisitor<vec_col_t, std::string> vec_mean_v; GeometricMeanVisitor<ary_col_t, std::string> ary_geom_v; GeometricMeanVisitor<vec_col_t, std::string> vec_geom_v; const auto &ary_mean_res = df.single_act_visit<ary_col_t>("array_col", ary_mean_v).get_result(); const auto &vec_mean_res = df.single_act_visit<vec_col_t>("vector_col", vec_mean_v).get_result(); const auto &ary_geom_res = df.single_act_visit<ary_col_t>("array_col", ary_geom_v).get_result(); const auto &vec_geom_res = df.single_act_visit<vec_col_t>("vector_col", vec_geom_v).get_result(); assert(ary_mean_res.size() == 3); assert(vec_mean_res.size() == 3); assert(std::abs(ary_mean_res[0] - 5.47712) < 0.00001); assert(std::abs(ary_mean_res[2] - 5.67667) < 0.00001); assert(std::abs(vec_mean_res[0] - 5.47712) < 0.00001); assert(std::abs(vec_mean_res[2] - 5.67667) < 0.00001); assert(ary_geom_res.size() == 3); assert(vec_geom_res.size() == 3); assert(std::abs(ary_geom_res[0] - 4.80128) < 0.00001); assert(std::abs(ary_geom_res[2] - 4.97216) < 0.00001); assert(std::abs(vec_geom_res[0] - 4.80128) < 0.00001); assert(std::abs(vec_geom_res[2] - 4.97216) < 0.00001); }