| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long, std::size_t A = 0> struct ZScoreVisitor; // ------------------------------------- template<typename T, typename I = unsigned long, std::size_t A = 0> using zs_v = ZScoreVisitor<T, I, A>; |
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This functor class calculates the z-score of each value in a given column against the same column as the population. Its result is a vector of z-scores. This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface. get_result(): Returns the vector of z-scores. In case of a scalar dataset, this is a vector of scalar z-scores. In case of a multidimensional dataset, this is a vector of vectors of component-wise z-scores. |
T: Column data type. I: Index type. A: Memory alignment boundary for vectors. Default is system default alignment. |
static void test_z_score_visitor() { std::cout << "\nTesting Z-Score visitors ..." << std::endl; StlVecType<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466, 123467, 123468, 123469, 123470, 123471, 123472, 123473 }; StlVecType<double> d1 = { 99.00011, 99.00012, 99.00013, 99.00014, 99.00015, 99.00016, 99.000113, 99.000112, 99.000111, 99.00019, 99.00018, 99.00017, 99.000114, 99.000115, 99.000116, 99.000117, 99.000118, 99.000119, 99.0001114, 99.0001113, 99.0001112 }; StlVecType<double> d2 = { 10.1, 20.1, 30.1, 40.1, 50.1, 60.1, 70.1, 120.1, 110.1, 28.1, 18.1, 100.1, 90.1, 80.1, 130.1, 140.1, 150.1, 160.1, 170.1, 180.1, 190.1 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2)); ZScoreVisitor<double> z_score; ZScoreVisitor<double> z_score2; const auto result = df.single_act_visit<double>("col_1", z_score).get_result(); const auto result2 = df.single_act_visit<double>("col_2", z_score2).get_result(); assert(result.size() == 21); assert(fabs(result[0] - -0.774806) < 0.000001); assert(fabs(result[4] - 0.816872) < 0.000001); assert(fabs(result[10] - 2.01063) < 0.000001); assert(fabs(result[19] - -0.723076) < 0.000001); assert(fabs(result[20] - -0.727055) < 0.000001); assert(result2.size() == 21); assert(fabs(result2[0] - -1.42003) < 0.00001); assert(fabs(result2[4] - -0.732921) < 0.00001); assert(fabs(result2[10] - -1.28261) < 0.00001); assert(fabs(result2[19] - 1.5002) < 0.00001); assert(fabs(result2[20] - 1.67198) < 0.00001); const MyDataFrame const_df = df; SampleZScoreVisitor<double> z_score3; auto fut = const_df.single_act_visit_async<double, double>("col_1", "col_2", z_score3); auto result3 = fut.get().get_result(); assert(fabs(result3 - -1136669.1600501483772) < 0.000001); result3 = df.single_act_visit<double, double>("col_2", "col_2", z_score3).get_result(); assert(result3 == 0.0); // Now multidimensional data // 
RandGenParams<double> p; p.seed = 123; p.min_value = 0; p.max_value = 10.0; constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; // Generate and load 3 random columns // auto rand_vec = gen_uniform_real_dist<double>(df.get_index().size() * dim, p); StlVecType<ary_col_t> array_col(df.get_index().size()); StlVecType<vec_col_t> vector_col(df.get_index().size()); for (std::size_t i { 0 }, j { 0 }; j < rand_vec.size(); ++i) { vector_col[i].resize(dim); for (std::size_t d { 0 }; d < dim; ++d) array_col[i][d] = vector_col[i][d] = rand_vec[j++]; } df.load_column<ary_col_t>("array_col", std::move(array_col)); df.load_column<vec_col_t>("vector_col", std::move(vector_col)); ZScoreVisitor<ary_col_t> ary_md_zs; ZScoreVisitor<vec_col_t> vec_md_zs; const auto &ary_res = df.single_act_visit<ary_col_t>("array_col", ary_md_zs).get_result(); const auto &vec_res = df.single_act_visit<vec_col_t>("vector_col", vec_md_zs).get_result(); assert(ary_res.size() == 21); assert(vec_res.size() == 21); for (const auto &vec : ary_res) assert(vec.size() == dim); for (const auto &vec : vec_res) assert(vec.size() == dim); assert(std::fabs(ary_res[0][0] - -0.956961) < 0.000001); assert(std::fabs(ary_res[0][2] - 1.342) < 0.001); assert(std::fabs(vec_res[0][0] - -0.956961) < 0.000001); assert(std::fabs(vec_res[0][2] - 1.342) < 0.001); assert(std::fabs(ary_res[20][1] - 1.06051) < 0.00001); assert(std::fabs(ary_res[20][2] - 0.219589) < 0.000001); assert(std::fabs(vec_res[20][1] - 1.06051) < 0.00001); assert(std::fabs(vec_res[20][2] - 0.219589) < 0.000001); }