| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<arithmetic T, typename I = unsigned long> struct ConfIntervalVisitor; // ---------------------------------- template<std::floating_point T, typename I = unsigned long> using coni_v = ConfIntervalVisitor<T, I>; |
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. A confidence interval is a range of values derived from sample data that is likely to contain the true, unknown population parameter. It's calculated as a point estimate (like the sample mean) plus or minus a margin of error. The accompanying confidence level, such as 95%, indicates the long-run probability that repeated sampling would result in intervals containing the true parameter. A 95% confidence interval does not mean there is a 95% probability that the true population mean lies in this specific interval. Instead, it means that if you were to conduct this procedure many times with different samples, 95% of the intervals you generate would contain the true population mean. This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. In case of multidimensional data, the analysis is done per dimension (channel).
explicit
ConfIntervalVisitor (value_type conf_level = 0.95);
conf_level: Confidence level must be between 80% and 99.9% inclusive,
otherwise the result is NaN.
There are also the following member functions: get_result(): Returns a std::pair<T, T> containing lower and upper limits of plausible values for the true population parameter. In case of multidimensional input, the result is a pair of vectors of dimension size. get_error_margin(): Returns the margin of error. In case of multidimensional input, it is a vector of dimension size. |
T: Column data type. I: Index type. |
static void test_ConfIntervalVisitor() { std::cout << "\nTesting ConfIntervalVisitor{ } ..." << std::endl; using StrDataFrame = StdDataFrame<std::string>; StrDataFrame df; try { df.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } df.load_column("Extra Col", std::vector<double> { 5.1, 4.9, 5.0, 5.3, 5.2, 4.8 }, nan_policy::dont_pad_with_nans); ConfIntervalVisitor<double, std::string> ci_v1 { 0.95 }; df.single_act_visit<double>("Extra Col", ci_v1); assert(std::fabs(ci_v1.get_error_margin() - 0.149697) < 0.000001); assert(std::fabs(ci_v1.get_result().first - 4.9003) < 0.0001); assert(std::fabs(ci_v1.get_result().second - 5.1997) < 0.0001); ConfIntervalVisitor<double, std::string> ci_v2 { 0.96 }; df.single_act_visit<double>("Extra Col", ci_v2); assert(std::fabs(ci_v2.get_error_margin() - 0.159023) < 0.000001); assert(std::fabs(ci_v2.get_result().first - 4.89098) < 0.00001); assert(std::fabs(ci_v2.get_result().second - 5.20902) < 0.00001); coni_v<double, std::string> ci_v3 { 0.99 }; df.single_act_visit<double>("IBM_Close", ci_v3); assert(std::fabs(ci_v3.get_error_margin() - 1.39119) < 0.00001); assert(std::fabs(ci_v3.get_result().first - 128.601) < 0.001); assert(std::fabs(ci_v3.get_result().second - 131.383) < 0.001); // Now multidimensional data // constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; std::vector<ary_col_t> ary_md_x { { 22.1, 1013.2, 55.4 }, { 21.8, 1012.8, 57.1 }, { 23.4, 1014.5, 53.2 }, { 22.7, 1013.9, 56.8 }, { 21.5, 1012.1, 58.3 }, { 23.1, 1014.1, 54.6 }, { 22.9, 1013.7, 55.9 }, { 21.2, 1011.8, 59.1 }, { 23.8, 1015.0, 52.7 }, { 22.5, 1013.4, 56.2 }, }; std::vector<vec_col_t> vec_md_x { { 22.1, 1013.2, 55.4 }, { 21.8, 1012.8, 57.1 }, { 23.4, 1014.5, 53.2 }, { 22.7, 1013.9, 56.8 }, { 21.5, 1012.1, 58.3 }, { 23.1, 1014.1, 54.6 }, { 22.9, 1013.7, 55.9 }, { 21.2, 1011.8, 59.1 }, { 23.8, 1015.0, 52.7 }, { 22.5, 1013.4, 
56.2 }, }; df.load_column<ary_col_t>("ARY MD COL", std::move(ary_md_x), nan_policy::dont_pad_with_nans); df.load_column<vec_col_t>("VEC MD COL", std::move(vec_md_x), nan_policy::dont_pad_with_nans); ConfIntervalVisitor<ary_col_t, std::string> ary_ci_v; ConfIntervalVisitor<vec_col_t, std::string> vec_ci_v; df.single_act_visit<ary_col_t>("ARY MD COL", ary_ci_v); df.single_act_visit<vec_col_t>("VEC MD COL", vec_ci_v); assert(ary_ci_v.get_error_margin().size() == dim); assert(std::fabs(ary_ci_v.get_error_margin()[0] - 0.522667) < 0.000001); assert(std::fabs(ary_ci_v.get_error_margin()[2] - 1.27024) < 0.00001); assert(vec_ci_v.get_error_margin().size() == dim); assert(std::fabs(vec_ci_v.get_error_margin()[0] - 0.522667) < 0.000001); assert(std::fabs(vec_ci_v.get_error_margin()[2] - 1.27024) < 0.00001); assert(ary_ci_v.get_result().first.size() == dim); assert(ary_ci_v.get_result().second.size() == dim); assert(std::fabs(ary_ci_v.get_result().first[0] - 21.9773) < 0.0001); assert(std::fabs(ary_ci_v.get_result().first[2] - 54.6598) < 0.0001); assert(std::fabs(ary_ci_v.get_result().second[0] - 23.0227) < 0.0001); assert(std::fabs(ary_ci_v.get_result().second[2] - 57.2002) < 0.0001); assert(vec_ci_v.get_result().first.size() == dim); assert(vec_ci_v.get_result().second.size() == dim); assert(std::fabs(vec_ci_v.get_result().first[0] - 21.9773) < 0.0001); assert(std::fabs(vec_ci_v.get_result().first[2] - 54.6598) < 0.0001); assert(std::fabs(vec_ci_v.get_result().second[0] - 23.0227) < 0.0001); assert(std::fabs(vec_ci_v.get_result().second[2] - 57.2002) < 0.0001); }