Signature	Description	Parameters
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct BetaVisitor;	This functor class calculates the beta (i.e. exposure) of the given first column to the given second column (benchmark). This works with both scalar and multidimensional (i.e. vectors and arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface. explicit BetaVisitor(bool bias = false, bool skipnan = false, bool stable_algo = false); bias: If true it divides by n - 1, otherwise by n. skipnan: If true it skips over nan numbers as if they didn't exist. stable_algo: If true, it uses a version of Kahan summation that is numerically stable for data with very large values. Kahan summation is slower than regular summation, so only use it if your data contains very large values. There are also the following member functions: get_result(): Returns the Beta. In case of scalar dataset, the Beta is a single number. In case of multidimensional dataset, the Beta is a square matrix of data dimension rows and columns. get_count(): Returns the number of valid datapoints (non-NaN) get_data_mean(): Returns the mean of the data time-series. In case of scalar dataset, the mean is a single number. In case of multidimensional dataset, the mean is a vector of size data dimension. get_benchmark_mean(): Returns the mean of the benchmark time-series. In case of scalar dataset, the mean is a single number. In case of multidimensional dataset, the mean is a vector of size data dimension.	T: Column data type I: Index type

Signature

Description

Parameters

#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct BetaVisitor;

This functor class calculates the beta (i.e. exposure) of the given first column to the given second column (benchmark).
This works with both scalar and multidimensional (i.e. vectors and arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface.

explicit BetaVisitor(bool bias = false, bool skipnan = false, bool stable_algo = false);
bias: If true it divides by n - 1, otherwise by n.
skipnan: If true it skips over nan numbers as if they didn't exist.
stable_algo: If true, it uses a version of Kahan summation that is numerically stable for data with very large values. Kahan summation is slower than regular summation, so only use it if your data contains very large values.

There are also the following member functions:

get_result(): Returns the Beta.
              In case of scalar dataset, the Beta is a single number. In case of multidimensional
              dataset, the Beta is a square matrix of data dimension rows and columns.
get_count(): Returns the number of valid datapoints (non-NaN)
get_data_mean(): Returns the mean of the data time-series. In case of scalar dataset,
                 the mean is a single number. In case of multidimensional dataset, the mean is
                 a vector of size data dimension.
get_benchmark_mean(): Returns the mean of the benchmark time-series. In case of scalar dataset,
                      the mean is a single number. In case of multidimensional dataset, the mean is
                      a vector of size data dimension.

T: Column data type
I: Index type

static void test_beta()  {

    using MyDataFrame = StdDataFrame<unsigned long>;

    std::cout << "\nTesting Beta ..." << std::endl;

    std::vector<unsigned long>  idx =
        { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466,
          123467, 123468, 123469, 123470, 123471, 123472, 123473 };
    std::vector<double>         d1 = { 1.0, 10, 8, 18, 19, 16, 21, 17, 20, 3, 2, 11, 7.0, 5, 9, 15, 14, 13, 12, 6, 4 };
    std::vector<double>         d2 = { 1.0, 10, 8, 18, 19, 16, 21, 17, 20, 3, 2, 11, 7.0, 5, 9, 15, 14, 13, 12, 6, 4 };
    std::vector<double>         d3 = { 1.1, 10.09, 8.2, 18.03, 19.4, 15.9, 20.8, 17.1, 19.9, 3.3, 2.2, 10.8, 7.4, 5.3, 9.1, 14.9, 14.8, 13.2, 12.6, 6.1, 4.4 };
    std::vector<double>         d4 = { 0.1, 9.09, 7.2, 17.03, 18.4, 14.9, 19.8, 16.1, 18.9, 2.3, 1.2, 9.8, 6.4, 4.3, 8.1, 13.9, 13.8, 12.2, 11.6, 5.1, 3.4 };
    std::vector<double>         d5 = { 20.0, 10.1, -30.2, 18.5, 1.1, 16.2, 30.8, -1.56, 20.1, 25.5, 30.89, 11.1, 7.4, 5.3, 19, 15.1, 1.3, 1.2, 12.6, 23.2, 40.1 };
    MyDataFrame                 df;

    df.load_data(std::move(idx),
                 std::make_pair("dblcol_1", d1),
                 std::make_pair("dblcol_2", d2),
                 std::make_pair("dblcol_3", d3),
                 std::make_pair("dblcol_4", d4),
                 std::make_pair("dblcol_5", d5));

    ReturnVisitor<double, unsigned long>    return_visit(return_policy::log);

    df.load_column<double>("dblcol_1_return", df.single_act_visit<double>("dblcol_1", return_visit).get_result(), nan_policy::dont_pad_with_nans);
    df.load_column<double>("dblcol_2_return", df.single_act_visit<double>("dblcol_2", return_visit).get_result(), nan_policy::dont_pad_with_nans);
    df.load_column<double>("dblcol_3_return", df.single_act_visit<double>("dblcol_3", return_visit).get_result(), nan_policy::dont_pad_with_nans);
    df.load_column<double>("dblcol_4_return", df.single_act_visit<double>("dblcol_4", return_visit).get_result(), nan_policy::dont_pad_with_nans);
    df.load_column<double>("dblcol_5_return", df.single_act_visit<double>("dblcol_5", return_visit).get_result(), nan_policy::dont_pad_with_nans);

    BetaVisitor<double> beta_visit(false, true);
    double              result = df.visit<double, double>("dblcol_1_return", "dblcol_2_return", beta_visit).get_result();

    assert(result == 1.0);

    result = df.visit<double, double>("dblcol_1_return", "dblcol_3_return", beta_visit).get_result();
    assert(fabs(result - 1.04881) < 0.00001);

    result = df.visit<double, double>("dblcol_1_return", "dblcol_4_return", beta_visit).get_result();
    assert(fabs(result - 0.647582) < 0.00001);

    result = df.visit<double, double>("dblcol_1_return", "dblcol_5_return", beta_visit).get_result();
    assert(fabs(result - -0.128854) < 0.00001);

    // Now multidimensional data
    //
    using col_t = std::array<double, 2>;

    std::vector<col_t>  md_data = { {1, 0}, {0, 1}, {1, 1}, {2, 1} };
    std::vector<col_t>  md_bench = { {2, -1}, {1, 3}, {3, 2}, {5, 1} };

    df.load_column<col_t>("md_data", md_data, nan_policy::dont_pad_with_nans);
    df.load_column<col_t>("md_benchmark", md_bench, nan_policy::dont_pad_with_nans);

    BetaVisitor<col_t>  md_beta;

    df.single_act_visit<col_t, col_t>("md_data", "md_benchmark", md_beta);

    const auto  &md_result { md_beta.get_result() };

    assert(md_result.rows() == 2);
    assert(md_result.cols() == 2);
    assert(fabs(md_result(0, 0) - 0.428571) < 0.000001);
    assert(fabs(md_result(0, 1) - -0.142857) < 0.000001);
    assert(fabs(md_result(1, 0) - 0.142857) < 0.000001);
    assert(fabs(md_result(1, 1) - 0.285714) < 0.000001);

    const auto  data_mean { md_beta.get_data_mean() };
    const auto  benchmark_mean { md_beta.get_benchmark_mean() };

    assert((data_mean == std::vector<double>{ 1.0, 0.75 }));
    assert((benchmark_mean == std::vector<double>{ 2.75, 1.25 }));
}