| Signature | Description |
|---|---|
enum class correlation_type : unsigned char {
pearson = 1, // ρ = COV(X, Y) / (σ(X) * σ(Y))
|
This specifies different correlation types. |
| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h>
template<typename T, typename I = unsigned long>
struct CorrVisitor;
|
This functor class calculates the correlation of two given columns.
explicit
CorrVisitor(correlation_type t = correlation_type::pearson,
bool bias = false,
bool skipnan = false,
bool stable_algo = false);
correlation_type: Correlation type from above. bias: If true it divides by n - 1, otherwise by n. skipnan: If true, it skips over NaN values as if they didn't exist. stable_algo: If true, it uses a version of Kahan summation that is numerically stable for data with very large values. Kahan summation is slower than regular summation, so use it only if your data contains very large values. There are also the following member functions:
get_result(): Returns the correlation
get_mean1(): Returns the mean of the first time-series
get_mean2(): Returns the mean of the second time-series
|
T: Column data type. T must be an arithmetic-enabled type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h>
template<typename T, typename I = unsigned long,
std::size_t A = 0>
struct CrossCorrVisitor;
|
This functor calculates a series of the above correlations between lagged vectors of the first and second time-series. Lags run from min_lag (included) to max_lag (excluded) and are incremented by 1. The result is a vector of correlations containing "max_lag – min_lag" values. If the lag value is positive, the second time-series is lagged forward. Otherwise, the first time-series is lagged forward.
CrossCorrVisitor (long min_lag,
long max_lag,
correlation_type t = correlation_type::pearson,
bool biased = false,
bool skip_nan = false,
bool stable_algo = false);
min_lag: Minimum lag period. max_lag: Maximum lag period. correlation_type: Correlation type from above. biased: If true it divides by n - 1, otherwise by n. skip_nan: If true, it skips over NaN values as if they didn't exist. stable_algo: If true, it uses a version of Kahan summation that is numerically stable for data with very large values. Kahan summation is slower than regular summation, so use it only if your data contains very large values. |
T: Column data type. T must be an arithmetic-enabled type. I: Index type. A: Memory alignment boundary for vectors. Default is the system default alignment. |
MyDataFrame df; df.create_column<int>(static_cast<const char *>("col_name")); StlVecType<int> intvec = { 1, 2, 3, 4, 5 }; StlVecType<double> dblvec = { 1.2345, 2.2345, 3.2345, 4.2345, 5.2345 }; StlVecType<double> dblvec2 = { 0.998, 0.3456, 0.056, 0.15678, 0.00345, 0.923, 0.06743, 0.1 }; StlVecType<std::string> strvec = { "Col_name", "Col_name", "Col_name", "Col_name", "Col_name" }; StlVecType<unsigned long> ulgvec = { 1UL, 2UL, 3UL, 4UL, 5UL, 8UL, 7UL, 6UL }; StlVecType<unsigned long> xulgvec = ulgvec; MyDataFrame::size_type rc = df.load_data(std::move(ulgvec), std::make_pair("int_col", intvec), std::make_pair("dbl_col", dblvec), std::make_pair("dbl_col_2", dblvec2), std::make_pair("str_col", strvec), std::make_pair("ul_col", xulgvec)); df.append_column<std::string>("str_col", "Additional column"); df.append_column("dbl_col", 10.56); CorrVisitor<double> p_corr_visitor; CorrVisitor<double> s_corr_visitor(correlation_type::spearman); CorrVisitor<double> k_corr_visitor(correlation_type::kendall_tau); df.single_act_visit<double, double>("dbl_col", "dbl_col_2", s_corr_visitor); df.single_act_visit<double, double>("dbl_col", "dbl_col_2", k_corr_visitor); auto fut = df.visit_async<double, double>("dbl_col", "dbl_col_2", p_corr_visitor); const double p_corr = fut.get().get_result(); assert(fabs(p_corr - -0.358381) < 0.000001); assert(fabs(s_corr_visitor.get_result() - -0.380952) < 0.000001); assert(fabs(k_corr_visitor.get_result() - -0.285714) < 0.000001);
// ---------------------------------------------------------------------------- static void test_CrossCorrVisitor() { std::cout << "\nTesting CrossCorrVisitor{ } ..." << std::endl; typedef StdDataFrame64<std::string> StrDataFrame; StrDataFrame df; try { df.read("SHORT_IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; } CrossCorrVisitor<double, std::string> cc(-16, 16); df.single_act_visit<double, double>("IBM_Close", "IBM_Open", cc); assert(cc.get_result().size() == 32); assert(std::fabs(cc.get_result()[0] - 0.906) < 0.0001); assert(std::fabs(cc.get_result()[1] - 0.9117) < 0.0001); assert(std::fabs(cc.get_result()[15] - 0.9919) < 0.0001); assert(std::fabs(cc.get_result()[16] - 0.9971) < 0.0001); assert(std::fabs(cc.get_result()[30] - 0.9239) < 0.0001); assert(std::fabs(cc.get_result()[31] - 0.9179) < 0.0001); }