| Signature | Description | Parameters |
|---|---|---|
| `template<typename T> Matrix<T, matrix_orient::column_major> covariance_matrix(std::vector<const char *> &&col_names, normalization_type norm_type = normalization_type::none) const;` | Calculates and returns the variance/covariance matrix of the specified columns, optionally normalizing the columns first. If you normalize the data with the z-score method first, you will get a correlation matrix. This works with both scalar and multidimensional (MD) data, where MD columns hold vectors or arrays. For an n×m scalar matrix, you will get an m×m scalar covariance matrix. For an n×m MD matrix, you will get an (m·d)×(m·d) scalar matrix, where d is the dimensionality of the data. | **T**: Type of the named columns<br>**col_names**: Vector of column names<br>**norm_type**: The method used to normalize the columns before the calculation. The default is no normalization. |
static void test_covariance_matrix() { std::cout << "\nTesting covariance_matrix( ) ..." << std::endl; StrDataFrame df; try { df.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } const auto cov_mat = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }); assert(cov_mat.rows() == 4); assert(cov_mat.cols() == 4); assert(std::fabs(cov_mat(0, 0) - 1467.58) < 0.01); assert(std::fabs(cov_mat(0, 2) - 1469.69) < 0.01); assert(std::fabs(cov_mat(2, 1) - 1469.48) < 0.01); assert(std::fabs(cov_mat(2, 2) - 1472.86) < 0.01); assert(std::fabs(cov_mat(3, 2) - 1466.15) < 0.01); assert(std::fabs(cov_mat(3, 3) - 1461.0) < 0.01); const auto cov_mat2 = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, normalization_type::z_score); assert(cov_mat2.rows() == 4); assert(cov_mat2.cols() == 4); assert(std::fabs(cov_mat2(0, 0) - 1.0) < 0.01); assert(std::fabs(cov_mat2(0, 2) - 0.99964) < 0.00001); assert(std::fabs(cov_mat2(2, 1) - 0.99963) < 0.00001); assert(std::fabs(cov_mat2(2, 2) - 1.0) < 0.01); assert(std::fabs(cov_mat2(3, 2) - 0.99948) < 0.00001); assert(std::fabs(cov_mat2(3, 3) - 1.0) < 0.01); // Now multidimensional data // constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; std::vector<ary_col_t> md_ary_col1 { { 1.0, 2.0, 3.0 }, { 2.0, 4.0, 1.0 }, { 3.0, 1.0, 4.0 }, { 4.0, 3.0, 2.0 } }; std::vector<ary_col_t> md_ary_col2 { { 4.0, 1.0, 2.0 }, { 6.0, 2.0, 4.0 }, { 5.0, 3.0, 1.0 }, { 7.0, 4.0, 3.0 } }; std::vector<ary_col_t> md_ary_col3 { { 7.0, 3.0, 5.0 }, { 5.0, 1.0, 3.0 }, { 6.0, 4.0, 2.0 }, { 8.0, 2.0, 4.0 } }; std::vector<vec_col_t> md_vec_col1 { { 1.0, 2.0, 3.0 }, { 2.0, 4.0, 1.0 }, { 3.0, 1.0, 4.0 }, { 4.0, 3.0, 2.0 } }; std::vector<vec_col_t> md_vec_col2 { { 4.0, 1.0, 2.0 }, { 6.0, 2.0, 4.0 }, { 5.0, 3.0, 1.0 }, { 7.0, 4.0, 3.0 } }; std::vector<vec_col_t> md_vec_col3 { { 7.0, 3.0, 5.0 }, { 
5.0, 1.0, 3.0 }, { 6.0, 4.0, 2.0 }, { 8.0, 2.0, 4.0 } }; df.load_column<ary_col_t>("ARY COL 1", std::move(md_ary_col1), nan_policy::dont_pad_with_nans); df.load_column<ary_col_t>("ARY COL 2", std::move(md_ary_col2), nan_policy::dont_pad_with_nans); df.load_column<ary_col_t>("ARY COL 3", std::move(md_ary_col3), nan_policy::dont_pad_with_nans); df.load_column<vec_col_t>("VEC COL 1", std::move(md_vec_col1), nan_policy::dont_pad_with_nans); df.load_column<vec_col_t>("VEC COL 2", std::move(md_vec_col2), nan_policy::dont_pad_with_nans); df.load_column<vec_col_t>("VEC COL 3", std::move(md_vec_col3), nan_policy::dont_pad_with_nans); const auto ary_cov = df.covariance_matrix<ary_col_t>( { "ARY COL 1", "ARY COL 2", "ARY COL 3" }, normalization_type::z_score); const auto vec_cov = df.covariance_matrix<vec_col_t>( { "VEC COL 1", "VEC COL 2", "VEC COL 3" }, normalization_type::none); assert(ary_cov.rows() == 9); assert(ary_cov.cols() == 9); assert(vec_cov.rows() == 9); assert(vec_cov.cols() == 9); assert(std::abs(ary_cov(0, 0) - 1.3333) < 0.0001); assert(std::abs(ary_cov(0, 1) - 0.0) < 0.0001); assert(std::abs(ary_cov(0, 2) - 0.0) < 0.0001); assert(std::abs(ary_cov(0, 3) - 1.0667) < 0.0001); assert(std::abs(ary_cov(3, 5) - 0.8) < 0.0001); assert(std::abs(ary_cov(3, 6) - 0.2667) < 0.0001); assert(std::abs(ary_cov(3, 8) - -0.2667) < 0.0001); assert(std::abs(ary_cov(8, 0) - -0.5333) < 0.0001); assert(std::abs(ary_cov(8, 4) - -0.5333) < 0.0001); assert(std::abs(ary_cov(8, 7) - -0.2667) < 0.0001); assert(std::abs(ary_cov(8, 8) - 1.3333) < 0.0001); assert(std::abs(vec_cov(0, 0) - 1.6667) < 0.0001); assert(std::abs(vec_cov(0, 1) - 0.0) < 0.0001); assert(std::abs(vec_cov(0, 2) - 0.0) < 0.0001); assert(std::abs(vec_cov(0, 3) - 1.3333) < 0.0001); assert(std::abs(vec_cov(3, 5) - 1.0) < 0.0001); assert(std::abs(vec_cov(3, 6) - 0.3333) < 0.0001); assert(std::abs(vec_cov(3, 8) - -0.3333) < 0.0001); assert(std::abs(vec_cov(8, 0) - -0.6667) < 0.0001); assert(std::abs(vec_cov(8, 4) - -0.6667) < 
0.0001); assert(std::abs(vec_cov(8, 7) - -0.3333) < 0.0001); assert(std::abs(vec_cov(8, 8) - 1.6667) < 0.0001); }