| Signature | Description | Parameters |
|---|---|---|
| `template<typename T> Matrix<T, matrix_orient::column_major> covariance_matrix(std::vector<const char *> &&col_names, normalization_type norm_type = normalization_type::none) const;` | This calculates and returns the variance/covariance matrix of the specified columns, optionally normalizing the columns first. If you normalize the data with the z-score method first, you will get a correlation matrix. | `T`: Type of the named columns; `col_names`: Vector of column names; `norm_type`: The method used to normalize the columns before the calculation. The default is no normalization |
static void test_covariance_matrix() { std::cout << "\nTesting covariance_matrix( ) ..." << std::endl; StrDataFrame df; try { df.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; } const auto cov_mat = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }); assert(cov_mat.rows() == 4); assert(cov_mat.cols() == 4); assert(std::fabs(cov_mat(0, 0) - 1467.58) < 0.01); assert(std::fabs(cov_mat(0, 2) - 1469.69) < 0.01); assert(std::fabs(cov_mat(2, 1) - 1469.48) < 0.01); assert(std::fabs(cov_mat(2, 2) - 1472.86) < 0.01); assert(std::fabs(cov_mat(3, 2) - 1466.15) < 0.01); assert(std::fabs(cov_mat(3, 3) - 1461.0) < 0.01); const auto cov_mat2 = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, normalization_type::z_score); assert(cov_mat2.rows() == 4); assert(cov_mat2.cols() == 4); assert(std::fabs(cov_mat2(0, 0) - 1.0) < 0.01); assert(std::fabs(cov_mat2(0, 2) - 0.99964) < 0.00001); assert(std::fabs(cov_mat2(2, 1) - 0.99963) < 0.00001); assert(std::fabs(cov_mat2(2, 2) - 1.0) < 0.01); assert(std::fabs(cov_mat2(3, 2) - 0.99948) < 0.00001); assert(std::fabs(cov_mat2(3, 3) - 1.0) < 0.01); }