Back to Documentations

Signature Description Parameters
// Returns the variance/covariance matrix of the columns named in col_names.
// norm_type selects an optional normalization that is applied to the columns
// before the calculation; by default no normalization is done.
template<typename T>
Matrix<T, matrix_orient::column_major>
covariance_matrix(std::vector<const char *> &&col_names,
                  normalization_type norm_type =
                      normalization_type::none) const;
This calculates and returns the variance/covariance matrix of the specified columns, optionally normalizing the columns first.
If you normalize the data with the z-score method first, you will get a correlation matrix.

This works with both scalar and multidimensional (MD) data, where MD data means columns whose elements are vectors or arrays. For an n x m scalar matrix, you will get an m x m scalar covariance matrix. For an n x m MD matrix, you will get an (m*d) x (m*d) scalar matrix, where d is the dimensionality of the data.
T: Type of the named columns
col_names: Vector of column names
norm_type: The method used to normalize the columns before the calculation. The default is no normalization
static void test_covariance_matrix()  {

    std::cout << "\nTesting covariance_matrix( ) ..." << std::endl;

    StrDataFrame    df;

    try  {
        df.read("IBM.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    const auto  cov_mat = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" });

    assert(cov_mat.rows() == 4);
    assert(cov_mat.cols() == 4);
    assert(std::fabs(cov_mat(0, 0) - 1467.58) < 0.01);
    assert(std::fabs(cov_mat(0, 2) - 1469.69) < 0.01);
    assert(std::fabs(cov_mat(2, 1) - 1469.48) < 0.01);
    assert(std::fabs(cov_mat(2, 2) - 1472.86) < 0.01);
    assert(std::fabs(cov_mat(3, 2) - 1466.15) < 0.01);
    assert(std::fabs(cov_mat(3, 3) - 1461.0) < 0.01);

    const auto  cov_mat2 = df.covariance_matrix<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, normalization_type::z_score);

    assert(cov_mat2.rows() == 4);
    assert(cov_mat2.cols() == 4);
    assert(std::fabs(cov_mat2(0, 0) - 1.0) < 0.01);
    assert(std::fabs(cov_mat2(0, 2) - 0.99964) < 0.00001);
    assert(std::fabs(cov_mat2(2, 1) - 0.99963) < 0.00001);
    assert(std::fabs(cov_mat2(2, 2) - 1.0) < 0.01);
    assert(std::fabs(cov_mat2(3, 2) - 0.99948) < 0.00001);
    assert(std::fabs(cov_mat2(3, 3) - 1.0) < 0.01);

    // Now multidimensional data
    //
    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;
    using vec_col_t = std::vector<double>;

    std::vector<ary_col_t>  md_ary_col1  { { 1.0, 2.0, 3.0 }, { 2.0, 4.0, 1.0 }, { 3.0, 1.0, 4.0 }, { 4.0, 3.0, 2.0 } };
    std::vector<ary_col_t>  md_ary_col2  { { 4.0, 1.0, 2.0 }, { 6.0, 2.0, 4.0 }, { 5.0, 3.0, 1.0 }, { 7.0, 4.0, 3.0 } };
    std::vector<ary_col_t>  md_ary_col3  { { 7.0, 3.0, 5.0 }, { 5.0, 1.0, 3.0 }, { 6.0, 4.0, 2.0 }, { 8.0, 2.0, 4.0 } };

    std::vector<vec_col_t>  md_vec_col1  { { 1.0, 2.0, 3.0 }, { 2.0, 4.0, 1.0 }, { 3.0, 1.0, 4.0 }, { 4.0, 3.0, 2.0 } };
    std::vector<vec_col_t>  md_vec_col2  { { 4.0, 1.0, 2.0 }, { 6.0, 2.0, 4.0 }, { 5.0, 3.0, 1.0 }, { 7.0, 4.0, 3.0 } };
    std::vector<vec_col_t>  md_vec_col3  { { 7.0, 3.0, 5.0 }, { 5.0, 1.0, 3.0 }, { 6.0, 4.0, 2.0 }, { 8.0, 2.0, 4.0 } };

    df.load_column<ary_col_t>("ARY COL 1", std::move(md_ary_col1), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("ARY COL 2", std::move(md_ary_col2), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("ARY COL 3", std::move(md_ary_col3), nan_policy::dont_pad_with_nans);

    df.load_column<vec_col_t>("VEC COL 1", std::move(md_vec_col1), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("VEC COL 2", std::move(md_vec_col2), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("VEC COL 3", std::move(md_vec_col3), nan_policy::dont_pad_with_nans);

    const auto  ary_cov = df.covariance_matrix<ary_col_t>( { "ARY COL 1", "ARY COL 2", "ARY COL 3" }, normalization_type::z_score);
    const auto  vec_cov = df.covariance_matrix<vec_col_t>( { "VEC COL 1", "VEC COL 2", "VEC COL 3" }, normalization_type::none);

    assert(ary_cov.rows() == 9);
    assert(ary_cov.cols() == 9);
    assert(vec_cov.rows() == 9);
    assert(vec_cov.cols() == 9);

    assert(std::abs(ary_cov(0, 0) - 1.3333) < 0.0001);
    assert(std::abs(ary_cov(0, 1) - 0.0) < 0.0001);
    assert(std::abs(ary_cov(0, 2) - 0.0) < 0.0001);
    assert(std::abs(ary_cov(0, 3) - 1.0667) < 0.0001);
    assert(std::abs(ary_cov(3, 5) - 0.8) < 0.0001);
    assert(std::abs(ary_cov(3, 6) - 0.2667) < 0.0001);
    assert(std::abs(ary_cov(3, 8) - -0.2667) < 0.0001);
    assert(std::abs(ary_cov(8, 0) - -0.5333) < 0.0001);
    assert(std::abs(ary_cov(8, 4) - -0.5333) < 0.0001);
    assert(std::abs(ary_cov(8, 7) - -0.2667) < 0.0001);
    assert(std::abs(ary_cov(8, 8) - 1.3333) < 0.0001);

    assert(std::abs(vec_cov(0, 0) - 1.6667) < 0.0001);
    assert(std::abs(vec_cov(0, 1) - 0.0) < 0.0001);
    assert(std::abs(vec_cov(0, 2) - 0.0) < 0.0001);
    assert(std::abs(vec_cov(0, 3) - 1.3333) < 0.0001);
    assert(std::abs(vec_cov(3, 5) - 1.0) < 0.0001);
    assert(std::abs(vec_cov(3, 6) - 0.3333) < 0.0001);
    assert(std::abs(vec_cov(3, 8) - -0.3333) < 0.0001);
    assert(std::abs(vec_cov(8, 0) - -0.6667) < 0.0001);
    assert(std::abs(vec_cov(8, 4) - -0.6667) < 0.0001);
    assert(std::abs(vec_cov(8, 7) - -0.3333) < 0.0001);
    assert(std::abs(vec_cov(8, 8) - 1.6667) < 0.0001);
}

C++ DataFrame