Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor functor that computes the dot product of two columns, along with
// their magnitudes and the Euclidean/Manhattan distances between them.
// T: column data type. I: index type.
template<typename T, typename I = unsigned long>
struct DotProdVisitor;
This functor class calculates the dot product of two given columns. It also calculates the magnitude of each column, and the Euclidean distance and Manhattan distance between the two columns.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets. For multidimensional datasets, you must use the single_act_visit() interface. These two cases represent different concepts. In case of scalar columns, the two columns are two N-dimensional points. In case of multidimensional columns, the two columns are two samples from a D-dimensional space.

get_result(): Returns the dot product of both columns. In case of scalar columns, this is just the regular dot product. In case of multidimensional columns, this is the dot product of the flattened columns (ML loss calculations, Similarity measures, Gradient descent, Physics simulations, Multivariate time series similarity). In both cases it is just a scalar number.
get_comp_dp(): Returns the vector of component-wise dot products of the multidimensional columns (Multivariate time series analysis, Signal processing, Feature-wise similarity). This returns a valid result only in case of multidimensional datasets.
get_magnitude1(): Returns the magnitude of the first column. In case of a multidimensional column, it is a vector containing the magnitude of each row (i.e. each vector or array element) of the first column; its size equals the number of rows.
get_magnitude2(): Returns the magnitude of the second column. In case of a multidimensional column, it is a vector containing the magnitude of each row (i.e. each vector or array element) of the second column; its size equals the number of rows.
get_euclidean_dist(): Returns the Euclidean distance of two columns. This returns a valid value only for scalar columns.
get_manhattan_dist(): Returns the Manhattan distance of two columns (Multivariate time series analysis, Signal processing, Feature-wise similarity). This returns a valid value only for scalar columns.
T: Column data type.
I: Index type.
// Exercises visitors through a DataFrame view and on the DataFrame itself:
//   1. MeanVisitor and DotProdVisitor on scalar (double) columns of a view,
//      using both the visit() and single_act_visit() interfaces.
//   2. DotProdVisitor on multidimensional (std::array) columns, which is only
//      driven through single_act_visit().
// Every check is an assert(), so a failure aborts the program.
//
static void test_view_visitors()  {

    std::cout << "\nTesting View visitors ..." << std::endl;

    MyDataFrame df;

    // The index has 10 entries; the data columns intentionally have
    // different lengths (5, 5, 10, 8, 10 and 5 values respectively).
    //
    StlVecType<unsigned long>  idxvec =
        { 1UL, 2UL, 3UL, 4UL, 5UL, 6UL, 7UL, 8UL, 9UL, 10UL };
    StlVecType<double>         dblvec1 = { 1.1, 2.2, 3.3, 4.4, 5.5 };
    StlVecType<double>         dblvec2 = { 2.2, 3.3, 4.4, 5.5, 6.6 };
    StlVecType<double>         dblvec3 =
        { 0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9 };
    StlVecType<double>         dblvec4 =
        { 5.9, 4.4, 1.0, 9.8, 5.3, 7.2, 3.8, 4.1 };
    StlVecType<double>         dblvec5 =
        { 1.1, 5.9, 4.4, 1.0, 9.8, 5.3, 7.2, 3.8, 4.1, 10.1 };
    StlVecType<double>         dblvec6 = { 1.1, 1.1, 3.3, 3.3, 1.1 };

    df.load_data(std::move(idxvec),
                 std::make_pair("dbl_col1", dblvec1),
                 std::make_pair("dbl_col2", dblvec2),
                 std::make_pair("dbl_col3", dblvec3),
                 std::make_pair("dbl_col4", dblvec4),
                 std::make_pair("dbl_col5", dblvec5),
                 std::make_pair("dbl_col6", dblvec6));

    using MyDataFrameView = StdDataFrame128<unsigned long>::View;

    // View restricted to the index range [2, 4]; the assert below confirms
    // that this selects three rows.
    //
    MyDataFrameView dfv =
        df.get_view_by_idx<double>(Index2D<unsigned long> { 2, 4 });

    assert(dfv.get_index().size() == 3);

    MeanVisitor<double> mean_visitor;

    assert(std::fabs(
               dfv.visit<double>("dbl_col1", mean_visitor).get_result() -
               3.3) < 0.00001);

    // Scalar columns: the expected values below are consistent with the view
    // holding dbl_col1 = { 2.2, 3.3, 4.4 } and dbl_col2 = { 3.3, 4.4, 5.5 }.
    //
    DotProdVisitor<double>  dp_visitor;

    dfv.visit<double, double>("dbl_col1", "dbl_col2", dp_visitor);
    assert(std::fabs(dp_visitor.get_result() - 45.98) < 0.00001);
    assert(std::fabs(dp_visitor.get_magnitude1() - 5.92368) < 0.00001);
    assert(std::fabs(dp_visitor.get_magnitude2() - 7.77817) < 0.00001);
    assert(std::fabs(dp_visitor.get_euclidean_dist() - 1.90526) < 0.00001);
    assert(std::fabs(dp_visitor.get_manhattan_dist() - 3.3) < 0.00001);

    // The single_act_visit() interface must yield the same results as
    // visit() for scalar columns.
    //
    dfv.single_act_visit<double, double>("dbl_col1", "dbl_col2", dp_visitor);
    assert(std::fabs(dp_visitor.get_result() - 45.98) < 0.00001);
    assert(std::fabs(dp_visitor.get_magnitude1() - 5.92368) < 0.00001);
    assert(std::fabs(dp_visitor.get_magnitude2() - 7.77817) < 0.00001);
    assert(std::fabs(dp_visitor.get_euclidean_dist() - 1.90526) < 0.00001);
    assert(std::fabs(dp_visitor.get_manhattan_dist() - 3.3) < 0.00001);

    // Now multidimensional data
    //
    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;

    // The second column is the element-wise negation of the first one, so
    // every dot product is the negated sum of squares and both columns have
    // identical magnitudes.
    //
    StlVecType<ary_col_t>   md_ary_col1 =
        { {1, 2, 3}, {1, 1, 1}, {2, 1, 1}, {-1, 0, -2}, {2, 1, 1},
          {3, 1, 0}, {-2, -1, -2}, {1, -1, 2}, {2, 0, 2}, {1, 3, 2} };
    StlVecType<ary_col_t>   md_ary_col2 =
        { {-1, -2, -3}, {-1, -1, -1}, {-2, -1, -1}, {1, 0, 2}, {-2, -1, -1},
          {-3, -1, 0}, {2, 1, 2}, {-1, 1, -2}, {-2, 0, -2}, {-1, -3, -2} };

    df.load_column<ary_col_t>("MD_col 1", std::move(md_ary_col1));
    df.load_column<ary_col_t>("MD_col 2", std::move(md_ary_col2));

    DotProdVisitor<ary_col_t>   md_dp;

    df.single_act_visit<ary_col_t, ary_col_t>("MD_col 1", "MD_col 2", md_dp);

    // Exact comparisons are safe here: all inputs are small integers, so the
    // sums of products are exactly representable as doubles.
    //
    assert(md_dp.get_result() == -81.0);
    assert(md_dp.get_comp_dp().size() == dim);
    assert(md_dp.get_comp_dp()[0] == -30.0);
    assert(md_dp.get_comp_dp()[2] == -32.0);

    // One magnitude per row, for each of the two columns.
    //
    assert(md_dp.get_magnitude1().size() == df.get_index().size());
    assert(md_dp.get_magnitude2().size() == df.get_index().size());
    assert(std::fabs(md_dp.get_magnitude1()[0] - 3.74166) < 0.00001);
    assert(std::fabs(md_dp.get_magnitude2()[0] - 3.74166) < 0.00001);
    assert(std::fabs(md_dp.get_magnitude1()[5] - 3.16228) < 0.00001);
    assert(std::fabs(md_dp.get_magnitude2()[5] - 3.16228) < 0.00001);
    assert(std::fabs(md_dp.get_magnitude1()[9] - 3.74166) < 0.00001);
    assert(std::fabs(md_dp.get_magnitude2()[9] - 3.74166) < 0.00001);
}

C++ DataFrame