Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct CountVisitor;
This is a single action visitor, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class counts the number of data points in the given column.
The result is a count number.
    explicit
    CountVisitor(bool skipnan = false);
        
T: Column data type.
I: Index type.
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct CumCountVisitor;
This is a single action visitor, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class cumulatively counts the number of non-NaN data points in the given column.
The result is a vector of running counts
    CumCountVisitor();
        
T: Column data type.
I: Index type.
A: Memory alignment boundary for vectors. Default is system default alignment.
static void test_groupby_2()  {

    std::cout << "\nTesting groupby_2( ) ..." << std::endl;

    StlVecType<unsigned long>  ulgvec2 ={ 123450, 123451, 123452, 123450, 123455, 123450, 123449,123450, 123451, 123450, 123452, 123450, 123455, 123450,
                                          123454, 123450, 123450, 123457, 123458, 123459, 123450, 123441, 123442, 123432, 123450, 123450, 123435, 123450 };
    StlVecType<unsigned long>  xulgvec2 = ulgvec2;
    StlVecType<int>            intvec2 = { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14, 2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
    StlVecType<double>         xdblvec2 = { 10, 20, 11, 11, 30, 40, 50, 40, 60, 70, 80, 90, 50, 100, 11, 25, 20, 30, 1, 3, 4, 12, 6, 2, 3, 10, 4, 5 };
    StlVecType<double>         dblvec22 = { 0.998, 1.545, 0.056, 0.15678, 1.545, 0.923, 0.06743, 0.1, -1.545, 0.07865, -0.9999, 1.545, 0.1002, -0.8888,
                                            0.14, 0.0456, -1.545, -0.8999, 0.01119, 0.8002, -1.545, 0.2, 0.1056, 0.87865, -0.6999, 1.545, 0.1902, -1.545 };
    StlVecType<std::string>    strvec2 = { "A", "B", "C", "D", "X", "Y", "W", "P", "Z", "S", "M", "B", "A", "H", "X", "Q", "V", "P", "W", "K", "I", "L", "J", "N", "Y", "G", "T", "U" };
    StlVecType<double>         dblvec33 = { 0.998, 1.545, 0.056, 0.15678, 1.545, std::sqrt(-1), 0.06743, 0.1, -1.545, std::sqrt(-1), -0.9999, 1.545, 0.1002, -0.8888,
                                            0.14, 0.0456, -1.545, -0.8999, std::sqrt(-1), 0.8002, -1.545, 0.2, 0.1056, 0.87865, -0.6999, std::sqrt(-1), 0.1902, -1.545 };

    MyDataFrame df;

    df.load_data(std::move(ulgvec2),
                 std::make_pair("xint_col", intvec2),
                 std::make_pair("dbl_col", xdblvec2),
                 std::make_pair("dbl_col_2", dblvec22),
                 std::make_pair("dbl_col_3", dblvec33),
                 std::make_pair("str_col", strvec2),
                 std::make_pair("ul_col", xulgvec2));

    auto    vw = df.get_view<double, int, unsigned long, std::string>({ "xint_col", "dbl_col", "dbl_col_2", "str_col", "ul_col" });
    auto    result1 = df.groupby2<unsigned long, double>(DF_INDEX_COL_NAME,
                                                         "dbl_col_2",
                                                         LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
                                                         std::make_tuple("str_col",  "sum_str",  SumVisitor<std::string>()),
                                                         std::make_tuple("xint_col",  "max_int", MaxVisitor<int>()),
                                                         std::make_tuple("xint_col",  "min_int", MinVisitor<int>()),
                                                         std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
                                                         std::make_tuple("dbl_col",   "sum_dbl", SumVisitor<double>()));
    auto    result1_from_vw = vw.groupby2<unsigned long, double>(DF_INDEX_COL_NAME,
                                                                 "dbl_col_2",
                                                                 LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
                                                                 std::make_tuple("str_col",  "sum_str",  SumVisitor<std::string>()),
                                                                 std::make_tuple("xint_col",  "max_int", MaxVisitor<int>()),
                                                                 std::make_tuple("xint_col",  "min_int", MinVisitor<int>()),
                                                                 std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
                                                                 std::make_tuple("dbl_col",   "sum_dbl", SumVisitor<double>()));

    result1.write<std::ostream, std::string, double, std::size_t, int>(std::cout, io_format::csv2);
    assert(result1.get_index()[4] == result1_from_vw.get_index()[4]);
    assert((result1.get_column<int>("max_int")[8] == result1_from_vw.get_column<int>("max_int")[8]));
}

C++ DataFrame