Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

// Visitor that collects the N extremum (smallest or largest) values of a
// column; which extremum is selected depends on the comparison functor C.
//   T: Column data type
//   I: Index type
//   C: Comparison functor template (e.g. std::greater or std::less)
template<typename T, typename I,
         template<typename> typename C>
struct  NExtremumVisitor;

// -------------------------------------

// Convenience alias: NExtremumVisitor instantiated with std::greater, used
// to obtain the N largest values. The index type defaults to unsigned long.
template<typename T, typename I = unsigned long>
using NLargestVisitor =
    NExtremumVisitor<T, I, std::greater>;

// Convenience alias: NExtremumVisitor instantiated with std::less, used
// to obtain the N smallest values. The index type defaults to unsigned long.
template<typename T, typename I = unsigned long>
using NSmallestVisitor =
    NExtremumVisitor<T, I, std::less>;
This functor class calculates the N smallest/largest values of the given column, depending on the type of C. It runs in O(M * log(N)), where N is the number of extremum values and M is the total number of values.

     // Constructs the visitor.
     //   n:       Number of extremum values to collect
     //   skipnan: If true, NaN values are skipped (defaults to false)
     explicit
     NExtremumVisitor(size_type n, bool skipnan = false);
        
Each item in the result is the following struct:
    // One entry of the result vector: a selected column value together with
    // the index value and the row position it came from.
    struct  DataItem  {
        value_type  value { };     // Column value
        index_type  index_val { }; // Index value
        size_type   index_idx { }; // 0-based row number of the value and index
    };
        
result_type get_result() const; : Returns the result vector
void sort_by_index_val(); : Sort the result vector by index value
void sort_by_index_idx(); : Sort the result vector by 0-based row number
void sort_by_value(); : Sort the result vector by value
T: Column data type
I: Index type
C: Comparison functor
n: Number of extremum values
skipnan: If true, it skips over NaN values
static void test_largest_smallest_visitors()  {

    std::cout << "\nTesting Largest/Smallest visitors ..." << std::endl;

    StlVecType<unsigned long>   idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 };
    StlVecType<double>          d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
    StlVecType<double>          d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89 };
    StlVecType<double>          d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 };
    StlVecType<int>             i1 = { 22, 23, 24, 25, 99 };
    MyDataFrame                 df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", i1));

    std::cout << "Original DF:" << std::endl;
    df.write<std::ostream, double, int>(std::cout);

    NLargestVisitor<double> nl_visitor { 5, true };

    df.visit<double>("col_3", nl_visitor, true);
    std::cout << "N largest result for col_3:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
    nl_visitor.sort_by_index_val();
    std::cout << "N largest result for col_3 sorted by index_val:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
    nl_visitor.sort_by_value();
    std::cout << "N largest result for col_3 sorted by value:" << std::endl;
    for (auto iter : nl_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;

    NSmallestVisitor<double>    ns_visitor { 5, true };

    df.visit<double>("col_3", ns_visitor);
    std::cout << "N smallest result for col_3:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
    ns_visitor.sort_by_index_val();
    std::cout << "N smallest result for col_3 sorted by index_val:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
    ns_visitor.sort_by_value();
    std::cout << "N smallest result for col_3 sorted by value:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
    ns_visitor.sort_by_index_idx();
    std::cout << "N smallest result for col_3 sorted by index_idx:" << std::endl;
    for (auto iter : ns_visitor.get_result())
        std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " ";
    std::cout << std::endl;
}

C++ DataFrame