| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I, template<typename> typename C> struct NExtremumVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using NLargestVisitor = NExtremumVisitor<T, I, std::greater>; template<typename T, typename I = unsigned long> using NSmallestVisitor = NExtremumVisitor<T, I, std::less>; |
This functor class calculates the N smallest/largest values of the given column, depending on the type of C. It runs in O(M * log(N)), where N is the number of extremum values and M is the total number of values.
// Constructor.
// n: Number of extremum values
// skipnan: If true, NaN values are skipped (defaults to false)
explicit
NExtremumVisitor(size_type n, bool skipnan = false);
Each item in the result is the following struct:
// One entry of the result: a column value together with the index value
// and the row position it was found at.
struct DataItem {
value_type value { }; // Column value
index_type index_val { }; // Index value that belongs to this column value
size_type index_idx { }; // 0-based row number of the value and index
};
result_type get_result() const; : Returns the result vector. void sort_by_index_val(); : Sorts the result vector by index value. void sort_by_index_idx(); : Sorts the result vector by 0-based row number. void sort_by_value(); : Sorts the result vector by value. |
T: Column data type; I: Index type; C: Comparison functor (std::greater for the N largest values, std::less for the N smallest); n: Number of extremum values; skipnan: If true, it skips over NaN values |
static void test_largest_smallest_visitors() { std::cout << "\nTesting Largest/Smallest visitors ..." << std::endl; StlVecType<unsigned long> idx = { 123450, 123451, 123452, 123453, 123454, 123455, 123456, 123457, 123458, 123459, 123460, 123461, 123462, 123466 }; StlVecType<double> d1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; StlVecType<double> d2 = { 8, 9, 10, 11, 12, 13, 14, 20, 22, 23, 30, 31, 32, 1.89 }; StlVecType<double> d3 = { 15, 16, 15, 18, 19, 16, 21, 0.34, 1.56, 0.34, 2.3, 0.34, 19.0 }; StlVecType<int> i1 = { 22, 23, 24, 25, 99 }; MyDataFrame df; df.load_data(std::move(idx), std::make_pair("col_1", d1), std::make_pair("col_2", d2), std::make_pair("col_3", d3), std::make_pair("col_4", i1)); std::cout << "Original DF:" << std::endl; df.write<std::ostream, double, int>(std::cout); NLargestVisitor<double> nl_visitor { 5, true }; df.visit<double>("col_3", nl_visitor, true); std::cout << "N largest result for col_3:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; nl_visitor.sort_by_index_val(); std::cout << "N largest result for col_3 sorted by index_val:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; nl_visitor.sort_by_value(); std::cout << "N largest result for col_3 sorted by value:" << std::endl; for (auto iter : nl_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; NSmallestVisitor<double> ns_visitor { 5, true }; df.visit<double>("col_3", ns_visitor); std::cout << "N smallest result for col_3:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; ns_visitor.sort_by_index_val(); std::cout << "N smallest result for col_3 sorted by 
index_val:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; ns_visitor.sort_by_value(); std::cout << "N smallest result for col_3 sorted by value:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; ns_visitor.sort_by_index_idx(); std::cout << "N smallest result for col_3 sorted by index_idx:" << std::endl; for (auto iter : ns_visitor.get_result()) std::cout << iter.index_val << '|' << iter.index_idx << '|' << iter.value << " "; std::cout << std::endl; }