| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameMLVisitors.h> template<typename T, typename I = unsigned long, std::size_t A = 0> struct AnomalyDetectByZScoreVisitor; // ------------------------------------- template<typename T, typename I = unsigned long, std::size_t A = 0> using and_zscr_v = AnomalyDetectByZScoreVisitor<T, I, A>; |
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This visitor applies the Z-Score method to find outliers. explicit AnomalyDetectByZScoreVisitor(value_type threshold); threshold: Number of standard deviations beyond which a value is considered an outlier |
T: Column data type I: Index type A: Memory alignment boundary for vectors. Default is system default alignment |
static void test_AnomalyDetectByZScoreVisitor() { std::cout << "\nTesting AnomalyDetectByZScoreVisitor{ } ..." << std::endl; constexpr std::size_t item_cnt = 1024; MyStdDataFrame df; df.load_index(MyStdDataFrame::gen_sequence_index(0, item_cnt, 1)); std::vector<double> sine_col; sine_col.reserve(item_cnt); for (std::size_t i = 0; i < item_cnt; ++i) { sine_col.push_back(std::sin(2.0 * M_PI * i / 20.0)); // Base sine wave if (i % 30 == 0) sine_col.back() += 2.0; // Inject anomalies } df.load_column("sine col", std::move(sine_col)); and_zscr_v<double> anomaly1 { 2.0 }; const std::vector<std::size_t> result1 = { 0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540, 570, 600, 630, 660, 690, 720, 750, 780, 810, 840, 870, 900, 930, 960, 990, 1020 }; df.single_act_visit<double>("sine col", anomaly1); assert((anomaly1.get_result() == result1)); // Now do the same thing for IBM market data // StrDataFrame ibm; try { ibm.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } ibm.get_column<double>("IBM_Close")[502] = 800.0; ibm.get_column<double>("IBM_Close")[1001] = 900.0; ibm.get_column<double>("IBM_Close")[2002] = 850.0; and_zscr_v<double> anomaly2 { 15.0 }; const std::vector<std::size_t> result2 = { 502, 1001, 2002 }; ibm.single_act_visit<double>("IBM_Close", anomaly2); assert((anomaly2.get_result() == result2)); }