Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameMLVisitors.h>

// Visitor that finds outliers (anomalies) in a column using the Z-Score
// method.
//   T: Column data type
//   I: Index type (defaults to unsigned long)
//   A: Memory alignment boundary for vectors (defaults to 0)
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct AnomalyDetectByZScoreVisitor;

// -------------------------------------

// Shorthand alias for AnomalyDetectByZScoreVisitor with the same template
// parameters and defaults.
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using and_zscr_v = AnomalyDetectByZScoreVisitor<T, I, A>;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

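This visitor applies the Z-Score method to find outliers (anomalies) in a column. As the example below shows, get_result() yields the indices of the data points that were flagged as outliers.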
// Constructs the visitor with the Z-Score cutoff: threshold is the number
// of standard deviations above which a data point is treated as an outlier.
explicit
AnomalyDetectByZScoreVisitor(value_type threshold);

threshold: Number of standard deviations above which a data point is considered an outlier
T: Column data type
I: Index type
A: Memory alignment boundary for vectors. Default is system default alignment
static void test_AnomalyDetectByZScoreVisitor()  {

    std::cout << "\nTesting AnomalyDetectByZScoreVisitor{ } ..." << std::endl;

    constexpr std::size_t   item_cnt = 1024;
    MyStdDataFrame          df;

    df.load_index(MyStdDataFrame::gen_sequence_index(0, item_cnt, 1));

    std::vector<double>   sine_col;

    sine_col.reserve(item_cnt);
    for (std::size_t i = 0; i < item_cnt; ++i)  {
        sine_col.push_back(std::sin(2.0 * M_PI * i / 20.0)); // Base sine wave
        if (i % 30 == 0)  sine_col.back() += 2.0;  // Inject anomalies
    }
    df.load_column("sine col", std::move(sine_col));

    and_zscr_v<double>              anomaly1 { 2.0 };
    const std::vector<std::size_t>  result1 =
        { 0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540,
          570, 600, 630, 660, 690, 720, 750, 780, 810, 840, 870, 900, 930, 960, 990, 1020 };

    df.single_act_visit<double>("sine col", anomaly1);
    assert((anomaly1.get_result() == result1));

    // Now do the same thing for IBM market data
    //
    StrDataFrame    ibm;

    try  {
        ibm.read("IBM.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }
    ibm.get_column<double>("IBM_Close")[502] = 800.0;
    ibm.get_column<double>("IBM_Close")[1001] = 900.0;
    ibm.get_column<double>("IBM_Close")[2002] = 850.0;

    and_zscr_v<double>              anomaly2 { 15.0 };
    const std::vector<std::size_t>  result2 = { 502, 1001, 2002 };

    ibm.single_act_visit<double>("IBM_Close", anomaly2);
    assert((anomaly2.get_result() == result2));
}

C++ DataFrame