Back to Documentations

Signature Description Parameters
template<arithmetic T, typename ... Ts>
void
remove_data_by_hampel(const char *col_name,
                      size_type window_size,
                      hampel_type htype = hampel_type::median,
                      T num_of_stdev = 3);
This uses a Hampel filter to detect outliers in the named column and removes them, together with all rows corresponding to those outliers, from the DataFrame.

NOTE: Type T must support arithmetic operations
T: Type of the named column
Ts: The list of types for all columns. A type should be specified only once
col_name: Name of the data column
window_size: Size of the sliding window for MAD calculations
htype: Whether to use the median or the mean of absolute deviation (MAD); defaults to median
num_of_stdev: Number of standard deviations used in the filter (defaults to 3)
static void test_remove_data_by_hampel()  {

    std::cout << "\nTesting remove_data_by_hampel( ) ..." << std::endl;

    typedef StdDataFrame64<std::string> StrDataFrame;

    StrDataFrame    df;

    try  {
        df.read("SHORT_IBM.csv", io_format::csv2);
        assert(df.get_index().size() == 1721);
        assert(df.get_column<double>("IBM_Open").size() == 1721);
        assert(df.get_column<double>("IBM_Close").size() == 1721);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
    }

    StrDataFrame    df2 = df;

    auto    lbd = [](const std::string &, const double &) -> bool { return (true); };
    auto    view = df2.get_view_by_sel<double, decltype(lbd), double, long>("IBM_Open", lbd);

    df.remove_data_by_hampel <double, double, long>("IBM_Close", 10);
    view.remove_data_by_hampel<double, double, long>("IBM_Close", 10);

    assert(df.get_index().size() == 1644);
    assert(view.get_index().size() == 1644);
    assert(view.get_column<double>("IBM_Open").size() == 1644);
    assert(view.get_column<long>("IBM_Volume").size() == 1644);
    assert(df.get_column<double>("IBM_Low").size() == 1644);
    assert(df.get_column<long>("IBM_Volume").size() == 1644);
    assert(df.get_index()[500] == "2016-01-25");
    assert(view.get_index()[101] == "2014-05-30");
    assert(view.get_column<double>("IBM_Open")[45] == 187.550003);
    assert(df.get_column<long>("IBM_Volume")[300] == 4255400);
}

C++ DataFrame