| Signature | Description | Parameters |
|---|---|---|
template<arithmetic T, typename ... Ts> void remove_data_by_hampel(const char *col_name, size_type window_size, hampel_type htype = hampel_type::median, T num_of_stdev = 3); |
This uses a Hampel filter to detect outliers in the named column and removes them, along with all rows corresponding to those outliers in the DataFrame. NOTE: Type T must support arithmetic operations. |
T: Type of the named column. Ts: The list of types for all columns; a type should be specified only once. col_name: Name of the data column. window_size: Size of the sliding window for MAD calculations. htype: Use either the Median or the Mean of Absolute Deviation (MAD). num_of_stdev: Number of stdev used in the filter. |
static void test_remove_data_by_hampel() { std::cout << "\nTesting remove_data_by_hampel( ) ..." << std::endl; typedef StdDataFrame64<std::string> StrDataFrame; StrDataFrame df; try { df.read("SHORT_IBM.csv", io_format::csv2); assert(df.get_index().size() == 1721); assert(df.get_column<double>("IBM_Open").size() == 1721); assert(df.get_column<double>("IBM_Close").size() == 1721); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; } StrDataFrame df2 = df; auto lbd = [](const std::string &, const double &) -> bool { return (true); }; auto view = df2.get_view_by_sel<double, decltype(lbd), double, long>("IBM_Open", lbd); df.remove_data_by_hampel <double, double, long>("IBM_Close", 10); view.remove_data_by_hampel<double, double, long>("IBM_Close", 10); assert(df.get_index().size() == 1644); assert(view.get_index().size() == 1644); assert(view.get_column<double>("IBM_Open").size() == 1644); assert(view.get_column<long>("IBM_Volume").size() == 1644); assert(df.get_column<double>("IBM_Low").size() == 1644); assert(df.get_column<long>("IBM_Volume").size() == 1644); assert(df.get_index()[500] == "2016-01-25"); assert(view.get_index()[101] == "2014-05-30"); assert(view.get_column<double>("IBM_Open")[45] == 187.550003); assert(df.get_column<long>("IBM_Volume")[300] == 4255400); }