| Signature | Description |
|---|---|
template<typename T> struct SeasonalityParams { bool detrend { true }; // Remove trend bool de_serial_corr { false }; // Remove serial correlation by differencing // Parameters to generate trend using LOWESS // The two parameters below must be adjusted for different datasets carefully, sometimes by trial and error. // The defaults are suitable for financial market data // std::size_t num_loops { 3 }; // Number of loops T frac { 0.08 }; // The fraction of the data used when estimating each y-value. T delta { 0.0001 }; // Distance with which to use linear-interpolation instead of regression std::size_t sampling_rate { 1 }; // Assume the time series is per 1 unit of time }; |
Parameter to the SeasonalPeriodVisitor constructor |
| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameMLVisitors.h> template<arithmetic T, typename I = unsigned long> struct SeasonalPeriodVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using ssp_v = SeasonalPeriodVisitor<T, I>; |
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This visitor calculates seasonality of the given column (time series). Seasons mean any repeating pattern in your data. It doesn’t have to correspond to natural seasons. To do that you must know your data well. If there are no seasons in the data, the following method may give you misleading clues. You also must know other things (mentioned below) about your data. These are the steps it goes through:
This visitor has the following methods to get results: get_result(): Returns the length of seasons, for scalar input columns. get_period(): Returns the same thing as get_result(). get_max_magnitude(): Returns the maximum frequency magnitude, for scalar input columns. get_dominant_frequency(): Returns the dominant frequency, for scalar input columns. get_dominant_index(): Returns index of the column corresponding to the dominant frequency, for scalar input columns. get_md_result(): Returns the length of seasons, for multidimensional input columns. get_md_period(): Returns the same thing as get_md_result(). get_md_max_magnitude(): Returns the maximum frequency magnitude, for multidimensional input columns. get_md_dominant_frequency(): Returns the dominant frequency, for multidimensional input columns. get_md_dominant_index(): Returns index of the column corresponding to the dominant frequency, for multidimensional input columns. explicit SeasonalPeriodVisitor(const SeasonalityParams<...> &params); params: Necessary parameters as explained above. |
T: Column data type. I: Index type. |
static void test_SeasonalPeriodVisitor() { std::cout << "\nTesting SeasonalPeriodVisitor{ } ..." << std::endl; DTDataFrame df; try { df.read("IcecreamProduction.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } ssp_v<double, DateTime> ssp({ .de_serial_corr = true}); df.single_act_visit<double>("IceCreamProduction", ssp); assert(std::fabs(ssp.get_max_magnitude() - 4073.55) < 0.01); assert(ssp.get_dominant_index() == 53); assert(std::fabs(ssp.get_dominant_frequency() - 0.08346) < 0.00001); assert(std::fabs(ssp.get_period() - 11.9811) < 0.0001); assert(ssp.get_period() == ssp.get_result()); // Now multidimensional data // constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; constexpr std::size_t n { 120 }; constexpr double two_pi { 2.0 * M_PI }; constexpr double period_0 { 12.0 }; constexpr double period_1 { 6.0 }; constexpr double period_2 { 4.0 }; // Build index and data vectors // StlVecType<vec_col_t> vec_col(n, vec_col_t(dim)); StlVecType<ary_col_t> ary_col(n, ary_col_t()); for (std::size_t t { 0 }; t < n; ++t) { vec_col[t][0] = std::sin(two_pi * double(t) / period_0); vec_col[t][1] = std::sin(two_pi * double(t) / period_1); vec_col[t][2] = std::sin(two_pi * double(t) / period_2); ary_col[t][0] = vec_col[t][0]; ary_col[t][1] = vec_col[t][1]; ary_col[t][2] = vec_col[t][2]; } df.load_column<vec_col_t>("VEC MD COL", std::move(vec_col), nan_policy::dont_pad_with_nans); df.load_column<ary_col_t>("ARY MD COL", std::move(ary_col), nan_policy::dont_pad_with_nans); // No detrending or de-serial-correlation needed for pure sine waves. // sampling_rate = 1 (one sample per time unit). 
// SeasonalityParams<double> params; params.detrend = false; params.de_serial_corr = false; params.sampling_rate = 1; SeasonalPeriodVisitor<vec_col_t> vec_sp_v { params }; SeasonalPeriodVisitor<ary_col_t> ary_sp_v { params }; df.single_act_visit<vec_col_t>("VEC MD COL", vec_sp_v); df.single_act_visit<ary_col_t>("ARY MD COL", ary_sp_v); assert(vec_sp_v.get_md_result().size() == dim); assert(std::abs(vec_sp_v.get_md_result()[0] - 12.0) < 1e-9); assert(std::abs(vec_sp_v.get_md_result()[1] - 6.0) < 1e-9); assert(std::abs(vec_sp_v.get_md_result()[2] - 4.0) < 1e-9); assert(ary_sp_v.get_md_result().size() == dim); assert(std::abs(ary_sp_v.get_md_result()[0] - 12.0) < 1e-9); assert(std::abs(ary_sp_v.get_md_result()[1] - 6.0) < 1e-9); assert(std::abs(ary_sp_v.get_md_result()[2] - 4.0) < 1e-9); assert(vec_sp_v.get_md_max_magnitude().size() == dim); assert(std::abs(vec_sp_v.get_md_max_magnitude()[0] - 60.0) < 1e-9); assert(std::abs(vec_sp_v.get_md_max_magnitude()[1] - 60.0) < 1e-9); assert(std::abs(vec_sp_v.get_md_max_magnitude()[2] - 60.0) < 1e-9); assert(ary_sp_v.get_md_max_magnitude().size() == dim); assert(std::abs(ary_sp_v.get_md_max_magnitude()[0] - 60.0) < 1e-9); assert(std::abs(ary_sp_v.get_md_max_magnitude()[1] - 60.0) < 1e-9); assert(std::abs(ary_sp_v.get_md_max_magnitude()[2] - 60.0) < 1e-9); assert(vec_sp_v.get_md_dominant_frequency().size() == dim); assert(std::abs(vec_sp_v.get_md_dominant_frequency()[0] - 0.08333) < 1e-5); assert(std::abs(vec_sp_v.get_md_dominant_frequency()[1] - 0.16667) < 1e-5); assert(std::abs(vec_sp_v.get_md_dominant_frequency()[2] - 0.25) < 1e-9); assert(ary_sp_v.get_md_dominant_frequency().size() == dim); assert(std::abs(ary_sp_v.get_md_dominant_frequency()[0] - 0.08333) < 1e-5); assert(std::abs(ary_sp_v.get_md_dominant_frequency()[1] - 0.16667) < 1e-5); assert(std::abs(ary_sp_v.get_md_dominant_frequency()[2] - 0.25) < 1e-9); assert(vec_sp_v.get_md_dominant_index().size() == dim); assert(vec_sp_v.get_md_dominant_index()[0] == 10); 
assert(vec_sp_v.get_md_dominant_index()[1] == 20); assert(vec_sp_v.get_md_dominant_index()[2] == 30); assert(ary_sp_v.get_md_dominant_index().size() == dim); assert(ary_sp_v.get_md_dominant_index()[0] == 10); assert(ary_sp_v.get_md_dominant_index()[1] == 20); assert(ary_sp_v.get_md_dominant_index()[2] == 30); }