| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameMLVisitors.h> template<arithmetic T, typename I = unsigned long> struct LSTMForecastVisitor; // ------------------------------------- template<typename T, typename I = unsigned long> using lstm_v = LSTMForecastVisitor<T, I>; |
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. Long Short-Term Memory (LSTM) forecasting is a technique that uses a type of recurrent neural network to predict future values in a time series, excelling at capturing long-term dependencies in sequential data. LSTMs are effective for forecasting because their architecture, featuring a memory cell and three gates, allows them to remember and process information from past time steps, overcoming the vanishing gradient problem that affects traditional recurrent neural networks. This makes them well-suited for predicting future trends in fields like stock prices, streamflow, and groundwater levels. This visitor works with both scalar and multidimensional (i.e. vectors and arrays) datasets. This visitor has the following method to get the result: get_result(): Returns a vector of forecasted datapoints for the next `periods` periods ahead, where `periods` is the constructor parameter of that name. In the case of a multidimensional input column, the result is a vector of the input column type. If the input is a column of vectors, the result will be a vector of vectors. If the input is a column of arrays, the result will be a vector of arrays.
explicit
LSTMForecastVisitor(long hidden_size = 32,
long seq_len = 10,
long batch_size = 1,
long epochs = 20,
data_t learning_rate = 0.001,
long periods = 3,
unsigned int seed = static_cast
|
T: Column data type. I: Index type. |
static void test_LSTMForecastVisitor() { std::cout << "\nTesting LSTMForecastVisitor{ } ..." << std::endl; std::vector<unsigned long> idxvec = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; std::vector<double> col1 = { 266.0, 145.9, 183.1, 119.3, 180.3, 168.5, 231.8, 224.5, 192.8, 122.9, 336.5, 185.9, 194.3 }; std::vector<double> oscil = { 1.5, 1.8, 1.62, 1.78, 1.5, 1.68, 1.6, 1.8, 1.71, 1.9, 1.78, 1.84, 1.69 }; std::vector<double> constant = { 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56, 10.56 }; std::vector<double> increasing = { 10.56, 10.68, 10.78, 10.90, 11.01, 11.45, 11.99, 12.01, 12.21, 12.35, 12.67, 13.89, 14.01 }; std::vector<double> decreasing = { 10.56, 10.30, 10.12, 10.01, 9.80, 9.74, 9.41, 9.03, 9.0, 8.20, 8.01, 7.9, 7.55 }; ULDataFrame df; df.load_data(std::move(idxvec), std::make_pair("col1", col1), std::make_pair("oscil", oscil), std::make_pair("constant", constant), std::make_pair("increasing", increasing), std::make_pair("decreasing", decreasing)); lstm_v<double> lstm { 40, 4, 1, 100, 0.001, 3, 123 }; df.single_act_visit<double>("col1", lstm); const auto result1 = lstm.get_result(); assert(result1.size() == 3); assert(std::fabs(result1[0] - 177.075) < 0.001); assert(std::fabs(result1[1] - 181.576) < 0.001); assert(std::fabs(result1[2] - 185.103) < 0.001); lstm_v<double> lstm2 { 40, 4, 1, 1000, 0.001, 3, 123 }; df.single_act_visit<double>("oscil", lstm2); const auto result2 = lstm2.get_result(); assert(result2.size() == 3); assert(std::fabs(result2[0] - 1.7698) < 0.00001); assert(std::fabs(result2[1] - 1.64903) < 0.00001); assert(std::fabs(result2[2] - 1.74927) < 0.00001); df.single_act_visit<double>("constant", lstm); const auto result3 = lstm.get_result(); assert(result3.size() == 3); assert(std::fabs(result3[0] - 10.56) < 0.00001); assert(std::fabs(result3[1] - 10.56) < 0.00001); assert(std::fabs(result3[2] - 10.56) < 0.00001); lstm_v<double> lstm3 { 100, 4, 1, 100, 0.001, 3, 123 }; 
df.single_act_visit<double>("increasing", lstm3); const auto result4 = lstm3.get_result(); // It doesn't see the pattern here // assert(result4.size() == 3); assert(std::fabs(result4[0] - 13.4408) < 0.0001); assert(std::fabs(result4[1] - 13.4358) < 0.0001); assert(std::fabs(result4[2] - 13.4193) < 0.0001); df.single_act_visit<double>("decreasing", lstm3); const auto result5 = lstm3.get_result(); // Strangely, it sees the pattern here // assert(result5.size() == 3); assert(std::fabs(result5[0] - 7.23448) < 0.00001); assert(std::fabs(result5[1] - 7.14929) < 0.00001); assert(std::fabs(result5[2] - 7.10167) < 0.00001); // Now some real data // StrDataFrame df2; try { df2.read("IBM.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; ::exit(-1); } lstm_v<double> lstm4 { 50, 4, 1, 100, 0.001, 4, 123 }; df2.single_act_visit<double>("IBM_Close", lstm4); const auto result6 = lstm4.get_result(); assert(result6.size() == 4); assert(std::fabs(result6[0] - 118.424) < 0.001); assert(std::fabs(result6[1] - 132.139) < 0.001); assert(std::fabs(result6[2] - 132.311) < 0.001); assert(std::fabs(result6[3] - 132.382) < 0.001); // Now multidimensional data // constexpr std::size_t dim { 3 }; using ary_col_t = std::array<double, dim>; using vec_col_t = std::vector<double>; // 3-dimensional time-series: 60 time steps, 3 features each // Feature 0: price-like (trending upward with noise) // Feature 1: volume-like (mean-reverting around 1000) // Feature 2: volatility-like (small positive values) // std::vector<ary_col_t> ary_col { { 100.0, 980.0, 0.012 }, { 101.2, 995.0, 0.015 }, { 100.8, 1020.0, 0.011 }, { 102.1, 1005.0, 0.018 }, { 103.4, 990.0, 0.014 }, { 102.9, 1010.0, 0.013 }, { 104.2, 1030.0, 0.016 }, { 105.0, 970.0, 0.020 }, { 104.6, 985.0, 0.017 }, { 106.1, 1015.0, 0.014 }, { 107.3, 1025.0, 0.013 }, { 106.8, 995.0, 0.019 }, { 108.0, 1040.0, 0.015 }, { 107.5, 1010.0, 0.012 }, { 109.2, 980.0, 0.016 }, { 110.0, 1000.0, 0.014 }, { 109.7, 
1020.0, 0.018 }, { 111.1, 975.0, 0.021 }, { 110.5, 990.0, 0.017 }, { 112.0, 1035.0, 0.013 }, { 111.8, 1015.0, 0.015 }, { 113.2, 985.0, 0.019 }, { 112.9, 1005.0, 0.016 }, { 114.0, 1025.0, 0.014 }, { 113.5, 995.0, 0.018 }, { 115.1, 970.0, 0.022 }, { 114.8, 1010.0, 0.015 }, { 116.0, 980.0, 0.017 }, { 115.6, 1030.0, 0.013 }, { 117.2, 1000.0, 0.016 }, { 116.9, 1020.0, 0.019 }, { 118.0, 990.0, 0.014 }, { 117.5, 1040.0, 0.012 }, { 119.1, 975.0, 0.020 }, { 118.8, 1005.0, 0.018 }, { 120.0, 995.0, 0.015 }, { 119.7, 1025.0, 0.017 }, { 121.2, 985.0, 0.021 }, { 120.9, 1015.0, 0.013 }, { 122.0, 970.0, 0.016 }, { 121.6, 1000.0, 0.019 }, { 123.1, 990.0, 0.014 }, { 122.8, 1030.0, 0.018 }, { 124.0, 1010.0, 0.015 }, { 123.5, 980.0, 0.017 }, { 125.2, 1020.0, 0.012 }, { 124.9, 995.0, 0.020 }, { 126.0, 975.0, 0.016 }, { 125.6, 1005.0, 0.019 }, { 127.1, 1035.0, 0.014 }, { 126.8, 985.0, 0.018 }, { 128.0, 1015.0, 0.015 }, { 127.5, 990.0, 0.021 }, { 129.2, 1000.0, 0.013 }, { 128.9, 1025.0, 0.017 }, { 130.0, 970.0, 0.016 }, { 129.7, 980.0, 0.020 }, { 131.1, 1010.0, 0.014 }, { 130.8, 1030.0, 0.018 }, { 132.0, 995.0, 0.015 }, }; std::vector<vec_col_t> vec_col { { 100.0, 980.0, 0.012 }, { 101.2, 995.0, 0.015 }, { 100.8, 1020.0, 0.011 }, { 102.1, 1005.0, 0.018 }, { 103.4, 990.0, 0.014 }, { 102.9, 1010.0, 0.013 }, { 104.2, 1030.0, 0.016 }, { 105.0, 970.0, 0.020 }, { 104.6, 985.0, 0.017 }, { 106.1, 1015.0, 0.014 }, { 107.3, 1025.0, 0.013 }, { 106.8, 995.0, 0.019 }, { 108.0, 1040.0, 0.015 }, { 107.5, 1010.0, 0.012 }, { 109.2, 980.0, 0.016 }, { 110.0, 1000.0, 0.014 }, { 109.7, 1020.0, 0.018 }, { 111.1, 975.0, 0.021 }, { 110.5, 990.0, 0.017 }, { 112.0, 1035.0, 0.013 }, { 111.8, 1015.0, 0.015 }, { 113.2, 985.0, 0.019 }, { 112.9, 1005.0, 0.016 }, { 114.0, 1025.0, 0.014 }, { 113.5, 995.0, 0.018 }, { 115.1, 970.0, 0.022 }, { 114.8, 1010.0, 0.015 }, { 116.0, 980.0, 0.017 }, { 115.6, 1030.0, 0.013 }, { 117.2, 1000.0, 0.016 }, { 116.9, 1020.0, 0.019 }, { 118.0, 990.0, 0.014 }, { 117.5, 1040.0, 0.012 }, { 
119.1, 975.0, 0.020 }, { 118.8, 1005.0, 0.018 }, { 120.0, 995.0, 0.015 }, { 119.7, 1025.0, 0.017 }, { 121.2, 985.0, 0.021 }, { 120.9, 1015.0, 0.013 }, { 122.0, 970.0, 0.016 }, { 121.6, 1000.0, 0.019 }, { 123.1, 990.0, 0.014 }, { 122.8, 1030.0, 0.018 }, { 124.0, 1010.0, 0.015 }, { 123.5, 980.0, 0.017 }, { 125.2, 1020.0, 0.012 }, { 124.9, 995.0, 0.020 }, { 126.0, 975.0, 0.016 }, { 125.6, 1005.0, 0.019 }, { 127.1, 1035.0, 0.014 }, { 126.8, 985.0, 0.018 }, { 128.0, 1015.0, 0.015 }, { 127.5, 990.0, 0.021 }, { 129.2, 1000.0, 0.013 }, { 128.9, 1025.0, 0.017 }, { 130.0, 970.0, 0.016 }, { 129.7, 980.0, 0.020 }, { 131.1, 1010.0, 0.014 }, { 130.8, 1030.0, 0.018 }, { 132.0, 995.0, 0.015 }, }; df2.load_column<ary_col_t>("ARY COL", std::move(ary_col), nan_policy::dont_pad_with_nans); df2.load_column<vec_col_t>("VEC COL", std::move(vec_col), nan_policy::dont_pad_with_nans); lstm_v<ary_col_t, std::string> ary_lstm { 16, 10, 1, 30, 0.001, 4, 42 }; lstm_v<vec_col_t, std::string> vec_lstm { 16, 10, 1, 30, 0.001, 4, 42 }; df2.single_act_visit<ary_col_t>("ARY COL", ary_lstm); df2.single_act_visit<vec_col_t>("VEC COL", vec_lstm); assert(ary_lstm.get_result().size() == 4); for (const auto &ary : ary_lstm.get_result()) assert(ary.size() == dim); assert(vec_lstm.get_result().size() == 4); for (const auto &vec : vec_lstm.get_result()) assert(vec.size() == dim); assert(std::abs(ary_lstm.get_result()[0][0] - 125.733) < 0.001); assert(std::abs(ary_lstm.get_result()[0][1] - 1001.01) < 0.01); assert(std::abs(ary_lstm.get_result()[2][1] - 1000.86) < 0.01); assert(std::abs(ary_lstm.get_result()[3][2] - 0.016641) < 0.000001); assert(std::abs(vec_lstm.get_result()[0][0] - 125.733) < 0.001); assert(std::abs(vec_lstm.get_result()[0][1] - 1001.01) < 0.01); assert(std::abs(vec_lstm.get_result()[2][1] - 1000.86) < 0.01); assert(std::abs(vec_lstm.get_result()[3][2] - 0.016641) < 0.000001); }