| Signature | Description | Parameters |
|---|---|---|
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct FirstVisitor; |
This functor class chooses the first item in the given column. The result is the first data item in the given column.
explicit
FirstVisitor(bool skipnan = false);
|
T: Column data type. I: Index type. |
#include <DataFrame/DataFrameStatsVisitors.h> template<typename T, typename I = unsigned long> struct LastVisitor; |
This functor class chooses the last item in the given column. The result is the last data item in the given column.
explicit
LastVisitor(bool skipnan = false);
|
T: Column data type. I: Index type. |
// Now let’s declare two DataFrames with index type of DateTime which is a handy object for date/time manipulations. // DTDataFrame ibm_dt_df; DTDataFrame aapl_dt_df; // Let’s read the AAPL and IBM market data from their files. The data for these two stocks start and end at different // dates. But there is overlapping data between them. // ibm_dt_df.read("DT_IBM.csv", io_format::csv2); aapl_dt_df.read("DT_AAPL.csv", io_format::csv2); // First let’s make sure if there are missing data in our important columns, we fill them up. // ibm_dt_df.fill_missing<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, fill_policy::linear_interpolate); // Now we join the AAPL and IBM DataFrames using their indices and applying inner-join policy. // DTDataFrame aapl_ibm = ibm_dt_df.join_by_index<DTDataFrame, double, long>(aapl_dt_df, join_policy::inner_join); // Now we calculate the Pearson correlation coefficient between AAPL and IBM close prices. The visitor's data columns are // of type double and its index column is of type DateTime. // CorrVisitor<double, DateTime> corrl_v; std::cout << "Correlation between AAPL and IBM close prices: " << aapl_ibm.visit<double, double>("AAPL_Close", "IBM_Close", corrl_v).get_result() << std::endl; // Now let’s do something more sophisticated and calculate rolling exponentially weighted correlations between IBM and // Apple close prices. Since this is a rolling -- moving -- analysis the result is a vector of exponentially weighted // correlations for each date in the data stream. // ewm_corr_v<double> ewmcorr { exponential_decay_spec::span, 3 }; const auto &ewmcorr_result = aapl_ibm.single_act_visit<double, double>("AAPL_Close", "IBM_Close", ewmcorr).get_result(); std::cout << "The last exponentailly weighted correlation between AAPL and IBM close prices: " << ewmcorr_result.back() << std::endl; using dt_idx_t = DTDataFrame::IndexType; // This is just DateTime. // Apple data are daily. 
Let’s create 10-day OHLC (plus a bunch of other stats) for close prices. // DTDataFrame aapl_ohlc = aapl_dt_df.bucketize(bucket_type::by_count, 10, LastVisitor<dt_idx_t, dt_idx_t>(), // How to bucketize the index column std::make_tuple("AAPL_Close", "Open", FirstVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "High", MaxVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "Low", MinVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "Close", LastVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "Mean", MeanVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "Median", MedianVisitor<double, dt_idx_t>()), std::make_tuple("AAPL_Close", "25% Quantile", QuantileVisitor<double, dt_idx_t>(0.25)), std::make_tuple("AAPL_Close", "Std", StdVisitor<double, dt_idx_t>()), // "Mode" column is a column of std::array<ModeVisitor::DataItem, 2>'s std::make_tuple("AAPL_Close", "Mode", ModeVisitor<2, double, dt_idx_t>()), std::make_tuple("AAPL_Close", "MAD", MADVisitor<double, dt_idx_t>(mad_type::mean_abs_dev_around_mean)), // "Z Score" column is a column of std::vector<double>'s std::make_tuple("AAPL_Close", "Z Score", ZScoreVisitor<double, dt_idx_t>()), // "Return Vector" column is a column of std::vector<double>'s std::make_tuple("AAPL_Close", "Return Vector", ReturnVisitor<double, dt_idx_t>(return_policy::log)), std::make_tuple("AAPL_Volume", "Volume", SumVisitor<long, dt_idx_t>()));