Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct FirstVisitor;
This functor class selects the first item in the given column.
The result is the first data item in the given column.
    explicit
    FirstVisitor(bool skipnan = false);
        
T: Column data type.
I: Index type.
#include <DataFrame/DataFrameStatsVisitors.h>

template<typename T, typename I = unsigned long>
struct LastVisitor;
This functor class selects the last item in the given column.
The result is the last data item in the given column.
    explicit
    LastVisitor(bool skipnan = false);
        
T: Column data type.
I: Index type.
    // Example: joining two market-data DataFrames, running correlation visitors on the joined data, and bucketizing
    // one of them with FirstVisitor / LastVisitor (among others).
    //
    // Declare two DataFrames with an index type of DateTime, which is a handy object for date/time manipulations.
    //
    DTDataFrame ibm_dt_df;
    DTDataFrame aapl_dt_df;

    // Read the AAPL and IBM market data from their files. The data for these two stocks start and end at different
    // dates, but there is overlapping data between them.
    //
    ibm_dt_df.read("DT_IBM.csv", io_format::csv2);
    aapl_dt_df.read("DT_AAPL.csv", io_format::csv2);

    // First, fill any missing data in the important IBM columns using linear interpolation.
    //
    ibm_dt_df.fill_missing<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, fill_policy::linear_interpolate);

    // Join the AAPL and IBM DataFrames on their indices, applying the inner-join policy.
    //
    DTDataFrame aapl_ibm = ibm_dt_df.join_by_index<DTDataFrame, double, long>(aapl_dt_df, join_policy::inner_join);

    // Calculate the Pearson correlation coefficient between AAPL and IBM close prices. The visitor's data columns are
    // of type double and its index column is of type DateTime.
    //
    CorrVisitor<double, DateTime>   corrl_v;

    std::cout << "Correlation between AAPL and IBM close prices: "
              << aapl_ibm.visit<double, double>("AAPL_Close", "IBM_Close", corrl_v).get_result()
              << std::endl;

    // Now do something more sophisticated: calculate rolling exponentially weighted correlations between IBM and
    // Apple close prices. Since this is a rolling -- moving -- analysis, the result is a vector of exponentially
    // weighted correlations, one for each date in the data stream.
    //
    ewm_corr_v<double>  ewmcorr { exponential_decay_spec::span, 3 };
    const auto          &ewmcorr_result = aapl_ibm.single_act_visit<double, double>("AAPL_Close", "IBM_Close", ewmcorr).get_result();

    // Only the last element of the rolling result is printed here.
    //
    std::cout << "The last exponentially weighted correlation between AAPL and IBM close prices: "
              << ewmcorr_result.back() << std::endl;

    using dt_idx_t = DTDataFrame::IndexType;  // This is just DateTime.

    // Apple data are daily. Create 10-day OHLC (plus a bunch of other stats) for close prices.
    // Each tuple is (source column name, new column name, visitor that reduces each bucket).
    //
    DTDataFrame aapl_ohlc =
        aapl_dt_df.bucketize(bucket_type::by_count,
                             10,
                             LastVisitor<dt_idx_t, dt_idx_t>(),  // How to bucketize the index column
                             std::make_tuple("AAPL_Close",  "Open",          FirstVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "High",          MaxVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "Low",           MinVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "Close",         LastVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "Mean",          MeanVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "Median",        MedianVisitor<double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "25% Quantile",  QuantileVisitor<double, dt_idx_t>(0.25)),
                             std::make_tuple("AAPL_Close",  "Std",           StdVisitor<double, dt_idx_t>()),
                             // "Mode" column is a column of std::array<ModeVisitor::DataItem, 2>'s
                             std::make_tuple("AAPL_Close",  "Mode",          ModeVisitor<2, double, dt_idx_t>()),
                             std::make_tuple("AAPL_Close",  "MAD",           MADVisitor<double, dt_idx_t>(mad_type::mean_abs_dev_around_mean)),
                             // "Z Score" column is a column of std::vector<double>'s
                             std::make_tuple("AAPL_Close",  "Z Score",       ZScoreVisitor<double, dt_idx_t>()),
                             // "Return Vector" column is a column of std::vector<double>'s
                             std::make_tuple("AAPL_Close",  "Return Vector", ReturnVisitor<double, dt_idx_t>(return_policy::log)),
                             std::make_tuple("AAPL_Volume", "Volume",        SumVisitor<long, dt_idx_t>()));


C++ DataFrame