Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameMLVisitors.h>

template<typename T, typename I = unsigned long>
struct  DynamicTimeWarpVisitor

// ---------------------------------------

template<typename T, typename I = unsigned long>
using dtw_v = DynamicTimeWarpVisitor<T, I>;
This is a single action visitor, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

Dynamic Time Warping (DTW) is a powerful algorithm used in time series analysis to measure the similarity between two temporal sequences. Unlike traditional distance metrics like Euclidean distance, DTW can handle sequences of different lengths. It can align sequences that may be out of sync, making it particularly useful in fields such as speech recognition, gesture analysis, and finance.
This functor optionally normalizes the columns before calculating the distance. The complexity of the algorithm is O(n × m), where n and m are the lengths of the two columns, and it is currently not multithreaded.

Normalization, especially z-score, makes DTW shape-sensitive but magnitude/offset blind. Two sequences that differ only by a linear scaling or offset will have DTW = 0 after normalization (e.g. z-score). This is a feature when comparing signals of different amplitudes (e.g. sensor readings in different units), but a trap if you expect sequences in different regions of space to be far apart. This visitor works with both scalar and multidimensional (i.e. vectors and arrays) datasets.
explicit
DynamicTimeWarpVisitor(normalization_type norm_type = normalization_type::none,
                       distance_func &&f = def_dist_);
        
Where def_dist_ is:
// Default distance between two data points.
//
// Scalars: absolute difference |x - y|.
// Multidimensional values: Euclidean distance, i.e. the square root of the
// sum of squared element-wise differences.
//
static result_type def_dist_(const value_type &x, const value_type &y)  {

    if constexpr (is_md_)  {  // Multidimensional path
        // Handles both std::vector<data_t> and std::array<data_t, N>.
        // Only x is bounds-checked, so the caller must guarantee
        // x.size() == y.size() (same dimensionality).
        //
        result_type sq_total { 0 };
        auto        rhs_it { std::begin(y) };

        for (auto lhs_it { std::begin(x) };
             lhs_it != std::end(x);
             ++lhs_it, ++rhs_it)  {
            const result_type   delta {
                static_cast<result_type>(*lhs_it) -
                static_cast<result_type>(*rhs_it)
            };

            sq_total += delta * delta;
        }
        return (std::sqrt(sq_total));
    }
    else  {  // Scalar path
        return (std::fabs(x - y));
    }
}
get_result() Returns the numeric distance
T: Column data type
I: Index type
static void test_DynamicTimeWarpVisitor()  {

    std::cout << "\nTesting DynamicTimeWarpVisitor{ } ..." << std::endl;

    StrDataFrame    df;

    try  {
        df.read("IBM.csv", io_format::csv2);
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    dtw_v<double, std::string>  dtw;

    df.single_act_visit<double, double>("IBM_Close", "IBM_Close", dtw);
    assert((std::fabs(dtw.get_result() - 0.0) < 1e-15));

    df.single_act_visit<double, double>("IBM_Open", "IBM_Close", dtw);
    assert((std::fabs(dtw.get_result() - 2682.91) < 0.01));

    df.single_act_visit<double, double>("IBM_High", "IBM_Low", dtw);
    assert((std::fabs(dtw.get_result() - 6255.33) < 0.01));

    df.single_act_visit<double, double>("IBM_Open", "IBM_Low", dtw);
    assert((std::fabs(dtw.get_result() - 3649.94) < 0.01));

    df.single_act_visit<double, double>("IBM_Close", "IBM_Low", dtw);
    assert((std::fabs(dtw.get_result() - 3851.43) < 0.01));

    df.single_act_visit<double, double>("IBM_Open", "IBM_High", dtw);
    assert((std::fabs(dtw.get_result() - 3833.26) < 0.01));

    df.single_act_visit<double, double>("IBM_Close", "IBM_High", dtw);
    assert((std::fabs(dtw.get_result() - 3737.44) < 0.01));

    DynamicTimeWarpVisitor<double, std::string> dtw2 (normalization_type::z_score);

    df.single_act_visit<double, double>("IBM_Close", "IBM_Close", dtw2);
    assert((std::fabs(dtw2.get_result() - 0.0) < 1e-15));

    df.single_act_visit<double, double>("IBM_Open", "IBM_Close", dtw2);
    assert((std::fabs(dtw2.get_result() - 70.4392) < 0.0001));

    df.single_act_visit<double, double>("IBM_High", "IBM_Low", dtw2);
    assert((std::fabs(dtw2.get_result() - 90.0254) < 0.0001));

    df.single_act_visit<double, double>("IBM_Open", "IBM_Low", dtw2);
    assert((std::fabs(dtw2.get_result() - 77.116) < 0.0001));

    df.single_act_visit<double, double>("IBM_Close", "IBM_Low", dtw2);
    assert((std::fabs(dtw2.get_result() - 76.7581) < 0.0001));

    df.single_act_visit<double, double>("IBM_Open", "IBM_High", dtw2);
    assert((std::fabs(dtw2.get_result() - 77.2208) < 0.0001));

    df.single_act_visit<double, double>("IBM_Close", "IBM_High", dtw2);
    assert((std::fabs(dtw2.get_result() - 77.2344) < 0.0001));

    // Now multidimensional data
    //
    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;
    using vec_col_t = std::vector<double>;

    std::vector<vec_col_t>  x_vec  { { 1.0, 0.0, 0.0 }, { 2.0, 1.0, 0.5 }, { 3.0, 2.0, 1.0 }, { 4.0, 3.0, 1.5 }, { 5.0, 4.0, 2.0 }, };
    std::vector<ary_col_t>  x_ary  { { 1.0, 0.0, 0.0 }, { 2.0, 1.0, 0.5 }, { 3.0, 2.0, 1.0 }, { 4.0, 3.0, 1.5 }, { 5.0, 4.0, 2.0 }, };
    std::vector<vec_col_t>  y_vec  { { 1.1, 0.1, 0.1 }, { 1.9, 0.9, 0.4 }, { 2.8, 1.8, 0.9 }, { 3.5, 2.8, 1.4 }, { 4.2, 3.5, 1.7 }, { 5.1, 4.1, 2.1 }, };
    std::vector<ary_col_t>  y_ary  { { 1.1, 0.1, 0.1 }, { 1.9, 0.9, 0.4 }, { 2.8, 1.8, 0.9 }, { 3.5, 2.8, 1.4 }, { 4.2, 3.5, 1.7 }, { 5.1, 4.1, 2.1 }, };
    std::vector<vec_col_t>  z_vec  { { 10.0, 20.0, 30.0 }, { 11.0, 21.0, 31.0 }, { 12.0, 22.0, 32.0 }, { 13.0, 23.0, 33.0 }, { 14.0, 24.0, 34.0 }, };
    std::vector<ary_col_t>  z_ary  { { 10.0, 20.0, 30.0 }, { 11.0, 21.0, 31.0 }, { 12.0, 22.0, 32.0 }, { 13.0, 23.0, 33.0 }, { 14.0, 24.0, 34.0 }, };

    df.load_column<vec_col_t>("X COL VEC", std::move(x_vec), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("X COL ARY", std::move(x_ary), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("Y COL VEC", std::move(y_vec), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("Y COL ARY", std::move(y_ary), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("Z COL VEC", std::move(z_vec), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("Z COL ARY", std::move(z_ary), nan_policy::dont_pad_with_nans);

    DynamicTimeWarpVisitor<vec_col_t, std::string>  vec_dtw { normalization_type::z_score };
    DynamicTimeWarpVisitor<ary_col_t, std::string>  ary_dtw { normalization_type::z_score };

    df.single_act_visit<vec_col_t, vec_col_t>("X COL VEC", "Y COL VEC", vec_dtw);
    df.single_act_visit<ary_col_t, ary_col_t>("X COL ARY", "Y COL ARY", ary_dtw);
    assert(std::abs(vec_dtw.get_result() - 1.91304) < 0.00001);
    assert(std::abs(ary_dtw.get_result() - 1.91304) < 0.00001);

    // Z-score normalization makes DTW shape-sensitive but
    // magnitude/offset blind. Two sequences that differ only by a linear
    // scaling or offset will have DTW = 0 after z-score. This is a feature
    // when comparing signals of different amplitudes (e.g. sensor readings
    // in different units), but a trap if you expect sequences in different
    // regions of space to be far apart.
    //
    df.single_act_visit<ary_col_t, ary_col_t>("X COL ARY", "Z COL ARY", ary_dtw);
    assert(std::abs(ary_dtw.get_result() - 0.0) < 1e-12);

    DynamicTimeWarpVisitor<ary_col_t, std::string>  ary_dtw_nn { normalization_type::none };

    df.single_act_visit<ary_col_t, ary_col_t>("X COL ARY", "Z COL ARY", ary_dtw_nn);
    assert(std::abs(ary_dtw_nn.get_result() - 189.879) < 0.001);
}

C++ DataFrame