Back to Documentations

Signature Description

// Selects which loss function LossFunctionVisitor computes.
// In the formulas below, L is the resulting loss and N is the number of
// data points.
enum class loss_function_type : unsigned char  {

    // P = Probability(Actual), Q = Probability(Model)
    kullback_leibler = 1,      // L = ∑[P(x) * log(P(x) / Q(x))]

    // y = Actual, ŷ = Model
    mean_abs_error = 2,        // L = ∑|yi - ŷi| / N

    // y = Actual, ŷ = Model
    mean_sqr_error = 3,        // L = ∑(yi - ŷi)^2 / N

    // y = Actual, ŷ = Model
    mean_sqr_log_error = 4,    // L = ∑[(log(1 + yi) - log(1 + ŷi))^2] / N

    // y = Actual, P(yi) = Model probability prediction
    cross_entropy = 5,         // L = -∑[yi * log(P(yi))] / N

    // y = Actual binary (0/1), P(yi) = Model probability prediction
    binary_cross_entropy = 6,  // L = -∑[yi * log(P(yi)) + (1 - yi) * log(1 - P(yi))] / N

    // y = Actual, ŷ = Model
    categorical_hinge = 7,     // L = max[∑[(1 - yi) * ŷi] - ∑[yi * ŷi] + 1, 0]

    // Y = Actual, Ŷ = Model
    // This only applies to scalar columns (not multidimensional).
    cosine_similarity = 8,     // L = (Y . Ŷ) / (||Y|| * ||Ŷ||)

    // y = Actual, ŷ = Model
    log_cosh = 9,              // L = ∑log(cosh(ŷi - yi)) / N
};
Different loss function types

Signature Description Parameters
#include <DataFrame/DataFrameMLVisitors.h>

// Visitor that computes a loss function over two columns
// (actual vs. predicted/model).
//   T: Column data type.
//   I: Index type.
template<typename T, typename I = unsigned long>
struct LossFunctionVisitor;

// -------------------------------------

// Shorthand alias for LossFunctionVisitor with the same template parameters.
template<typename T, typename I = unsigned long>
using loss_v = LossFunctionVisitor<T, I>;
        
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This visitor implements the different loss functions specified above. It needs two columns: the actual values and the predicted (model) values. For scalar columns, the result is a single figure.

This works with both scalar and multidimensional (i.e. vectors or arrays) datasets, except for cosine_similarity, which only applies to scalar columns.

get_result() returns the loss value. In case of a multidimensional column, it returns a vector of loss values, one per data dimension, so the length of the returned vector equals the data dimension.

// Constructs the visitor.
//   lft: The loss function (one of loss_function_type) this visitor computes.
explicit
LossFunctionVisitor(loss_function_type lft);
        
T: Column data type.
I: Index type.
static void test_LossFunctionVisitor()  {

    std::cout << "\nTesting LossFunctionVisitor{  } ..." << std::endl;

    using IntDataFrame = StdDataFrame256<int>;

    IntDataFrame            df;
    StlVecType<int>         idxvec = { 1, 2, 3, 10, 5, 7, 8, 12, 9, 12, 10, 13, 10, 15, 14 };
    StlVecType<double>      actual = { 1.0, 15.0, 14.0, 2.0, 1.0, 12.0, 11.0, 8.0, 7.0, 4.0, 5.0, 4.0, 3.0, 9.0, 10.0 };
    StlVecType<double>      bin_actual = { 1, 0, 1, 1, 1.0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1 };
    StlVecType<double>      model = { 1.01, 14.908, 14.03, 1.0, 1.5, 12.0, 19.75, 8.6, 7.1, 4.8, 4.4, 4.0, 3.4, 9.0, 9.098 };
    StlVecType<double>      model_prob = { 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667, 0.06667 };
    StlVecType<std::string> strvec = { "zz", "bb", "cc", "ww", "ee", "ff", "gg", "hh", "ii", "jj", "kk", "ll", "mm", "nn", "oo" };

    df.load_data(std::move(idxvec),
                 std::make_pair("actual", actual),
                 std::make_pair("binary actual", bin_actual),
                 std::make_pair("model", model),
                 std::make_pair("model_prob", model_prob),
                 std::make_pair("str_col", strvec));

    loss_v<double, int> loss { loss_function_type::kullback_leibler };

    df.single_act_visit<double, double>("actual", "model_prob", loss);
    assert(std::abs(loss.get_result() - 517.6888) < 0.0001);

    loss_v<double, int> loss2 { loss_function_type::mean_abs_error };

    df.single_act_visit<double, double>("actual", "model", loss2);
    assert(std::abs(loss2.get_result() - 0.9189) < 0.0001);

    loss_v<double, int> loss3 { loss_function_type::mean_sqr_error };

    df.single_act_visit<double, double>("actual", "model", loss3);
    assert(std::abs(loss3.get_result() - 5.3444) < 0.0001);

    loss_v<double, int> loss4 { loss_function_type::mean_sqr_log_error };

    df.single_act_visit<double, double>("actual", "model", loss4);
    assert(std::abs(loss4.get_result() - 0.0379) < 0.0001);

    loss_v<double, int> loss5 { loss_function_type::categorical_hinge };

    df.single_act_visit<double, double>("actual", "model", loss5);
    assert(std::abs(loss5.get_result() - 0) < 0.0001);

    loss_v<double, int> loss6 { loss_function_type::cosine_similarity };

    df.single_act_visit<double, double>("actual", "model", loss6);
    assert(std::abs(loss6.get_result() - 0.9722) < 0.0001);

    loss_v<double, int> loss7 { loss_function_type::log_cosh };

    df.single_act_visit<double, double>("actual", "model", loss7);
    assert(std::abs(loss7.get_result() - 0.646) < 0.0001);

    loss_v<double, int> loss8 { loss_function_type::binary_cross_entropy };

    df.single_act_visit<double, double>("binary actual", "model_prob", loss8);
    assert(std::abs(loss8.get_result() - 1.6524) < 0.0001);

    loss_v<double, int> loss9 { loss_function_type::cross_entropy };

    df.single_act_visit<double, double>("actual", "model_prob", loss9);
    assert(std::abs(loss9.get_result() - 19.1365) < 0.0001);

    // Now multidimensional data
    //
    constexpr std::size_t   dim { 3 };

    using ary_col_t = std::array<double, dim>;
    using vec_col_t = std::vector<double>;

    StlVecType<ary_col_t>   ary_actual  {
        { 0.933, 0.033, 0.033 }, { 0.033, 0.933, 0.033 }, { 0.033, 0.033, 0.933 }, { 0.933, 0.033, 0.033 }, { 0.033, 0.933, 0.033 }, { 0.033, 0.033, 0.933 },
    };
    StlVecType<vec_col_t>   vec_actual  {
        { 0.933, 0.033, 0.033 }, { 0.033, 0.933, 0.033 }, { 0.033, 0.033, 0.933 }, { 0.933, 0.033, 0.033 }, { 0.033, 0.933, 0.033 }, { 0.033, 0.033, 0.933 },
    };
    StlVecType<ary_col_t>   ary_model  {
        { 0.700, 0.200, 0.100 }, { 0.100, 0.800, 0.100 }, { 0.200, 0.300, 0.500 }, { 0.400, 0.400, 0.200 }, { 0.100, 0.600, 0.300 }, { 0.100, 0.100, 0.800 },
    };
    StlVecType<vec_col_t>   vec_model  {
        { 0.700, 0.200, 0.100 }, { 0.100, 0.800, 0.100 }, { 0.200, 0.300, 0.500 }, { 0.400, 0.400, 0.200 }, { 0.100, 0.600, 0.300 }, { 0.100, 0.100, 0.800 },
    };

    df.load_column<ary_col_t>("ARY ACTUAL", std::move(ary_actual), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("VEC ACTUAL", std::move(vec_actual), nan_policy::dont_pad_with_nans);
    df.load_column<ary_col_t>("ARY MODEL", std::move(ary_model), nan_policy::dont_pad_with_nans);
    df.load_column<vec_col_t>("VEC MODEL", std::move(vec_model), nan_policy::dont_pad_with_nans);

    loss_v<ary_col_t, int>  ary_loss1 { loss_function_type::kullback_leibler };
    loss_v<vec_col_t, int>  vec_loss1 { loss_function_type::kullback_leibler };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss1);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss1);
    assert(ary_loss1.get_result().size() == dim);
    assert(vec_loss1.get_result().size() == dim);
    assert(std::abs(ary_loss1.get_result()[0] - 0.889052) < 0.000001);
    assert(std::abs(ary_loss1.get_result()[2] - 0.52002) < 0.00001);
    assert(std::abs(vec_loss1.get_result()[0] - 0.889052) < 0.000001);
    assert(std::abs(vec_loss1.get_result()[2] - 0.52002) < 0.00001);

    loss_v<ary_col_t, int>  ary_loss2 { loss_function_type::mean_abs_error };
    loss_v<vec_col_t, int>  vec_loss2 { loss_function_type::mean_abs_error };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss2);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss2);
    assert(ary_loss2.get_result().size() == dim);
    assert(vec_loss2.get_result().size() == dim);
    assert(std::abs(ary_loss2.get_result()[0] - 0.189) < 0.001);
    assert(std::abs(ary_loss2.get_result()[2] - 0.189) < 0.001);
    assert(std::abs(vec_loss2.get_result()[0] - 0.189) < 0.001);
    assert(std::abs(vec_loss2.get_result()[2] - 0.189) < 0.001);

    loss_v<ary_col_t, int>  ary_loss3 { loss_function_type::mean_sqr_error };
    loss_v<vec_col_t, int>  vec_loss3 { loss_function_type::mean_sqr_error };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss3);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss3);
    assert(ary_loss3.get_result().size() == dim);
    assert(vec_loss3.get_result().size() == dim);
    assert(std::abs(ary_loss3.get_result()[0] - 0.063289) < 0.000001);
    assert(std::abs(ary_loss3.get_result()[2] - 0.052222) < 0.000001);
    assert(std::abs(vec_loss3.get_result()[0] - 0.063289) < 0.000001);
    assert(std::abs(vec_loss3.get_result()[2] - 0.052222) < 0.000001);

    loss_v<ary_col_t, int> ary_loss4 { loss_function_type::mean_sqr_log_error };
    loss_v<vec_col_t, int> vec_loss4 { loss_function_type::mean_sqr_log_error };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss4);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss4);
    assert(ary_loss4.get_result().size() == dim);
    assert(vec_loss4.get_result().size() == dim);
    assert(std::abs(ary_loss4.get_result()[0] - 0.025812) < 0.000001);
    assert(std::abs(ary_loss4.get_result()[2] - 0.025434) < 0.000001);
    assert(std::abs(vec_loss4.get_result()[0] - 0.025812) < 0.000001);
    assert(std::abs(vec_loss4.get_result()[2] - 0.025434) < 0.000001);

    loss_v<ary_col_t, int>  ary_loss5 { loss_function_type::categorical_hinge };
    loss_v<vec_col_t, int>  vec_loss5 { loss_function_type::categorical_hinge };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss5);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss5);
    assert(ary_loss5.get_result().size() == dim);
    assert(vec_loss5.get_result().size() == dim);
    assert(std::abs(ary_loss5.get_result()[0] - 0.5144) < 0.0001);
    assert(std::abs(ary_loss5.get_result()[2] - 0.528) < 0.001);
    assert(std::abs(vec_loss5.get_result()[0] - 0.5144) < 0.0001);
    assert(std::abs(vec_loss5.get_result()[2] - 0.528) < 0.001);

    loss_v<ary_col_t, int>  ary_loss6 { loss_function_type::log_cosh };
    loss_v<vec_col_t, int>  vec_loss6 { loss_function_type::log_cosh };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss6);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss6);
    assert(ary_loss6.get_result().size() == dim);
    assert(vec_loss6.get_result().size() == dim);
    assert(std::abs(ary_loss6.get_result()[0] - 0.0305499) < 0.0000001);
    assert(std::abs(ary_loss6.get_result()[2] - 0.0255612) < 0.0000001);
    assert(std::abs(vec_loss6.get_result()[0] - 0.0305499) < 0.0000001);
    assert(std::abs(vec_loss6.get_result()[2] - 0.0255612) < 0.0000001);

    loss_v<ary_col_t, int>  ary_loss7 { loss_function_type::binary_cross_entropy };
    loss_v<vec_col_t, int>  vec_loss7 { loss_function_type::binary_cross_entropy };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss7);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss7);
    assert(ary_loss7.get_result().size() == dim);
    assert(vec_loss7.get_result().size() == dim);
    assert(std::abs(ary_loss7.get_result()[0] - 0.350844) < 0.000001);
    assert(std::abs(ary_loss7.get_result()[2] - 0.336406) < 0.000001);
    assert(std::abs(vec_loss7.get_result()[0] - 0.350844) < 0.000001);
    assert(std::abs(vec_loss7.get_result()[2] - 0.336406) < 0.000001);

    loss_v<ary_col_t, int>  ary_loss8 { loss_function_type::cross_entropy };
    loss_v<vec_col_t, int>  vec_loss8 { loss_function_type::cross_entropy };

    df.single_act_visit<ary_col_t, ary_col_t>("ARY ACTUAL", "ARY MODEL", ary_loss8);
    df.single_act_visit<vec_col_t, vec_col_t>("VEC ACTUAL", "VEC MODEL", vec_loss8);
    assert(ary_loss8.get_result().size() == dim);
    assert(vec_loss8.get_result().size() == dim);
    assert(std::abs(ary_loss8.get_result()[0] - 0.244791) < 0.000001);
    assert(std::abs(ary_loss8.get_result()[2] - 0.183285) < 0.000001);
    assert(std::abs(vec_loss8.get_result()[0] - 0.244791) < 0.000001);
    assert(std::abs(vec_loss8.get_result()[2] - 0.183285) < 0.000001);
}

C++ DataFrame