← Back to Documentations

Signature	Description	Parameters
#include <DataFrame/DataFrameMLVisitors.h> template<typename T, typename I = unsigned long, std::size_t A = 0> struct AffinityPropVisitor;	This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface. This functor class finds clusters in data, without the need to specify the number of clusters, using the Affinity Propagation algorithm. NOTE: This is a resource-consuming and relatively slow algorithm. Its time complexity is O(I * n²), where I is the number of iterations. Its space complexity is O(2 * n²). See also KMeansVisitor. The constructor takes 4 parameters: 1. Number of iterations 2. Boolean flag indicating whether to populate the actual cluster values and indices 3. A function to calculate the distance between two data points of type T, with a default implementation 4. Damping factor used in the algorithm. The default is 0.9. (1 – damping factor) prevents numerical oscillations. explicit AffinityPropVisitor(std::size_t num_of_iter, bool calc_clusters = true, distance_func f = [](const T &x, const T &y) -> double { return ((x - y) * (x - y)); }, double damping_factor = 0.9); get_result() returns a VectorPtrView of type T containing the centers of the clusters. get_clusters() returns a vector of VectorPtrViews, which contain the data clustered around the centers. get_clusters_idxs() returns a vector of std::vector<std::size_t>s, which contain the indices of the data clustered around the centers.	T: Column data type I: Index type A: Memory alignment boundary for vectors. Default is system default alignment

static void test_affinity_propagation()  {

    std::cout << "\nTesting affinity propagation visitor ..." << std::endl;

    const size_t           item_cnt = 50;
    MyDataFrame            df;
    RandGenParams<double>  p;
    StlVecType<double>     final_col;
    StlVecType<double>     col_data;

    p.seed = 10U;

    p.min_value = 0;
    p.max_value = 10;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 20;
    p.max_value = 30;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 40;
    p.max_value = 50;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 60;
    p.max_value = 70;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 80;
    p.max_value = 90;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    df.load_data(MyDataFrame::gen_sequence_index(0, item_cnt * 5, 1), std::make_pair("col1", final_col));
    df.shuffle<double>({"col1"}, false, 10);

    KMeansVisitor<5, double, unsigned long, 128>  km_visitor(1000, true,
                                                             [](const double &x, const double &y)  {
                                                                 return ((x - y) * (x - y));
                                                             },
                                                             10);
    AffinityPropVisitor<double, unsigned long, 128> ap_visitor(50);

    df.single_act_visit<double>("col1", km_visitor);
    df.single_act_visit<double>("col1", ap_visitor);

    // Using the calculated means, separate the given column into clusters
    const auto  k_means = km_visitor.get_result();
    const auto  results = ap_visitor.get_clusters();

    for (auto iter : k_means)  {
        std::cout << iter << ", ";
    }
    std::cout << "\n\n" << std::endl;
    for (auto iter : results)  {
        for (auto iter2 : iter)  {
            std::cout << iter2 << ", ";
        }
        std::cout << "\n" << std::endl;
    }
}