Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameMLVisitors.h>

// Visitor that clusters a data column with the Affinity Propagation
// algorithm, without requiring the number of clusters up front.
//
// T: column data type.
// I: index type (defaults to unsigned long).
// A: memory alignment boundary for vectors; the default (0) selects the
//    system default alignment.
template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct AffinityPropVisitor;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This functor class finds clusters in data, without the need to specify the number of clusters, using the Affinity Propagation algorithm.

NOTE: This is a resource-consuming and relatively slow algorithm. Its time complexity is O(I * n^2), where I is the number of iterations and n is the number of data points. Its space complexity is O(2 * n^2). Also look at KMeansVisitor.

The constructor takes 4 parameters
1. Number of iterations
2. Boolean flag indicating whether to populate the actual cluster values and indices
3. A function to calculate distance between two data points of type T with a default implementation
4. Damping factor used in the algorithm. The default is 0.9. (1 – damping factor) prevents numerical oscillations.
    // num_of_iter:    number of iterations.
    // calc_clusters:  whether to populate the actual cluster values and
    //                 indices (default true).
    // f:              distance between two data points of type T; the
    //                 default is the squared difference (x - y) * (x - y).
    // damping_factor: damping factor used in the algorithm (default 0.9).
    explicit
    AffinityPropVisitor(std::size_t num_of_iter,
                        bool calc_clusters = true,
                        distance_func f =
                            [](const T &x, const T &y) -> double {
                                return ((x - y) * (x - y));
                            },
                        double damping_factor = 0.9);
        

get_result() returns a VectorPtrView of type T containing the centers of clusters.
get_clusters() returns a vector of VectorPtrView's which contain the data clustered around the centers.
get_clusters_idxs() returns a vector of std::vector<std::size_t>'s which contain the indices of data clustered around the centers.
T: Column data type
I: Index type
A: Memory alignment boundary for vectors. Default is system default alignment
static void test_affinity_propagation()  {

    std::cout << "\nTesting affinity propagation visitor ..." << std::endl;

    const size_t           item_cnt = 50;
    MyDataFrame            df;
    RandGenParams<double>  p;
    StlVecType<double>     final_col;
    StlVecType<double>     col_data;

    p.seed = 10U;

    p.min_value = 0;
    p.max_value = 10;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 20;
    p.max_value = 30;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 40;
    p.max_value = 50;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 60;
    p.max_value = 70;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    p.min_value = 80;
    p.max_value = 90;
    col_data = gen_uniform_real_dist<double, 128>(item_cnt, p);
    final_col.insert(final_col.end(), col_data.begin(), col_data.end());

    df.load_data(MyDataFrame::gen_sequence_index(0, item_cnt * 5, 1), std::make_pair("col1", final_col));
    df.shuffle<double>({"col1"}, false, 10);

    KMeansVisitor<5, double, unsigned long, 128>  km_visitor(1000, true,
                                                             [](const double &x, const double &y)  {
                                                                 return ((x - y) * (x - y));
                                                             },
                                                             10);
    AffinityPropVisitor<double, unsigned long, 128> ap_visitor(50);

    df.single_act_visit<double>("col1", km_visitor);
    df.single_act_visit<double>("col1", ap_visitor);

    // Using the calculated means, separate the given column into clusters
    const auto  k_means = km_visitor.get_result();
    const auto  results = ap_visitor.get_clusters();

    for (auto iter : k_means)  {
        std::cout << iter << ", ";
    }
    std::cout << "\n\n" << std::endl;
    for (auto iter : results)  {
        for (auto iter2 : iter)  {
            std::cout << iter2 << ", ";
        }
        std::cout << "\n" << std::endl;
    }
}

C++ DataFrame