← Back to Documentations

Signature	Description	Parameters
template<std::size_t K, arithmetic T, typename ... Ts> std::array<DataFrame, K> get_data_by_spectral(const char *col_name, double sigma, seed_t seed = seed_t(-1), std::function<double(const T &x, const T &y, double sigma)> &&sfunc = [](const T &x, const T &y, double sigma) -> double { return (std::exp(-((x - y) * (x - y)) / (2 * sigma * sigma))); }, size_type num_of_iter = 1000) const;	This uses the spectral clustering algorithm to divide the named column into K clusters. It returns a std::array of K DataFrames, each containing one of the clusters of data based on the named column. Self is unchanged. NOTE: Type T must support arithmetic operations.	K: Number of clusters for k-means clustering algorithm T: Type of the named column Ts: The list of types for all columns. A type should be specified only once col_name: Name of the data column sigma: The sigma value passed to sfunc; in the default sfunc it is the width of the Gaussian similarity kernel sfunc: A function to calculate the similarity matrix between data points in the named column num_of_iter: Maximum number of iterations for k-means clustering algorithm before converging seed: Seed for random number generator to initialize k-means clustering algorithm. Default is a random number for each call.
template<std::size_t K, arithmetic T, typename ... Ts> std::array<PtrView, K> get_view_by_spectral(const char *col_name, double sigma, seed_t seed = seed_t(-1), std::function<double(const T &x, const T &y, double sigma)> &&sfunc = [](const T &x, const T &y, double sigma) -> double { return (std::exp(-((x - y) * (x - y)) / (2 * sigma * sigma))); }, size_type num_of_iter = 1000);	This is identical to get_data_by_spectral() above, but: (1) The result is a std::array of K views. (2) Since the result is a view, you cannot call make_consistent() on the result. NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc.	K: Number of clusters for k-means clustering algorithm T: Type of the named column Ts: The list of types for all columns. A type should be specified only once col_name: Name of the data column sigma: The sigma value passed to sfunc; in the default sfunc it is the width of the Gaussian similarity kernel sfunc: A function to calculate the similarity matrix between data points in the named column num_of_iter: Maximum number of iterations for k-means clustering algorithm before converging seed: Seed for random number generator to initialize k-means clustering algorithm. Default is a random number for each call.
template<std::size_t K, arithmetic T, typename ... Ts> std::array<ConstPtrView, K> get_view_by_spectral(const char *col_name, double sigma, seed_t seed = seed_t(-1), std::function<double(const T &x, const T &y, double sigma)> &&sfunc = [](const T &x, const T &y, double sigma) -> double { return (std::exp(-((x - y) * (x - y)) / (2 * sigma * sigma))); }, size_type num_of_iter = 1000) const;	Same as the view version above, but it returns a std::array of K const views. You cannot change data in const views. But if the data is changed in the original DataFrame or through another view, it is reflected in the const view.	K: Number of clusters for k-means clustering algorithm T: Type of the named column Ts: The list of types for all columns. A type should be specified only once col_name: Name of the data column sigma: The sigma value passed to sfunc; in the default sfunc it is the width of the Gaussian similarity kernel sfunc: A function to calculate the similarity matrix between data points in the named column num_of_iter: Maximum number of iterations for k-means clustering algorithm before converging seed: Seed for random number generator to initialize k-means clustering algorithm. Default is a random number for each call.

static void test_get_data_by_spectral()  {

    std::cout << "\nTesting get_data_by_spectral( ) ..." << std::endl;

    typedef StdDataFrame64<std::string> StrDataFrame;

    StrDataFrame    df;

    try  {
        df.read("SHORT_IBM.csv", io_format::csv2, { .starting_row = 1000, .num_rows = 500 });
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    StrDataFrame    df2 = df;

    auto    lbd = [](const std::string &, const double &) -> bool { return (true); };
    auto    view = df2.get_view_by_sel<double, decltype(lbd), double, long>("IBM_Open", lbd);

    auto    result_df = df.get_data_by_spectral <3, double, double, long>("IBM_Close", 8, 89);
    auto    result_view = view.get_view_by_spectral<3, double, double, long>("IBM_Close", 8, 89);

    assert(result_df.size() == 3);
    assert(result_df.size() == result_view.size());

    assert(result_df[0].get_index().size() == 1);
    assert(result_df[0].get_column<double>("IBM_Open").size() == 1);
    assert(result_df[0].get_index()[0] == "2018-12-24");
    assert(result_df[0].get_column<double>("IBM_High")[0] == 111.0);
    assert(result_df[0].get_column<long>("IBM_Volume")[0] == 3821400);
    assert(result_view[0].get_column<double>("IBM_High")[0] == 111.0);
    assert(result_view[0].get_column<long>("IBM_Volume")[0] == 3821400);

    assert(result_df[1].get_index().size() == 47);
    assert(result_df[1].get_column<double>("IBM_Open").size() == 47);
    assert(result_df[1].get_index()[0] == "2018-10-29");
    assert(result_df[1].get_index()[46] == "2019-01-22");
    assert(result_df[1].get_column<double>("IBM_High")[20] == 121.68);
    assert(result_df[1].get_column<long>("IBM_Volume")[35] == 4346700);
    assert(result_view[1].get_index().size() == 47);
    assert(result_view[1].get_column<double>("IBM_Open").size() == 47);
    assert(result_view[1].get_index()[0] == "2018-10-29");
    assert(result_view[1].get_index()[46] == "2019-01-22");
    assert(result_view[1].get_column<double>("IBM_High")[20] == 121.68);
    assert(result_view[1].get_column<long>("IBM_Volume")[35] == 4346700);

    assert(result_df[2].get_index().size() == 452);
    assert(result_df[2].get_column<double>("IBM_Open").size() == 452);
    assert(result_df[2].get_index()[0] == "2017-12-20");
    assert(result_df[2].get_index()[451] == "2019-12-16");
    assert(result_df[2].get_column<double>("IBM_High")[200] == 149.070007);
    assert(result_df[2].get_column<long>("IBM_Volume")[300] == 4958000);
    assert(result_view[2].get_index().size() == 452);
    assert(result_view[2].get_column<double>("IBM_Open").size() == 452);
    assert(result_view[2].get_index()[0] == "2017-12-20");
    assert(result_view[2].get_index()[451] == "2019-12-16");
    assert(result_view[2].get_column<double>("IBM_High")[200] == 149.070007);
    assert(result_view[2].get_column<long>("IBM_Volume")[300] == 4958000);
}