| Signature | Description | Parameters |
|---|---|---|
| `template<container T, typename ... Ts> DataFrame explode(const char *col_name) const;` | This transforms a column of containers into a column of the containers' value type. For each element in each container, the corresponding data points of the index and of the other data columns are repeated. It returns a new DataFrame. Self will be unchanged. | T: Type of the container column<br>Ts: The list of types for all columns. A type should be specified only once.<br>col_name: Name of the container column |
| `template<typename VAR_T, typename VAL_T> DataFrame unpivot(const char *pvt_col_name, std::vector<const char *> &&value_col_names = { }, const char *var_name = "variable", const char *value_name = "values") const;` | This rotates down (unpivots) a wide DataFrame 90 degrees into a long DataFrame. It rotates the DataFrame on the pvt_col_name column. It adds a string column (i.e. variable) to the returned DataFrame that contains the names of the value columns. If value_col_names is empty, it will include all the columns of VAL_T type. In other packages this function might be called melt. It is useful for reshaping data to facilitate analysis, visualization, or machine learning tasks. It returns a new DataFrame. Self will be unchanged. | VAR_T: Type of the variable (unpivoting) column<br>VAL_T: Type of all value columns<br>pvt_col_name: Name of the variable (unpivoting) column<br>value_col_names: Names of value columns. If it is empty, it will include all the columns of type VAL_T<br>var_name: Name of the variable column in the returned DataFrame<br>value_name: Name of the value column in the returned DataFrame |
| `template<typename VAL_T, StringOnly C_T = std::string> DataFrame pivot(const char *col_names_column, std::vector<const char *> &&value_col_names) const;` | This rotates up (pivots) a long DataFrame 90 degrees into a wide DataFrame. It rotates based on repeating values in the col_names_column. It adds those names as columns of type VAL_T to the returned DataFrame. The col_names_column column must be of a string type in the calling DataFrame. It returns a new DataFrame. Self will be unchanged. NOTE: This implementation has not been extensively tested yet. Make sure to check the result carefully. | VAL_T: Type of all value columns<br>C_T: Type of the col_names_column column, which must be some kind of string<br>col_names_column: Name of the column that contains repeating names of the value columns in the returned DataFrame<br>value_col_names: Names of value columns to be spread horizontally |
static void test_explode() { std::cout << "\nTesting load_explode( ) ..." << std::endl; using DT_DataFrame = StdDataFrame<DateTime>; DT_DataFrame df; try { df.read("AAPL_10dBucketWithMaps_small.csv", io_format::csv2); } catch (const DataFrameError &ex) { std::cout << ex.what() << std::endl; } df.write<std::ostream, double, long, std::map<std::string, double>, std::unordered_map<std::string, double>, std::vector<std::string>, std::set<double>, std::set<std::string>>(std::cout, io_format::csv2); auto exploded1 = df.explode<std::unordered_map<std::string, double>, double, long, std::map<std::string, double>, std::vector<std::string>, std::set<double>, std::set<std::string>>("Unordered Map"); std::cout << "\n\n"; exploded1.write<std::ostream, std::pair<std::string, double>, double, long, std::map<std::string, double>, std::vector<std::string>, std::set<double>, std::set<std::string>>(std::cout, io_format::csv2); auto exploded2 = df.explode<std::map<std::string, double>, double, long, std::vector<std::string>, std::unordered_map<std::string, double>, std::set<double>, std::set<std::string>>("Map 1"); std::cout << "\n\n"; exploded2.write<std::ostream, std::pair<std::string, double>, double, long, std::unordered_map<std::string, double>, std::vector<std::string>, std::set<double>, std::set<std::string>>(std::cout, io_format::csv2); auto exploded3 = df.explode<std::set<double>, double, long, std::vector<std::string>, std::map<std::string, double>, std::unordered_map<std::string, double>, std::set<std::string>>("Double Set"); std::cout << "\n\n"; exploded3.write<std::ostream, double, long, std::map<std::string, double>, std::unordered_map<std::string, double>, std::vector<std::string>, std::set<std::string>>(std::cout, io_format::csv2); auto exploded4 = df.explode<std::vector<std::string>, double, long, std::map<std::string, double>, std::set<double>, std::unordered_map<std::string, double>, std::set<std::string>>("Str Vec"); std::cout << "\n\n"; 
exploded4.write<std::ostream, std::string, double, long, std::map<std::string, double>, std::unordered_map<std::string, double>, std::set<double>, std::set<std::string>>(std::cout, io_format::csv2); }
// ---------------------------------------------------------------------------- static void test_unpivot() { std::cout << "\nTesting unpivot( ) ..." << std::endl; ULDataFrame df; df.load_data(std::vector<unsigned long>{ 1, 2, 3, 4, 5, 6, 7 }, std::make_pair("day", std::vector<std::string>{ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" }), std::make_pair("Chicago", std::vector<double>{ 32, 30, 28, 22, 30, 20, 25 }), std::make_pair("Tehran", std::vector<double>{ 75, 77, 75, 82, 83, 81, 77 }), std::make_pair("Berlin", std::vector<double>{ 41, 43, 45, 38, 30, 45, 47 }), std::make_pair("Str Column", std::vector<std::string>{ "AA", "BB", "CC", "DD", "EE", "FF", "GG" })); df.write<std::ostream, std::string, double>(std::cout, io_format::csv2); std::cout << "\n\n"; const auto mdf1 = df.unpivot<std::string, double>("day", { "Chicago", "Tehran", "Berlin" }); mdf1.write<std::ostream, std::string, double>(std::cout, io_format::csv2); std::cout << "\n\n"; const auto mdf2 = df.unpivot<std::string, double>("day"); mdf2.write<std::ostream, std::string, double>(std::cout, io_format::csv2); assert((mdf1.is_equal<double, std::string>(mdf2))); }
// ---------------------------------------------------------------------------- static void test_pivot() { std::cout << "\nTesting pivot( ) ..." << std::endl; ULDataFrame df; df.load_data( std::vector<unsigned long>{ 1, 2, 3, 4, 5, 6, 7 }, std::make_pair("day", std::vector<std::string>{ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" }), std::make_pair("Chicago", std::vector<double>{ 32, 30, 28, 22, 30, 20, 25 }), std::make_pair("Tehran", std::vector<double>{ 75, 77, 75, 82, 83, 81, 77 }), std::make_pair("Berlin", std::vector<double>{ 41, 43, 45, 38, 30, 45, 47 }), std::make_pair("Str Column", std::vector<std::string>{ "AA", "BB", "CC", "DD", "EE", "FF", "GG" })); df.write<std::ostream, std::string, double>(std::cout, io_format::csv2); std::cout << "\n\n"; const auto mdf1 = df.unpivot<std::string, double>("day", { "Chicago", "Tehran", "Berlin" }); mdf1.write<std::ostream, std::string, double>(std::cout, io_format::csv2); std::cout << "\n\n"; const auto mdf2 = mdf1.pivot<double>("variable", { "values" }); mdf2.write<std::ostream, double>(std::cout, io_format::csv2); }