Back to Documentations

Signature Description Parameters
template<typename ... Ts>
DataFrame
get_data_by_loc(Index2D<long> range,
                inclusiveness incld = inclusiveness::begin) const;
It returns a DataFrame (including the index and data columns) containing the data from location begin to location end.
This function supports Python-like negative indexing. That is why the range type is long.
Ts: The list of types for all columns. A type should be specified only once.
range: The begin and end locations (row positions) of the data. Negative values count back from the end
incld: How to include/exclude start and end indices
template<typename ... Ts>
View
get_view_by_loc(Index2D<long> range,
                inclusiveness incld = inclusiveness::begin);
It behaves like get_data_by_loc(), but it returns a View.
A view is a DataFrame that is a reference to the original DataFrame.
So if you modify anything in the view the original DataFrame will also be modified.
NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc.
Ts: The list of types for all columns. A type should be specified only once.
range: The begin and end locations (row positions) of the data. Negative values count back from the end
incld: How to include/exclude start and end indices
template<typename ... Ts>
ConstView
get_view_by_loc(Index2D<long> range,
                inclusiveness incld = inclusiveness::begin) const;
It behaves like get_data_by_loc(), but it returns a const View.
A view is a DataFrame that is a reference to the original DataFrame.
A const view is read-only: you cannot modify data through it. But if the data is changed in the original DataFrame or through another view, the change is reflected in the const view.
NOTE: There are certain operations that you cannot do with a view. For example, you cannot add/delete columns, etc.
Ts: The list of types for all columns. A type should be specified only once.
range: The begin and end locations (row positions) of the data. Negative values count back from the end
incld: How to include/exclude start and end indices
template<typename ... Ts>
DataFrame<I, H>
get_data_by_loc(const std::vector<long> &locations) const;
It returns a DataFrame (including the index and data columns) containing the data from locations specified in the locations vector.
Ts: The list of types for all columns. A type should be specified only once.
locations: List of indices into the index column to copy data
template<typename ... Ts>
PtrView
get_view_by_loc (const std::vector<long> &locations);
Same as get_data_by_loc() above, but it returns a view. Unlike a const view, data can be changed through this view, and such a change is reflected in the original DataFrame.
Ts: The list of types for all columns. A type should be specified only once.
locations: List of indices into the index column to copy data
template<typename ... Ts>
ConstPtrView
get_view_by_loc(const std::vector<long> &locations) const;
Same as the view above, but it returns a const view. You cannot change data through a const view. But if the data is changed in the original DataFrame or through another view, the change is reflected in the const view.
Ts: The list of types for all columns. A type should be specified only once.
locations: List of indices into the index column to copy data
static void test_get_data_by_loc_slicing()  {

    std::cout << "\nTesting get_data_by_loc()/slicing ..." << std::endl;

    StlVecType<unsigned long>   idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 };
    StlVecType<double>          d1 = { 1, 2, 3, 4, 5, 6, 7 };
    StlVecType<double>          d2 = { 8, 9, 10, 11, 12, 13, 14 };
    StlVecType<double>          d3 = { 15, 16, 17, 18, 19, 20, 21 };
    StlVecType<double>          d4 = { 22, 23, 24, 25 };
    MyDataFrame                 df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", d4));

    MyDataFrame df2 = df.get_data_by_loc<double>(Index2D<long> { 3, 6 });
    MyDataFrame df3 = df.get_data_by_loc<double>(Index2D<long> { 0, 7 });
    MyDataFrame df4 = df.get_data_by_loc<double>(Index2D<long> { -4, -1 });
    auto        vw5 = df.get_view_by_loc<double>(Index2D<long> { -4, 6 });

    df.write<std::ostream, double>(std::cout);
    df2.write<std::ostream, double>(std::cout);
    df3.write<std::ostream, double>(std::cout);
    df4.write<std::ostream, double>(std::cout);
    vw5.write<std::ostream, double>(std::cout);

    const auto  vw6 = df.get_view_by_loc<double>(Index2D<long> { 3, 6 }, inclusiveness::end);

    assert(vw6.get_index().size() == 3);
    assert(vw6.get_index()[1] == 123450);
    assert(vw6.get_index()[2] == 123449);
    assert(vw6.get_column<double>("col_2").size() == 3);
    assert(vw6.get_column<double>("col_2")[0] == 12.0);
    assert(vw6.get_column<double>("col_1")[1] == 6.0);
    assert(vw6.get_column<double>("col_3")[2] == 21.0);
    assert(vw6.get_column<double>("col_4").size() == 3);
    assert(std::isnan(vw6.get_column<double>("col_4")[0]));
    assert(std::isnan(vw6.get_column<double>("col_4")[1]));
    assert(std::isnan(vw6.get_column<double>("col_4")[2]));

    const auto  df7 = df.get_data_by_loc<double>(Index2D<long> { 3, 6 }, inclusiveness::both);

    assert(df7.get_index().size() == 4);
    assert(df7.get_index()[1] == 123455);
    assert(df7.get_index()[3] == 123449);
    assert(df7.get_column<double>("col_2").size() == 4);
    assert(df7.get_column<double>("col_2")[0] == 11.0);
    assert(df7.get_column<double>("col_1")[1] == 5.0);
    assert(df7.get_column<double>("col_3")[0] == 18.0);
    assert(df7.get_column<double>("col_3")[3] == 21.0);
    assert(df7.get_column<double>("col_4").size() == 4);
    assert(df7.get_column<double>("col_4")[0] == 25.0);
    assert(std::isnan(df7.get_column<double>("col_4")[1]));
    assert(std::isnan(df7.get_column<double>("col_4")[2]));
    assert(std::isnan(df7.get_column<double>("col_4")[3]));

    auto    vw8 = df.get_view_by_loc<double>(Index2D<long> { 3, 6 }, inclusiveness::neither);

    assert(vw8.get_index().size() == 2);
    assert(vw8.get_index()[0] == 123455);
    assert(vw8.get_index()[1] == 123450);
    assert(vw8.get_column<double>("col_2").size() == 2);
    assert(vw8.get_column<double>("col_2")[0] == 12.0);
    assert(vw8.get_column<double>("col_1")[1] == 6.0);
    assert(vw8.get_column<double>("col_3")[1] == 20.0);
    assert(vw8.get_column<double>("col_4").size() == 2);
    assert(std::isnan(vw8.get_column<double>("col_4")[0]));
    assert(std::isnan(vw8.get_column<double>("col_4")[1]));

    try  {
        MyDataFrame df6 = df.get_data_by_loc<double>(Index2D<long> { 3, 8 });
    }
    catch (const BadRange &ex)  {
        std::cout << "Caught: " << ex.what() << std::endl;
    }
    try  {
        MyDataFrame df7 = df.get_data_by_loc<double>(Index2D<long> { -8, -1 });
    }
    catch (const BadRange &ex)  {
        std::cout << "Caught: " << ex.what() << std::endl;
    }
}

// -----------------------------------------------------------------------------

static void test_get_view_by_loc()  {

    std::cout << "\nTesting get_view_by_loc() ..." << std::endl;

    StlVecType<unsigned long>  idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 };
    StlVecType<double>         d1 = { 1, 2, 3, 4, 5, 6, 7 };
    StlVecType<double>         d2 = { 8, 9, 10, 11, 12, 13, 14 };
    StlVecType<double>         d3 = { 15, 16, 17, 18, 19, 20, 21 };
    StlVecType<double>         d4 = { 22, 23, 24, 25 };
    StlVecType<std::string>    s1 = { "11", "22", "33", "xx", "yy", "gg", "string" };
    MyDataFrame                df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", d4),
                 std::make_pair("col_str", s1));

    auto  memory_use1 = df.get_memory_usage<double>("col_3");

    std::cout << "DataFrame Memory Usage:\n" << memory_use1 << std::endl;

    typedef MyDataFrame::View MyDataFrameView;
    typedef MyDataFrame::ConstView MyDataFrameConstView;

    const MyDataFrame       &const_df = df;
    MyDataFrameView         dfv = df.get_view_by_loc<double, std::string>(Index2D<long> { 3, 6 });
    MyDataFrameView         dfv2 = df.get_view_by_loc<double, std::string>(Index2D<long> { -5, -1 });
    MyDataFrameConstView    dfcv = const_df.get_view_by_loc<double, std::string>(Index2D<long> { 3, 6 });
    MyDataFrameConstView    dfcv2 = const_df.get_view_by_loc<double, std::string>(Index2D<long> { -5, -1 });

    dfv.shrink_to_fit<double, std::string>();
    dfv.write<std::ostream, double, std::string>(std::cout);
    dfv2.write<std::ostream, double, std::string>(std::cout);
    dfv.get_column<double>("col_3")[0] = 88.0;
    assert(dfv.get_column<double>("col_3")[0] == df.get_column<double>("col_3")[3]);
    assert(dfv.get_column<double>("col_3")[0] == 88.0);
    assert(dfcv.get_column<double>("col_3")[0] == df.get_column<double>("col_3")[3]);
    assert(dfcv.get_column<double>("col_3")[0] == 88.0);

    auto  memory_use2 = dfv.get_memory_usage<double>("col_3");

    std::cout << "View Memory Usage:\n" << memory_use2 << std::endl;
}

// -----------------------------------------------------------------------------

static void test_get_data_by_loc_location()  {

    std::cout << "\nTesting get_data_by_loc(locations) ..." << std::endl;

    StlVecType<unsigned long>  idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 };
    StlVecType<double> d1 = { 1, 2, 3, 4, 5, 6, 7 };
    StlVecType<double> d2 = { 8, 9, 10, 11, 12, 13, 14 };
    StlVecType<double> d3 = { 15, 16, 17, 18, 19, 20, 21 };
    StlVecType<double> d4 = { 22, 23, 24, 25 };
    MyDataFrame        df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", d4));

    MyDataFrame df2 = df.get_data_by_loc<double>(StlVecType<long> { 3, 6 });
    MyDataFrame df3 = df.get_data_by_loc<double>(StlVecType<long> { -4, -1 , 5 });

    assert(df2.get_index().size() == 2);
    assert(df2.get_column<double>("col_3").size() == 2);
    assert(df2.get_column<double>("col_2").size() == 2);
    assert(df2.get_index()[0] == 123450);
    assert(df2.get_index()[1] == 123449);
    assert(df2.get_column<double>("col_3")[0] == 18.0);
    assert(df2.get_column<double>("col_2")[1] == 14.0);
    assert(std::isnan(df2.get_column<double>("col_4")[1]));

    assert(df3.get_index().size() == 3);
    assert(df3.get_column<double>("col_3").size() == 3);
    assert(df3.get_column<double>("col_2").size() == 3);
    assert(df3.get_column<double>("col_1").size() == 3);
    assert(df3.get_index()[0] == 123450);
    assert(df3.get_index()[1] == 123449);
    assert(df3.get_index()[2] == 123450);
    assert(df3.get_column<double>("col_1")[0] == 4.0);
    assert(df3.get_column<double>("col_2")[2] == 13.0);
    assert(df3.get_column<double>("col_4")[0] == 25.0);
    assert(std::isnan(df3.get_column<double>("col_4")[1]));
    assert(std::isnan(df3.get_column<double>("col_4")[2]));
}

// -----------------------------------------------------------------------------

static void test_get_view_by_loc_location()  {

    std::cout << "\nTesting get_view_by_loc(locations) ..." << std::endl;

    StlVecType<unsigned long>  idx = { 123450, 123451, 123452, 123450, 123455, 123450, 123449 };
    StlVecType<double> d1 = { 1, 2, 3, 4, 5, 6, 7 };
    StlVecType<double> d2 = { 8, 9, 10, 11, 12, 13, 14 };
    StlVecType<double> d3 = { 15, 16, 17, 18, 19, 20, 21 };
    StlVecType<double> d4 = { 22, 23, 24, 25 };
    MyDataFrame        df;

    df.load_data(std::move(idx),
                 std::make_pair("col_1", d1),
                 std::make_pair("col_2", d2),
                 std::make_pair("col_3", d3),
                 std::make_pair("col_4", d4));

    const MyDataFrame   &const_df = df;

    auto    dfv1 = df.get_view_by_loc<double>(StlVecType<long> { 3, 6 });
    auto    dfv2 = df.get_view_by_loc<double>(StlVecType<long> { -4, -1 , 5 });
    auto    const_dfv1 = const_df.get_view_by_loc<double>(StlVecType<long> { 3, 6 });
    auto    const_dfv2 = const_df.get_view_by_loc<double>(StlVecType<long> { -4, -1 , 5 });

    assert(dfv1.get_index().size() == 2);
    assert(dfv1.get_column<double>("col_3").size() == 2);
    assert(dfv1.get_column<double>("col_2").size() == 2);
    assert(dfv1.get_index()[0] == 123450);
    assert(dfv1.get_index()[1] == 123449);
    assert(dfv1.get_column<double>("col_3")[0] == 18.0);
    assert(dfv1.get_column<double>("col_2")[1] == 14.0);
    assert(std::isnan(dfv1.get_column<double>("col_4")[1]));

    assert(const_dfv1.get_index().size() == 2);
    assert(const_dfv1.get_column<double>("col_3").size() == 2);
    assert(const_dfv1.get_column<double>("col_2").size() == 2);
    assert(const_dfv1.get_index()[0] == 123450);
    assert(const_dfv1.get_index()[1] == 123449);
    assert(const_dfv1.get_column<double>("col_3")[0] == 18.0);
    assert(const_dfv1.get_column<double>("col_2")[1] == 14.0);
    assert(std::isnan(const_dfv1.get_column<double>("col_4")[1]));

    assert(dfv2.get_index().size() == 3);
    assert(dfv2.get_column<double>("col_3").size() == 3);
    assert(dfv2.get_column<double>("col_2").size() == 3);
    assert(dfv2.get_column<double>("col_1").size() == 3);
    assert(dfv2.get_index()[0] == 123450);
    assert(dfv2.get_index()[1] == 123449);
    assert(dfv2.get_index()[2] == 123450);
    assert(dfv2.get_column<double>("col_1")[0] == 4.0);
    assert(dfv2.get_column<double>("col_2")[2] == 13.0);
    assert(dfv2.get_column<double>("col_4")[0] == 25.0);
    assert(std::isnan(dfv2.get_column<double>("col_4")[1]));
    assert(std::isnan(dfv2.get_column<double>("col_4")[2]));

    assert(const_dfv2.get_index().size() == 3);
    assert(const_dfv2.get_column<double>("col_3").size() == 3);
    assert(const_dfv2.get_column<double>("col_2").size() == 3);
    assert(const_dfv2.get_column<double>("col_1").size() == 3);
    assert(const_dfv2.get_index()[0] == 123450);
    assert(const_dfv2.get_index()[1] == 123449);
    assert(const_dfv2.get_index()[2] == 123450);
    assert(const_dfv2.get_column<double>("col_1")[0] == 4.0);
    assert(const_dfv2.get_column<double>("col_2")[2] == 13.0);
    assert(const_dfv2.get_column<double>("col_4")[0] == 25.0);
    assert(std::isnan(const_dfv2.get_column<double>("col_4")[1]));
    assert(std::isnan(const_dfv2.get_column<double>("col_4")[2]));

    dfv2.get_column<double>("col_1")[0] = 101.0;
    assert(dfv2.get_column<double>("col_1")[0] == 101.0);
    assert(const_dfv2.get_column<double>("col_1")[0] == 101.0);
    assert(df.get_column<double>("col_1")[3] == 101.0);

    auto  memory_use = dfv2.get_memory_usage<double>("col_3");

    std::cout << "View Memory Usage:\n" << memory_use << std::endl;
}

C++ DataFrame