Back to Documentations

Signature Description Parameters
#include <DataFrame/DataFrameMLVisitors.h>

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
struct FastFourierTransVisitor;

// -------------------------------------

template<typename T, typename I = unsigned long,
         std::size_t A = 0>
using fft_v = FastFourierTransVisitor<T, I, A>;
This is a "single action visitor", meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

This works with both scalar and multidimensional (i.e. vector and arrays) datasets.

This visitor calculates Fast Fourier Transform on a column. If column length is a power of 2, the algorithm will be more efficient in terms of time and memory. The input could be either a column of floating-point values or a column of complex values. The result is always a vector of complex values with same number of items as the given column.

get_result() returns the FFT vector in complex values. In case of a scalar column, the result is a vector of complex numbers with the same size as input column. In case of a multidimensional column, the result is a matrix complex numbers whose columns are DFT of each dimension (so the number of columns in the matrix is the number of dimensions and number of rows is the input column size).
get_magnitude() returns vector of real value magnitudes. Same as the result in case of scalar columns, this is a vector and in case of multidimensional columns it is a matrix.
get_angle() returns real value vector of phase angles. Same as the result in case of scalar columns, this is a vector and in case of multidimensional columns it is a matrix.

A fast Fourier transform (FFT) is an algorithm that computes the discrete Fourier transform (DFT) of a sequence, or its inverse (IDFT). Fourier analysis converts a signal from its original domain (often time or space) to a representation in the frequency domain and vice versa. The DFT is obtained by decomposing a sequence of values into components of different frequencies. This operation is useful in many fields, but computing it directly from the definition is often too slow to be practical. This process has many engineering applications including decomposing a time-series and finding seasonality in a time-series.

    explicit
    FastFourierTransVisitor(bool inverse = false);

    inverse: Calculate the inverse.
    
T: Column data type
I: Index type
A: Memory alignment boundary for vectors. Default is system default alignment
static void test_FastFourierTransVisitor()  {

    std::cout << "\nTesting FastFourierTransVisitor{  } ..." << std::endl;

    using cx = std::complex<double>;

    StlVecType<unsigned long>  idxvec = { 1UL, 2UL, 3UL, 10UL, 5UL, 7UL, 8UL, 12UL };
    StlVecType<double>         dblvec = { 1, 1, 1, 1, 0, 0, 0, 0 };
    StlVecType<std::string>    strvec = { "11", "22", "33", "44", "55", "66", "-77", "88" };
    StlVecType<cx>             cplxvec = { cx(0, 0), cx(1, 1), cx(3, 3), cx(4, 4), cx(4, 4), cx(3, 3), cx(1, 1) };

    MyDataFrame df;

    df.load_data(std::move(idxvec),
                 std::make_pair("str_col", strvec),
                 std::make_pair("dbl_col", dblvec));
    df.load_column("cplx_col", cplxvec, nan_policy::dont_pad_with_nans);

    fft_v<double, unsigned long, 64>   fft;

    df.single_act_visit<double>("dbl_col", fft);

    for (auto citer : fft.get_result())
        std::cout << citer << " | ";
    std::cout << std::endl;
    df.load_column("FFT int col", fft.get_result(), nan_policy::dont_pad_with_nans);

    fft_v<std::complex<double>, unsigned long, 64> i_fft (true);

    df.single_act_visit<std::complex<double>>("FFT int col", i_fft);

    for (auto citer : i_fft.get_result())
        std::cout << citer << " | ";
    std::cout << std::endl;

    // The case of size is not a power of 2
    //
    fft_v<cx, unsigned long, 64>   fft_cx;

    df.single_act_visit<cx>("cplx_col", fft_cx);

    for (auto citer : fft_cx.get_result())
        std::cout << citer << " | ";
    std::cout << std::endl;
    df.load_column("FFT int col 2", fft_cx.get_result(), nan_policy::dont_pad_with_nans);

    fft_v<cx, unsigned long, 64>   i_fft2 (true);

    df.single_act_visit<std::complex<double>>("FFT int col 2", i_fft2);

    for (auto citer : i_fft2.get_result())
        std::cout << citer << " | ";
    std::cout << std::endl;

    try  {
        typedef StdDataFrame64<std::string> StrDataFrame;

        StrDataFrame    df2;

        df2.read("SHORT_IBM.csv", io_format::csv2);

        fft_v<double, std::string, 64>  fft2;

        df2.single_act_visit<double>("IBM_Close", fft2);
        df2.load_column("FFT Close", fft2.get_result());

        fft_v<cx, std::string, 64>   i_fft2_2 (true);

        df2.single_act_visit<cx>("FFT Close", i_fft2_2);

        // for (auto citer : i_fft2_2.get_result())
        //     std::cout << citer << ", ";
        // std::cout << std::endl;
    }
    catch (const DataFrameError &ex)  {
        std::cout << ex.what() << std::endl;
        ::exit(-1);
    }

    // Now multidimensional data
    //
    constexpr std::size_t   dim { 3};

    using col_t = std::array<double, dim>;

    auto    naive_dft =
        [](const std::vector<double> &input) -> std::vector<std::complex<double>>  {
            const std::size_t                   col_s { input.size() };
            std::vector<std::complex<double>>   result (col_s);

            for (std::size_t k = 0; k < col_s; ++k)  {
                result[k] = { 0.0, 0.0 };
                for (std::size_t n = 0; n < col_s; ++n)  {
                    const double    angle = -2.0 * M_PI * double(k) * double(n) / double(col_s);

                    result[k] += input[n] * std::complex<double>(std::cos(angle), std::sin(angle));
                }
            }

            return (result);
        };

    const std::size_t               col_s = df.get_index().size();
    StlVecType<col_t>               multi_dimen_col(col_s);
    StlVecType<std::vector<double>> scalar_channels(dim);

    for (std::size_t d { 0 }; d < dim; ++d)
        scalar_channels[d].resize(col_s);
    for (std::size_t n { 0 }; n < col_s; ++n)  {
        for (std::size_t d { 0 }; d < dim; ++d)  {
            const double    val = std::sin(2.0 * M_PI * double(d + 1) * double(n) / double(dim));

            multi_dimen_col[n][d] = val;
            scalar_channels[d][n] = val;
        }
    }
    df.load_column<col_t>("multi_dimen_col", std::move(multi_dimen_col));

    fft_v<col_t, unsigned long, 64> md_fft;

    df.single_act_visit<col_t>("multi_dimen_col", md_fft);

    const auto  &md_result { md_fft.get_result() };

    assert(md_result.rows() == 8);
    assert(md_result.cols() == 3);
    for (long c { 0 }; c < md_result.cols(); ++c)  {
        const auto  expected = naive_dft(scalar_channels[c]);

        for (long r { 0 }; r < md_result.rows(); ++r)  {
            assert(std::fabs(md_result(r, c).real() - expected[r].real()) < 0.00001);
            assert(std::fabs(md_result(r, c).imag() - expected[r].imag()) < 0.00001);
        }
    }
}

C++ DataFrame