Source code for neurokit2.complexity.fractal_dfa

# -*- coding: utf-8 -*-
from warnings import warn

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..misc import find_knee



[docs]
def fractal_dfa(
    signal,
    scale="default",
    overlap=True,
    integrate=True,
    order=1,
    multifractal=False,
    q="default",
    maxdfa=False,
    show=False,
    **kwargs,
):
    """**(Multifractal) Detrended Fluctuation Analysis (DFA or MFDFA)**

    Detrended fluctuation analysis (DFA) is used to find long-term statistical dependencies in time
    series.

    For monofractal DFA, the output *alpha* :math:`\\alpha` corresponds to the slope of the linear
    trend between the scale factors and the fluctuations. For multifractal DFA, the slope values
    under different *q* values are actually generalised Hurst exponents *h*. Monofractal DFA
    corresponds to MFDFA with *q = 2*, and its output is actually an estimation of the
    **Hurst exponent** (:math:`h_{(2)}`).

    The Hurst exponent is the measure of long range autocorrelation of a signal, and
    :math:`h_{(2)} > 0.5` suggests the presence of long range correlation, while
    :math:`h_{(2)} < 0.5`suggests short range correlations. If :math:`h_{(2)} = 0.5`, it indicates
    uncorrelated indiscriminative fluctuations, i.e. a Brownian motion.

    .. figure:: ../img/douglas2022a.png
       :alt: Illustration of DFA (Douglas et al., 2022).

    Multifractal DFA returns the generalised Hurst exponents *h* for different values of *q*. It is
    converted to the multifractal **scaling exponent** *Tau* :math:`\\tau_{(q)}`, which non-linear
    relationship with *q* can indicate multifractility. From there, we derive the singularity
    exponent *H* (or :math:`\\alpha`) (also known as Hölder's exponents) and the singularity
    dimension *D* (or :math:`f(\\alpha)`). The variation of *D* with *H* is known as multifractal
    singularity spectrum (MSP), and usually has shape of an inverted parabola. It measures the long
    range correlation property of a signal. From these elements, different features are extracted:

    * **Width**: The width of the singularity spectrum, which quantifies the degree of the
      multifractality. In the case of monofractal signals, the MSP width is zero, since *h*\\(q) is
      independent of *q*.
    * **Peak**: The value of the singularity exponent *H* corresponding to the peak of
      singularity dimension *D*. It is a measure of the self-affinity of the signal, and a high
      value is an indicator of high degree of correlation between the data points. In the other
      words, the process is recurrent and repetitive.
    * **Mean**: The mean of the maximum and minimum values of singularity exponent *H*, which
      quantifies the average fluctuations of the signal.
    * **Max**: The value of singularity spectrum *D* corresponding to the maximum value of
      singularity exponent *H*, which indicates the maximum fluctuation of the signal.
    * **Delta**: the vertical distance between the singularity spectrum *D* where the singularity
      exponents are at their minimum and maximum. Corresponds to the range of fluctuations of the
      signal.
    * **Asymmetry**: The Asymmetric Ratio (AR) corresponds to the centrality of the peak of the
      spectrum. AR = 0.5 indicates that the multifractal spectrum is symmetric (Orozco-Duque et
      al., 2015).
    * **Fluctuation**: The *h*-fluctuation index (hFI) is defined as the power of the second
      derivative of *h*\\(q). See Orozco-Duque et al. (2015).
    * **Increment**: The cumulative function of the squared increments (:math:`\\alpha CF`) of the
      generalized Hurst's exponents between consecutive moment orders is a more robust index of the
      distribution of the generalized Hurst's exponents (Faini et al., 2021).

    This function can be called either via ``fractal_dfa()`` or ``complexity_dfa()``, and its
    multifractal variant can be directly accessed via ``fractal_mfdfa()`` or ``complexity_mfdfa()``.

    .. note ::

      Help is needed to implement the modified formula to compute the slope when
      *q* = 0. See for instance Faini et al. (2021). See https://github.com/LRydin/MFDFA/issues/33

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    scale : list
        A list containing the lengths of the windows (number of data points in each subseries) that
        the signal is divided into. Also referred to as Tau :math:`\\tau`. If ``"default"``, will
        set it to a logarithmic scale (so that each window scale has the same weight) with a
        minimum of 4 and maximum of a tenth of the length (to have more than 10 windows to
        calculate the average fluctuation).
    overlap : bool
        Defaults to ``True``, where the windows will have a 50% overlap with each other, otherwise
        non-overlapping windows will be used.
    integrate : bool
        It is common practice to convert the signal to a random walk (i.e., detrend and integrate,
        which corresponds to the signal 'profile') in order to avoid having too small exponent
        values. Note that it leads to the flattening of the signal, which can lead to the loss of
        some details (see Ihlen, 2012 for an explanation). Note that for strongly anticorrelated
        signals, this transformation should be applied  two times (i.e., provide
        ``np.cumsum(signal - np.mean(signal))`` instead of ``signal``).
    order : int
       The order of the polynomial trend for detrending. 1 corresponds to a linear detrending.
    multifractal : bool
        If ``True``, compute Multifractal Detrended Fluctuation Analysis (MFDFA), in which case the
        argument ``q`` is taken into account.
    q : Union[int, list, np.array]
        The sequence of fractal exponents when ``multifractal=True``. Must be a sequence between
        -10 and 10 (note that zero will be removed, since the code does not converge there).
        Setting ``q = 2`` (default for DFA) gives a result of a standard DFA. For instance, Ihlen
        (2012) uses ``q = [-5, -3, -1, 0, 1, 3, 5]`` (default when for multifractal). In general,
        positive q moments amplify the contribution of fractal components with larger amplitude and
        negative q moments amplify the contribution of fractal with smaller amplitude (Kantelhardt
        et al., 2002).
    maxdfa : bool
        If ``True``, it will locate the knee of the fluctuations (using :func:`.find_knee`) and use
        that as a maximum scale value. It computes max. DFA (a similar method exists in
        :func:`entropy_rate`).
    show : bool
        Visualise the trend between the window size and the fluctuations.
    **kwargs : optional
        Currently not used.

    Returns
    ----------
    dfa : float or pd.DataFrame
        If ``multifractal`` is ``False``, one DFA value is returned for a single time series.
    parameters : dict
        A dictionary containing additional information regarding the parameters used
        to compute DFA. If ``multifractal`` is ``False``, the dictionary contains q value, a
        series of windows, fluctuations of each window and the
        slopes value of the log2(windows) versus log2(fluctuations) plot. If
        ``multifractal`` is ``True``, the dictionary additionally contains the
        parameters of the singularity spectrum.

    See Also
    --------
    fractal_hurst, fractal_tmf, entropy_rate

    Examples
    ----------
    **Example 1:** Monofractal DFA

    .. ipython:: python

      import neurokit2 as nk

      signal = nk.signal_simulate(duration=10, frequency=[5, 7, 10, 14], noise=0.05)

      @savefig p_fractal_dfa1.png scale=100%
      dfa, info = nk.fractal_dfa(signal, show=True)
      @suppress
      plt.close()

    .. ipython:: python

      dfa

    As we can see from the plot, the final value, corresponding to the slope of the red line,
    doesn't capture properly the relationship. We can adjust the *scale factors* to capture the
    fractality of short-term fluctuations.

    .. ipython:: python

      scale = nk.expspace(10, 100, 20, base=2)

      @savefig p_fractal_dfa2.png scale=100%
      dfa, info = nk.fractal_dfa(signal, scale=scale, show=True)
      @suppress
      plt.close()

    **Example 2:** Multifractal DFA (MFDFA)

    .. ipython:: python

      @savefig p_fractal_dfa3.png scale=100%
      mfdfa, info = nk.fractal_mfdfa(signal, q=[-5, -3, -1, 0, 1, 3, 5], show=True)
      @suppress
      plt.close()

    .. ipython:: python

      mfdfa

    References
    -----------
    * Faini, A., Parati, G., & Castiglioni, P. (2021). Multiscale assessment of the degree of
      multifractality for physiological time series. Philosophical Transactions of the Royal
      Society A, 379(2212), 20200254.
    * Orozco-Duque, A., Novak, D., Kremen, V., & Bustamante, J. (2015). Multifractal analysis for
      grading complex fractionated electrograms in atrial fibrillation. Physiological Measurement,
      36(11), 2269-2284.
    * Ihlen, E. A. F. E. (2012). Introduction to multifractal detrended
      fluctuation analysis in Matlab. Frontiers in physiology, 3, 141.
    * Kantelhardt, J. W., Zschiegner, S. A., Koscielny-Bunde, E., Havlin, S.,
      Bunde, A., & Stanley, H. E. (2002). Multifractal detrended fluctuation
      analysis of nonstationary time series. Physica A: Statistical
      Mechanics and its Applications, 316(1-4), 87-114.
    * Hardstone, R., Poil, S. S., Schiavone, G., Jansen, R., Nikulin, V. V.,
      Mansvelder, H. D., & Linkenkaer-Hansen, K. (2012). Detrended
      fluctuation analysis: a scale-free view on neuronal oscillations.
      Frontiers in physiology, 3, 450.

    """
    # Sanity checks
    if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1:
        raise ValueError(
            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
        )

    n = len(signal)
    scale = _fractal_dfa_findscales(n, scale)

    # Sanitize fractal power (cannot be close to 0)
    q = _sanitize_q(q, multifractal=multifractal)

    # Store parameters
    info = {"scale": scale, "q": q}

    # Preprocessing
    if integrate is True:
        # Get signal profile
        signal = np.cumsum(signal - np.mean(signal))

    # Function to store fluctuations. For DFA this is an array. For MFDFA, this
    # is a matrix of size (len(scale),len(q))
    fluctuations = np.zeros((len(scale), len(q)))

    # Start looping over scale
    for i, window in enumerate(scale):

        # Get window
        segments = _fractal_dfa_getwindow(signal, n, window, overlap=overlap)

        # Get polynomial trends
        trends = _fractal_dfa_trends(segments, window, order=order)

        # Get local fluctuation
        fluctuations[i] = _fractal_dfa_fluctuation(segments, trends, q)

    if len(fluctuations) == 0:
        return np.nan, info

    # Max. DFA ---------------------
    if maxdfa is True:
        # Find knees of fluctuations
        knee = np.repeat(len(scale), fluctuations.shape[1])
        for i in range(fluctuations.shape[1]):
            knee[i] = find_knee(
                y=np.log2(fluctuations[:, i]), x=np.log2(scale), show=False, verbose=False
            )
        knee = np.exp2(np.nanmax(knee))
        # Cut fluctuations
        fluctuations = fluctuations[scale <= knee, :]
        scale = scale[scale <= knee]
    # ------------------------------

    # Get slopes
    slopes = _slopes(scale, fluctuations, q)
    if len(slopes) == 1:
        slopes = slopes[0]

    # Prepare output
    info["Fluctuations"] = fluctuations
    info["Alpha"] = slopes

    # Extract features
    if multifractal is True:
        info.update(_singularity_spectrum(q=q, slopes=slopes))
        out = pd.DataFrame(
            {
                k: v
                for k, v in info.items()
                if k
                in [
                    "Peak",
                    "Width",
                    "Mean",
                    "Max",
                    "Delta",
                    "Asymmetry",
                    "Fluctuation",
                    "Increment",
                ]
            },
            index=[0],
        )
    else:
        out = slopes

    # Plot if show is True.
    if show is True:
        if multifractal is True:
            _fractal_mdfa_plot(
                info,
                scale=scale,
                fluctuations=fluctuations,
                q=q,
            )
        else:
            _fractal_dfa_plot(info=info, scale=scale, fluctuations=fluctuations)

    return out, info



# =============================================================================
#  Utils
# =============================================================================
def _fractal_dfa_findscales(n, scale="default"):
    # Convert to array
    if isinstance(scale, list):
        scale = np.asarray(scale)

    # Default scale number
    if scale is None or isinstance(scale, str):
        scale = int(n / 10)

    # See https://github.com/neuropsychology/NeuroKit/issues/206
    if isinstance(scale, int):
        scale = np.exp(np.linspace(np.log(10), np.log(int(n / 10)), scale)).astype(int)
        scale = np.unique(scale)  # keep only unique

    # Sanity checks (return warning for too short scale)
    if len(scale) < 2:
        raise ValueError("NeuroKit error: more than one window is needed. Increase 'scale'.")

    if np.min(scale) < 2:
        raise ValueError(
            "NeuroKit error: there must be at least 2 data points in each window. Decrease 'scale'."
        )
    if np.max(scale) >= n:
        raise ValueError(
            "NeuroKit error: the window cannot contain more data points than the time series. Decrease 'scale'."
        )

    return scale


def _sanitize_q(q=2, multifractal=False):
    # Turn to list
    if isinstance(q, str):
        if multifractal is False:
            q = [2]
        else:
            q = [-5, -3, -1, 0, 1, 3, 5]
    elif isinstance(q, (int, float)):
        q = [q]

    # Fractal powers as floats
    q = np.asarray_chkfinite(q, dtype=float)

    return q


def _slopes(scale, fluctuations, q):
    # Extract the slopes of each `q` power obtained with MFDFA to later produce
    # either the singularity spectrum or the multifractal exponents.
    # Note: added by Leonardo Rydin (see https://github.com/LRydin/MFDFA/)

    # Ensure mfdfa has the same q-power entries as q
    if fluctuations.shape[1] != q.shape[0]:
        raise ValueError("Fluctuation function and q powers don't match in dimension.")

    # Allocated array for slopes
    slopes = np.zeros(len(q))
    # Find slopes of each q-power
    for i in range(len(q)):
        # if fluctiations is zero, log2 wil encounter zero division
        old_setting = np.seterr(divide="ignore", invalid="ignore")
        slopes[i] = np.polyfit(np.log2(scale), np.log2(fluctuations[:, i]), 1)[0]
        np.seterr(**old_setting)

    return slopes


def _fractal_dfa_getwindow(signal, n, window, overlap=True):
    # This function reshapes the segments from a one-dimensional array to a
    # matrix for faster polynomail fittings. 'Overlap' reshapes into several
    # overlapping partitions of the data

    # TODO: see whether this step could be integrated within complexity_coarsegraining

    if overlap:
        segments = np.array([signal[i : i + window] for i in np.arange(0, n - window, window // 2)])
    else:
        segments = signal[: n - (n % window)]
        segments = segments.reshape((signal.shape[0] // window, window))

    return segments


def _fractal_dfa_trends(segments, window, order=1):
    # TODO: can we rely on signal_detrend?
    x = np.arange(window)

    coefs = np.polyfit(x[:window], segments.T, order).T

    # TODO: Could this be optimized? Something like np.polyval(x[:window], coefs)
    trends = np.array([np.polyval(coefs[j], x) for j in np.arange(len(segments))])

    return trends


def _fractal_dfa_fluctuation(segments, trends, q=2):

    # Detrend
    detrended = segments - trends

    # Compute variance
    var = np.var(detrended, axis=1)

    # Remove where var is zero
    var = var[var > 1e-08]
    if len(var) == 0:
        warn("All detrended segments have no variance. Retuning NaN.")
        return np.nan

    # Find where q is close to zero. Limit set at |q| < 0.1
    # See https://github.com/LRydin/MFDFA/issues/33
    is0 = np.abs(q) < 0.1

    # Reshape q to perform np.float_power
    q_non0 = q[~is0].reshape(-1, 1)

    # Get the fluctuation function, which is a function of the windows and of q
    # When q = 2 (i.e., multifractal = False)
    # The formula is equivalent to np.sqrt(np.mean(var))
    # And corresponds to the Root Mean Square (RMS)
    fluctuation = np.float_power(np.mean(np.float_power(var, q_non0 / 2), axis=1), 1 / q_non0.T)

    if np.sum(is0) > 0:
        fluc0 = np.exp(0.5 * np.mean(np.log(var)))
        fluctuation = np.insert(fluctuation, np.where(is0)[0], [fluc0])

    return fluctuation


# =============================================================================
#  Utils MFDFA
# =============================================================================
def _singularity_spectrum(q, slopes):
    """Extract the slopes of the fluctuation function to futher obtain the
    singularity strength `α` (or Hölder exponents) and singularity spectrum
    `f(α)` (or fractal dimension). This is iconically shaped as an inverse
    parabola, but most often it is difficult to obtain the negative `q` terms,
    and one can focus on the left side of the parabola (`q>0`).

    Note that these measures rarely match the theoretical expectation,
    thus a variation of ± 0.25 is absolutely reasonable.

    The parameters are mostly identical to `fractal_mfdfa()`, as one needs to
    perform MFDFA to obtain the singularity spectrum. Calculating only the
    DFA is insufficient, as it only has `q=2`, and a set of `q` values are
    needed. Here defaulted to `q = list(range(-5,5))`, where the `0` element
    is removed by `_cleanse_q()`.

    This was first designed and implemented by Leonardo Rydin in
    `MFDFA <https://github.com/LRydin/MFDFA/>`_ and ported here by the same.

    """
    # Components of the singularity spectrum
    # ---------------------------------------
    # The generalised Hurst exponents `h(q)` from MFDFA, which are simply the slopes of each DFA
    # for various `q` values
    out = {"h": slopes}

    # The generalised Hurst exponent h(q) is related to the Scaling Exponent Tau t(q):
    out["Tau"] = q * slopes - 1

    # Calculate Singularity Exponent H or α, which needs tau
    out["H"] = np.gradient(out["Tau"]) / np.gradient(q)

    # Calculate Singularity Dimension Dq or f(α), which needs tau and q
    # The relation between α and f(α) (H and D) is called the Multifractal (MF) spectrum or
    # singularity spectrum, which resembles the shape of an inverted parabola.
    out["D"] = q * out["H"] - out["Tau"]

    # Features (Orozco-Duque et al., 2015)
    # ---------------------------------------
    # The width of the MSP quantifies the degree of the multifractality
    # the spectrum width delta quantifies the degree of the multifractality.
    out["Width"] = np.nanmax(out["H"]) - np.nanmin(out["H"])

    # the singularity exponent, for which the spectrum D takes its maximum value (α0)
    out["Peak"] = out["H"][np.nanargmax(out["D"])]

    # The mean of the maximum and minimum values of singularity exponent H
    out["Mean"] = (np.nanmax(out["H"]) + np.nanmin(out["H"])) / 2

    # The value of singularity spectrum D, corresponding to the maximum value of
    # singularity exponent H, indicates the maximum fluctuation of the signal.
    out["Max"] = out["D"][np.nanargmax(out["H"])]

    # The vertical distance between the singularity spectrum *D* where the singularity
    # exponents are at their minimum and maximum.
    out["Delta"] = out["D"][np.nanargmax(out["H"])] - out["D"][np.nanargmin(out["H"])]

    # the asymmetric ratio (AR) defined as the ratio between h calculated with negative q and the
    # total width of the spectrum. If the multifractal spectrum is symmetric, AR should be equal to
    # 0.5
    out["Asymmetry"] = (np.nanmin(out["H"]) - out["Peak"]) / out["Width"]

    # h-fluctuation index (hFI), which is defined as the power of the second derivative of h(q)
    # hFI tends to zero in high fractionation signals.
    if len(slopes) > 3:
        # Help needed to double check that!
        out["Fluctuation"] = np.sum(np.gradient(np.gradient(out["h"])) ** 2) / (
            2 * np.max(np.abs(q)) + 2
        )
    else:
        out["Fluctuation"] = np.nan
    # hFI tends to zero in high fractionation signals. hFI has no reference point when a set of
    # signals is evaluated, so hFI must be normalisedd, so that hFIn = 1 is the most organised and
    # the most regular signal in the set
    # BUT the formula in Orozco-Duque (2015) is weird, as HFI is a single value so cannot be
    # normalized by its range...

    # Faini (2021): new index that describes the distribution of the generalized Hurst's exponents
    # without requiring the Legendre transform. This index, αCF, is the cumulative function of the
    # squared increments of the generalized Hurst's exponents between consecutive moment orders.
    out["Increment"] = np.sum(np.gradient(slopes) ** 2 / np.gradient(q))

    return out


# =============================================================================
#  Plots
# =============================================================================
def _fractal_dfa_plot(info, scale, fluctuations):

    polyfit = np.polyfit(np.log2(scale), np.log2(fluctuations), 1)
    fluctfit = 2 ** np.polyval(polyfit, np.log2(scale))
    plt.loglog(scale, fluctuations, "o", c="#90A4AE")
    plt.xlabel(r"$\log_{2}$(Scale)")
    plt.ylabel(r"$\log_{2}$(Fluctuations)")
    plt.loglog(scale, fluctfit, c="#E91E63", label=r"$\alpha$ = {:.3f}".format(info["Alpha"]))

    plt.legend(loc="lower right")
    plt.title("Detrended Fluctuation Analysis (DFA)")

    return None


def _fractal_mdfa_plot(info, scale, fluctuations, q):

    # Prepare figure
    fig = plt.figure(constrained_layout=False)
    spec = matplotlib.gridspec.GridSpec(ncols=2, nrows=2)

    ax_fluctuation = fig.add_subplot(spec[0, 0])
    ax_spectrum = fig.add_subplot(spec[0, 1])
    ax_tau = fig.add_subplot(spec[1, 0])
    ax_hq = fig.add_subplot(spec[1, 1])

    n = len(q)
    colors = plt.cm.viridis(np.linspace(0, 1, n))

    for i in range(n):
        # Plot the points
        ax_fluctuation.loglog(
            scale,
            fluctuations[:, i],
            "o",
            fillstyle="full",
            markeredgewidth=0.0,
            c=colors[i],
            alpha=0.2,
            markersize=6,
            base=2,
            zorder=1,
        )

        # Plot the polyfit line
        polyfit = np.polyfit(np.log2(scale), np.log2(fluctuations[:, i]), 1)
        fluctfit = 2 ** np.polyval(polyfit, np.log2(scale))
        ax_fluctuation.loglog(scale, fluctfit, c=colors[i], base=2, label="_no_legend_", zorder=2)

        # Add labels for max and min
        if i == 0:
            ax_fluctuation.plot(
                [],
                label=f"$h$ = {polyfit[0]:.3f}, $q$ = {q[0]:.1f}",
                c=colors[0],
            )
        elif i == (n - 1):
            ax_fluctuation.plot(
                [],
                label=f"$h$ = {polyfit[0]:.3f}, $q$ = {q[-1]:.1f}",
                c=colors[-1],
            )

    ax_fluctuation.set_xlabel(r"$\log_{2}$(Scale)")
    ax_fluctuation.set_ylabel(r"$\log_{2}$(Fluctuations)")
    ax_fluctuation.legend(loc="lower right")

    # Plot the singularity spectrum
    # ax.set_title("Singularity Spectrum")
    ax_spectrum.set_ylabel(r"Singularity Dimension ($D$)")
    ax_spectrum.set_xlabel(r"Singularity Exponent ($H$)")
    ax_spectrum.axvline(
        x=info["Peak"],
        color="black",
        linestyle="dashed",
        label=r"Peak = {:.3f}".format(info["Peak"]),
    )
    ax_spectrum.plot(
        [np.nanmin(info["H"]), np.nanmax(info["H"])],
        [np.nanmin(info["D"])] * 2,
        color="red",
        linestyle="solid",
        label=r"Width = {:.3f}".format(info["Width"]),
    )
    ax_spectrum.plot(
        [np.nanmin(info["H"]), np.nanmax(info["H"])],
        [info["D"][-1], info["D"][0]],
        color="#B71C1C",
        linestyle="dotted",
        label=r"Delta = {:.3f}".format(info["Delta"]),
    )
    ax_spectrum.plot(info["H"], info["D"], "o-", c="#FFC107")
    ax_spectrum.legend(loc="lower right")

    # Plot tau against q
    # ax.set_title("Scaling Exponents")
    ax_tau.set_ylabel(r"Scaling Exponent ($τ$)")
    ax_tau.set_xlabel(r"Fractal Exponent ($q$)")
    ax_tau.plot(q, info["Tau"], "o-", c="#E91E63")

    # Plot H against q
    # ax.set_title("Generalised Hurst Exponents")
    ax_hq.set_ylabel(r"Singularity Exponent ($H$)")
    ax_hq.set_xlabel(r"Fractal Exponent ($q$)")
    ax_hq.plot(q, info["H"], "o-", c="#2196F3")

    fig.suptitle("Multifractal Detrended Fluctuation Analysis (MFDFA)")

    return None