Source code for neurokit2.complexity.optim_complexity_k

from warnings import warn

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..misc import NeuroKitWarning, find_plateau



[docs]
def complexity_k(signal, k_max="max", show=False):
    """**Automated selection of k for Higuchi Fractal Dimension (HFD)**

    The optimal *k-max* is computed based on the point at which HFD values plateau for a range of
    *k-max* values (see Vega, 2015).

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    k_max : Union[int, str, list], optional
        Maximum number of interval times (should be greater than or equal to 3) to be tested. If
        ``max``, it selects the maximum possible value corresponding to half the length of the
        signal.
    show : bool
        Visualise the slope of the curve for the selected kmax value.

    Returns
    --------
    k : float
        The optimal kmax of the time series.
    info : dict
        A dictionary containing additional information regarding the parameters used
        to compute optimal kmax.

    See Also
    --------
    fractal_higuchi

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk

      signal = nk.signal_simulate(duration=2, sampling_rate=100, frequency=[5, 6], noise=0.5)

      @savefig p_complexity_k1.png scale=100%
      k_max, info = nk.complexity_k(signal, k_max='default', show=True)
      @suppress
      plt.close()

    .. ipython:: python

      k_max

    References
    ----------
    * Higuchi, T. (1988). Approach to an irregular time series on the basis of the fractal theory.
      Physica D: Nonlinear Phenomena, 31(2), 277-283.
    * Vega, C. F., & Noel, J. (2015, June). Parameters analyzed of Higuchi's fractal dimension for
      EEG brain signals. In 2015 Signal Processing Symposium (SPSympo) (pp. 1-5). IEEE. https://
      ieeexplore.ieee.org/document/7168285

    """
    # Get the range of k-max values to be tested
    # ------------------------------------------
    if isinstance(k_max, str):  # e.g., "default"
        # upper limit for k value (max possible value)
        k_max = int(np.floor(len(signal) / 2))  # so that normalizing factor is positive

    if isinstance(k_max, int):
        kmax_range = np.arange(2, k_max + 1)
    elif isinstance(k_max, (list, np.ndarray, pd.Series)):
        kmax_range = np.array(k_max)
    else:
        warn(
            "k_max should be an int or a list of values of kmax to be tested.",
            category=NeuroKitWarning,
        )

    # Compute the slope for each kmax value
    # --------------------------------------
    vectorized_k_slope = np.vectorize(_complexity_k_slope, excluded=[1])
    slopes, intercepts, info = vectorized_k_slope(kmax_range, signal)
    # k_values = [d["k_values"] for d in info]
    average_values = [d["average_values"] for d in info]

    # Find plateau (the saturation point of slope)
    # --------------------------------------------
    optimal_point = find_plateau(slopes, show=False)
    if optimal_point is not None:
        kmax_optimal = kmax_range[optimal_point]
    else:
        kmax_optimal = np.max(kmax_range)
        warn(
            "The optimal kmax value detected is 2 or less. There may be no plateau in this case. "
            + f"You can inspect the plot by set `show=True`. We will return optimal k_max = {kmax_optimal} (the max).",
            category=NeuroKitWarning,
        )

    # Plot
    if show:
        _complexity_k_plot(kmax_range, slopes, kmax_optimal, ax=None)

    # Return optimal tau and info dict
    return kmax_optimal, {
        "Values": kmax_range,
        "Scores": slopes,
        "Intercepts": intercepts,
        "Average_Values": average_values,
    }



# =============================================================================
# Utilities
# =============================================================================


def _complexity_k_Lk(k, signal):
    n = len(signal)

    # Step 1: construct k number of new time series for range of k_values from 1 to kmax
    k_subrange = np.arange(1, k + 1)  # where m = 1, 2... k

    idx = np.tile(np.arange(0, len(signal), k), (k, 1)).astype(float)
    idx += np.tile(np.arange(0, k), (idx.shape[1], 1)).T
    mask = idx >= len(signal)
    idx[mask] = 0

    sig_values = signal[idx.astype(int)].astype(float)
    sig_values[mask] = np.nan

    # Step 2: Calculate length Lm(k) of each curve
    normalization = (n - 1) / (np.floor((n - k_subrange) / k).astype(int) * k)
    sets = (np.nansum(np.abs(np.diff(sig_values)), axis=1) * normalization) / k

    # Step 3: Compute average value over k sets of Lm(k)
    return np.sum(sets) / k


def _complexity_k_slope(kmax, signal, k_number="max"):
    if k_number == "max":
        k_values = np.arange(1, kmax + 1)
    else:
        k_values = np.unique(np.linspace(1, kmax + 1, k_number).astype(int))

    # Step 3 of Vega & Noel (2015)
    vectorized_Lk = np.vectorize(_complexity_k_Lk, excluded=[1])

    # Compute length of the curve, Lm(k)
    average_values = vectorized_Lk(k_values, signal)

    # Slope of best-fit line through points (slope equal to FD)
    slope, intercept = -np.polyfit(np.log(k_values), np.log(average_values), 1)
    return slope, intercept, {"k_values": k_values, "average_values": average_values}


# =============================================================================
# Plotting
# =============================================================================


def _complexity_k_plot(k_range, slope_values, k_optimal, ax=None):

    # Prepare plot
    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = None

    ax.set_title("Optimization of $k_{max}$ parameter")
    ax.set_xlabel("$k_{max}$ values")
    ax.set_ylabel("Higuchi Fractal Dimension (HFD) values")
    colors = plt.cm.PuBu(np.linspace(0, 1, len(k_range)))

    # if single time series
    ax.plot(k_range, slope_values, color="#2196F3", zorder=1)
    for i, _ in enumerate(k_range):
        ax.scatter(k_range[i], slope_values[i], color=colors[i], marker="o", zorder=2)
    ax.axvline(x=k_optimal, color="#E91E63", label="Optimal $k_{max}$: " + str(k_optimal))
    ax.legend(loc="upper right")

    return fig