# Source code for neurokit2.complexity.fractal_hurst

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.special

from .fractal_dfa import _fractal_dfa_findscales

def fractal_hurst(signal, scale="default", corrected=True, show=False):
    """**Hurst Exponent (H)**

    This function estimates the Hurst exponent via the standard rescaled range (R/S) approach,
    but other methods exist, such as Detrended Fluctuation Analysis (DFA, see
    :func:`fractal_dfa`).

    The Hurst exponent is a measure for the "long-term memory" of a signal. It can be used to
    determine whether the time series is more, less, or equally likely to increase if it has
    increased in previous steps. This property makes the Hurst exponent especially interesting
    for the analysis of stock data. It typically ranges from 0 to 1, with 0.5 corresponding to a
    Brownian motion. If H < 0.5, the time-series covers less "distance" than a random walk (the
    memory of the signal decays faster than at random), and vice versa.

    The R/S approach first splits the time series into non-overlapping subseries of length n. R
    and S (sigma) are then calculated for each subseries and the mean is taken over all
    subseries, yielding (R/S)_n. This process is repeated for several lengths *n*. The final
    exponent is then derived from fitting a straight line to the plot of
    :math:`log((R/S)_n)` vs :math:`log(n)`.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    scale : list
        A list containing the lengths of the windows (number of data points in each subseries)
        that the signal is divided into. See :func:`fractal_dfa` for more information.
    corrected : bool
        If ``True``, the Anis-Lloyd-Peters correction factor will be applied to the output
        according to the expected value for the individual (R/S) values.
    show : bool
        If ``True``, returns a plot.

    Returns
    -------
    h : float
        The estimated Hurst exponent.
    info : dict
        A dictionary with the log-scales (``"Values"``), the (corrected) log R/S scores
        (``"Scores"``), whether the correction was applied (``"Corrected"``), and the intercept
        of the fitted line (``"Intercept"``).

    See Also
    --------
    fractal_dfa

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk

      # Simulate Signal with duration of 2s
      signal = nk.signal_simulate(duration=2, frequency=5)

      # Compute Hurst Exponent
      h, info = nk.fractal_hurst(signal, corrected=True, show=True)
      h

    References
    ----------
    * Brandi, G., & Di Matteo, T. (2021). On the statistics of scaling exponents and the
      Multiscaling Value at Risk. The European Journal of Finance, 1-22.
    * Annis, A. A., & Lloyd, E. H. (1976). The expected value of the adjusted rescaled Hurst
      range of independent normal summands. Biometrika, 63(1), 111-116.

    """
    # Sanity checks: only 1-dimensional signals are supported.
    if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1:
        raise ValueError(
            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
        )

    n = len(signal)
    scale = _fractal_dfa_findscales(n, scale)

    # Get individual values for (R/S)_n, one per window length.
    rs_vals = np.array([_fractal_hurst_rs(signal, window) for window in scale])

    # Filter NaNs (zeros should not be possible, because if R is 0 then S is also zero).
    valid = ~np.isnan(rs_vals)
    rs_vals = rs_vals[valid]
    n_vals = np.asarray(scale)[valid]

    # It may happen that no R/S values are left (if all values of data are the same).
    if len(rs_vals) == 0:
        raise ValueError(
            "All (R/S) values are NaN. Check your data, or try a different window length."
        )

    # Transform R/S values to log10 space for the linear fit.
    rs_vals = np.log10(rs_vals)
    if corrected:
        # Anis-Lloyd-Peters correction: remove the white-noise expectation.
        rs_vals -= np.log10([expected_rs(n) for n in n_vals])
    n_vals = np.log10(n_vals)

    # Fit a line to the logarithm of the obtained (R/S) values; its slope estimates H.
    poly = np.polyfit(n_vals, rs_vals, 1)
    h = poly[0]  # Get the slope
    if corrected:
        # After subtracting the expected R/S, the residual slope is centered on 0;
        # shift by 0.5 to recover the Hurst exponent scale.
        h = poly[0] + 0.5

    if show:
        _fractal_hurst_plot(poly, n_vals, rs_vals, corrected=corrected, ax=None)

    return h, {"Values": n_vals, "Scores": rs_vals, "Corrected": corrected, "Intercept": poly[1]}
# ============================================================================= # Utilities # ============================================================================= def expected_rs(n): """Calculates the expected (R/S)_n for white noise for a given n. This is used as a correction factor in the function hurst_rs. It uses the formula of Anis-Lloyd-Peters. """ # front = (n - 0.5) / n i = np.arange(1, n) back = np.sum(np.sqrt((n - i) / i)) if n <= 340: middle = scipy.special.gamma((n - 1) * 0.5) / np.sqrt(np.pi) / scipy.special.gamma(n * 0.5) else: middle = 1.0 / np.sqrt(n * np.pi * 0.5) return middle * back def _fractal_hurst_rs(signal, window): """Calculates an individual R/S value in the rescaled range approach for a given window size (the size of the subseries in which data should be split). """ n = len(signal) m = n // window # number of sequences # cut values at the end of data to make the array divisible by n signal = signal[: n - (n % window)] # split remaining data into subsequences of length n seqs = np.reshape(signal, (m, window)) # calculate means of subsequences means = np.mean(seqs, axis=1) # normalize subsequences by substracting mean y = seqs - means.reshape((m, 1)) # build cumulative sum of subsequences y = np.cumsum(y, axis=1) # find ranges r = np.max(y, axis=1) - np.min(y, axis=1) # find standard deviation # we should use the unbiased estimator, since we do not know the true mean s = np.std(seqs, ddof=1, axis=1) # some ranges may be zero and have to be excluded from the analysis idx = np.where(r != 0) r = r[idx] s = s[idx] # it may happen that all ranges are zero (if all values in data are equal) if len(r) == 0: return np.nan # return mean of r/s along subsequence index return np.mean(r / s) def _fractal_hurst_plot(poly, n_vals, rs_vals, corrected=False, ax=None): if ax is None: # ax option in case more plots need to be added later fig, ax = plt.subplots() fig.suptitle("Hurst Exponent via Rescaled Range (R/S) Analysis") else: fig = None 
ax.set_ylabel(r"$log$((R/S)_n)") ax.set_xlabel(r"$log$(n)") # Plot log((R/S)_n) vs log(n) ax.scatter( n_vals, rs_vals, marker="o", zorder=1, label="_no_legend_", ) fit_values = [poly[0] * i + poly[1] for i in n_vals] if corrected: label = "corrected h = {}".format(round(poly[0] + 0.5, 2)) else: label = "h = {}".format(round(poly[0], 2)) ax.plot(n_vals, fit_values, color="#E91E63", zorder=2, linewidth=3, label=label) ax.legend(loc="lower right") return fig # ============================================================================= # Generalized Hurst Exponent # ============================================================================= def _fractal_hurst_generalized(signal, q=2): """TO BE DONE. The Generalized Hurst exponent method is assesses directly the scaling properties of the time series via the qth-order moments of the distribution of the increments. Different exponents `q` are associated with different characterizations of the multi-scaling complexity of the signal. In contrast to the popular R/S statistics approach, it does not deal with max and min functions, and thus less sensitive to outliers. 
From""" n = len(signal) H = np.zeros((len(range(5, 20)), 1)) k = 0 for Tmax in range(5, 20): x = np.arange(1, Tmax + 1, 1) mcord = np.zeros((Tmax, 1)) for tt in range(1, Tmax + 1): dV = signal[np.arange(tt, n, tt)] - signal[np.arange(tt, n, tt) - tt] VV = signal[np.arange(tt, n + tt, tt) - tt] N = len(dV) + 1 X = np.arange(1, N + 1, dtype=np.float64) Y = VV mx = np.sum(X) / N SSxx = np.sum(X ** 2) - N * mx ** 2 my = np.sum(Y) / N SSxy = np.sum(np.multiply(X, Y)) - N * mx * my cc1 = SSxy / SSxx cc2 = my - cc1 * mx ddVd = dV - cc1 VVVd = VV - np.multiply(cc1, np.arange(1, N + 1, dtype=np.float64)) - cc2 mcord[tt - 1] = np.mean(np.abs(ddVd) ** q) / np.mean(np.abs(VVVd) ** q) mx = np.mean(np.log10(x)) SSxx = np.sum(np.log10(x) ** 2) - Tmax * mx ** 2 my = np.mean(np.log10(mcord)) SSxy = np.sum(np.multiply(np.log10(x), np.transpose(np.log10(mcord)))) - Tmax * mx * my H[k] = SSxy / SSxx k = k + 1 mH = np.mean(H) / q return mH