# Source code for neurokit2.complexity.complexity_lempelziv

# -*- coding: utf-8 -*-
import math

import numpy as np
import pandas as pd

from .utils_complexity_ordinalpatterns import complexity_ordinalpatterns
from .utils_complexity_symbolize import complexity_symbolize

def complexity_lempelziv(
    signal,
    delay=1,
    dimension=2,
    permutation=False,
    symbolize="mean",
    **kwargs,
):
    """**Lempel-Ziv Complexity (LZC, PLZC and MSLZC)**

    Computes Lempel-Ziv Complexity (LZC) to quantify the regularity of the signal, by scanning
    symbolic sequences for new patterns, increasing the complexity count every time a new sequence
    is detected. Regular signals have a lower number of distinct patterns and thus have low LZC
    whereas irregular signals are characterized by a high LZC. While often being interpreted as a
    complexity measure, LZC was originally proposed to reflect randomness (Lempel and Ziv, 1976).

    Permutation Lempel-Ziv Complexity (**PLZC**) combines LZC with
    :func:`permutation <entropy_permutation>`. A sequence of symbols is generated from the
    permutations observed in the :func:`time-delay embedding <complexity_embedding>`, and LZC is
    computed over it.

    Multiscale (Permutation) Lempel-Ziv Complexity (**MSLZC** or **MSPLZC**) combines permutation
    LZC with the :func:`multiscale approach <entropy_multiscale>`. It first performs a
    :func:`coarse-graining <complexity_coarsegraining>` procedure to the original time series.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    delay : int
        Time delay (often denoted *Tau* :math:`\\tau`, sometimes referred to as *lag*) in samples.
        See :func:`complexity_delay` to estimate the optimal value for this parameter. Only used
        when ``permutation=True``.
    dimension : int
        Embedding Dimension (*m*, sometimes referred to as *d* or *order*). See
        :func:`complexity_dimension` to estimate the optimal value for this parameter. Only used
        when ``permutation=True``.
    permutation : bool
        If ``True``, will return PLZC.
    symbolize : str
        Only used when ``permutation=False``. Method to convert a continuous signal input into a
        symbolic (discrete) signal. By default, assigns 0 and 1 to values below and above the
        mean. Can be ``None`` to skip the process (in case the input is already discrete). See
        :func:`complexity_symbolize` for details.
    **kwargs
        Other arguments to be passed to :func:`complexity_ordinalpatterns` (if
        ``permutation=True``) or :func:`complexity_symbolize`.

    Returns
    ----------
    lzc : float
        Lempel Ziv Complexity (LZC) of the signal.
    info : dict
        A dictionary containing additional information regarding the parameters used to compute
        LZC.

    See Also
    --------
    .complexity_symbolize, .complexity_ordinalpatterns, .entropy_permutation

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk

      signal = nk.signal_simulate(duration=2, sampling_rate=200, frequency=[5, 6], noise=0.5)

      # LZC
      lzc, info = nk.complexity_lempelziv(signal)
      lzc

      # PLZC
      plzc, info = nk.complexity_lempelziv(signal, delay=1, dimension=3, permutation=True)
      plzc

    .. ipython:: python

      # MSLZC
      @savefig p_complexity_lempelziv1.png scale=100%
      mslzc, info = nk.entropy_multiscale(signal, method="LZC", show=True)
      @suppress
      plt.close()

    .. ipython:: python

      # MSPLZC
      @savefig p_complexity_lempelziv2.png scale=100%
      msplzc, info = nk.entropy_multiscale(signal, method="LZC", permutation=True, show=True)
      @suppress
      plt.close()

    References
    ----------
    * Lempel, A., & Ziv, J. (1976). On the complexity of finite sequences. IEEE Transactions on
      information theory, 22(1), 75-81.
    * Nagarajan, R. (2002). Quantifying physiological data with Lempel-Ziv complexity-certain
      issues. IEEE Transactions on Biomedical Engineering, 49(11), 1371-1373.
    * Kaspar, F., & Schuster, H. G. (1987). Easily calculable measure for the complexity of
      spatiotemporal patterns. Physical Review A, 36(2), 842.
    * Zhang, Y., Hao, J., Zhou, C., & Chang, K. (2009). Normalized Lempel-Ziv complexity and its
      application in bio-sequence analysis. Journal of mathematical chemistry, 46(4), 1203-1212.
    * Bai, Y., Liang, Z., & Li, X. (2015). A permutation Lempel-Ziv complexity measure for EEG
      analysis. Biomedical Signal Processing and Control, 19, 102-114.
    * Borowska, M. (2021). Multiscale Permutation Lempel-Ziv Complexity Measure for Biomedical
      Signal Analysis: Interpretation and Application to Focal EEG Signals. Entropy, 23(7), 832.

    """
    # Sanity checks
    if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1:
        raise ValueError(
            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
        )

    # Store parameters
    info = {"Permutation": permutation}

    # Permutation or not
    if permutation:
        info["Dimension"] = dimension
        info["Delay"] = delay
        # Permutation on the signal (i.e., converting to ordinal patterns). Merge the returned
        # parameters into `info` instead of replacing it, so that the "Permutation", "Dimension"
        # and "Delay" keys stored above are not silently dropped.
        _, patterns_info = complexity_ordinalpatterns(
            signal, delay=delay, dimension=dimension, **kwargs
        )
        info.update(patterns_info)
        symbolic = info["Uniques"]
    else:
        # Binarize the signal
        symbolic = complexity_symbolize(signal, method=symbolize, **kwargs)

    # Count using the lempelziv algorithm
    info["Complexity_Kolmogorov"], n = _complexity_lempelziv_count(symbolic)

    # Normalize
    if permutation is False:
        lzc = (info["Complexity_Kolmogorov"] * np.log2(n)) / n
    else:
        # PLZC normalization (Bai et al., 2015). Note: `math.factorial` is used because the
        # `np.math` alias was removed in NumPy 2.0.
        lzc = (
            info["Complexity_Kolmogorov"] * np.log2(n) / np.log2(math.factorial(dimension))
        ) / n

    return lzc, info
# ============================================================================= # Utilities # ============================================================================= def _complexity_lempelziv_count(symbolic): """Computes LZC counts from symbolic sequences""" # TODO: I really can't imagine that there is no faster way of doing that that with a while loop # Convert to string (faster) string = "".join(list(symbolic.astype(int).astype(str))) # Initialize variables n = len(string) s = "0" + string c = 1 j = 1 i = 0 k = 1 k_max = 1 stop = False # Start counting while stop is False: if s[i + k] != s[j + k]: if k > k_max: # k_max stores the length of the longest pattern in the LA that has been matched # somewhere in the SB k_max = k # we increase i while the bit doesn't match, looking for a previous occurrence of a # pattern. s[i+k] is scanning the "search buffer" (SB) i = i + 1 # we stop looking when i catches up with the first bit of the "look-ahead" (LA) part. if i == j: # If we were actually compressing, we would add the new token here. here we just # count reconstruction STEPs c = c + 1 # we move the beginning of the LA to the end of the newly matched pattern. j = j + k_max # if the LA surpasses length of string, then we stop. if j + 1 > n: stop = True # after STEP, else: # we reset the searching index to beginning of SB (beginning of string) i = 0 # we reset pattern matching index. Note that we are actually matching against # the first bit of the string, because we added an extra 0 above, so i+k is the # first bit of the string. k = 1 # and we reset max length of matched pattern to k. k_max = 1 else: # we've finished matching a pattern in the SB, and we reset the matched pattern # length counter. k = 1 # I increase k as long as the pattern matches, i.e. as long as s[j+k] bit string can be # reconstructed by s[i+k] bit string. Note that the matched pattern can "run over" j # because the pattern starts copying itself (see LZ 76 paper). 
This is just what happens # when you apply the cloning tool on photoshop to a region where you've already cloned... else: k = k + 1 # if we reach the end of the string while matching, we need to add that to the tokens, # and stop. if j + k > n: c = c + 1 stop = True return c, n