# Source code for neurokit2.complexity.entropy_shannon

import numpy as np
import pandas as pd
import scipy.stats

from .utils_complexity_symbolize import complexity_symbolize

def entropy_shannon(signal=None, base=2, symbolize=None, show=False, freq=None, **kwargs):
    """**Shannon entropy (SE or ShanEn)**

    Compute Shannon entropy (SE). Entropy is a measure of unpredictability of the state, or
    equivalently, of its average information content. Shannon entropy (SE) is one of the first and
    most basic measures of entropy and a foundational concept of information theory, introduced by
    Shannon (1948) to quantify the amount of information in a variable.

    .. math::

        ShanEn = -\\sum_{x \\in \\mathcal{X}} p(x) \\log_2 p(x)

    Shannon attempted to extend Shannon entropy in what has become known as Differential Entropy
    (see :func:`entropy_differential`).

    Because Shannon entropy was meant for symbolic sequences (discrete events such as
    ["A", "B", "B", "A"]), it does not do well with continuous signals. One option is to
    discretize (i.e., cut) the signal into a number of bins using for instance
    ``pd.cut(signal, bins=100, labels=False)``. This can be done automatically using the
    ``symbolize`` argument, which will be transferred to :func:`complexity_symbolize`.

    This function can be called either via ``entropy_shannon()`` or ``complexity_se()``.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values. Ignored when ``freq``
        is provided.
    base: float
        The logarithmic base to use, defaults to ``2``, giving a unit in *bits*. Note that
        ``scipy.stats.entropy()`` uses Euler's number (``np.e``) as default (the natural
        logarithm), giving a measure of information expressed in *nats*.
    symbolize : str
        Method to convert a continuous signal input into a symbolic (discrete) signal. ``None`` by
        default, which skips the process (and assumes the input is already discrete). See
        :func:`complexity_symbolize` for details.
    show : bool
        If ``True``, will show the discretized signal.
    freq : np.array
        Instead of a signal, a vector of probabilities can be provided (used for instance in
        :func:`entropy_permutation`). When given, ``signal``, ``symbolize`` and ``show`` are not
        used for the computation.
    **kwargs
        Optional arguments. Not used for now.

    Returns
    --------
    shanen : float
        The Shannon entropy of the signal.
    info : dict
        A dictionary containing additional information regarding the parameters used
        to compute Shannon entropy (keys ``"Symbolization"`` and ``"Base"``).

    Raises
    ------
    ValueError
        If neither ``signal`` nor ``freq`` is provided.

    See Also
    --------
    entropy_differential, entropy_cumulativeresidual, entropy_tsallis, entropy_renyi,
    entropy_maximum

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk

      signal = [1, 1, 5, 5, 2, 8, 1]
      _, freq = np.unique(signal, return_counts=True)
      nk.entropy_shannon(freq=freq)

    .. ipython:: python

      # Simulate a Signal with Laplace Noise
      signal = nk.signal_simulate(duration=2, frequency=5, noise=0.01)

      # Compute Shannon's Entropy
      @savefig p_entropy_shannon1.png scale=100%
      shanen, info = nk.entropy_shannon(signal, symbolize=3, show=True)
      @suppress
      plt.close()

    .. ipython:: python

      shanen

    Compare with ``scipy`` (using the same base).

    .. ipython:: python

      import scipy.stats

      # Make the binning ourselves
      binned = pd.cut(signal, bins=3, labels=False)

      scipy.stats.entropy(pd.Series(binned).value_counts())
      shanen, info = nk.entropy_shannon(binned, base=np.e)
      shanen

    References
    -----------
    * Shannon, C. E. (1948). A mathematical theory of communication. The Bell system technical
      journal, 27(3), 379-423.

    """
    if freq is None:
        # Without this guard, `None` would be coerced to a single-symbol array downstream and
        # the function would silently report an entropy of 0 for a missing input.
        if signal is None:
            raise ValueError(
                "NeuroKit error: entropy_shannon(): either `signal` or `freq` must be provided."
            )
        _, freq = _entropy_freq(signal, symbolize=symbolize, show=show)

    # scipy normalizes `freq` so raw counts and probabilities are both accepted
    return scipy.stats.entropy(freq, base=base), {"Symbolization": symbolize, "Base": base}
# =============================================================================
# Compute frequencies (common to Shannon and Tsallis)
# =============================================================================
def _entropy_freq(signal, symbolize=None, show=False):
    """Return the unique symbols of a signal and how often each one occurs.

    Parameters
    ----------
    signal : Union[str, list, np.array, pd.Series]
        One-dimensional input. A string such as ``'ABBA'`` is split into its characters
        (``['A', 'B', 'B', 'A']``); anything that is not already an array is converted with
        ``np.array()``.
    symbolize : str
        Forwarded as ``method`` to :func:`complexity_symbolize` to discretize the signal.
    show : bool
        Forwarded to :func:`complexity_symbolize`.

    Returns
    -------
    values : np.ndarray
        The sorted unique symbols, as returned by ``np.unique()``.
    counts : np.ndarray
        The number of occurrences of each unique symbol.

    Raises
    ------
    ValueError
        If the input has more than one dimension once converted to an array.

    """
    # Check if string ('ABBA'), and convert each character to list (['A', 'B', 'B', 'A'])
    if isinstance(signal, str):
        signal = list(signal)

    # Force to array
    if not isinstance(signal, np.ndarray):
        signal = np.array(signal)

    # Sanity checks: done after the conversion so that nested lists/tuples (and not only
    # arrays and DataFrames) are rejected instead of being silently flattened by np.unique()
    if signal.ndim > 1:
        raise ValueError(
            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
        )

    # Make discrete. `np.isscalar()` is always False for an ndarray (even a 0-d one), so the
    # scalar case has to be detected through the number of dimensions instead.
    if signal.ndim > 0:
        signal = complexity_symbolize(signal, method=symbolize, show=show)

    return np.unique(signal, return_counts=True)