# Source code for neurokit2.complexity.entropy_shannon

import numpy as np
import pandas as pd
import scipy.stats

from .utils_complexity_symbolize import complexity_symbolize

# NOTE: a "[docs]" link marker from the HTML source listing was removed here (not part of the module).
def entropy_shannon(signal=None, base=2, symbolize=None, show=False, freq=None, **kwargs):
    """**Shannon entropy (SE or ShanEn)**

    Compute Shannon entropy (SE). Entropy is a measure of unpredictability of the
    state, or equivalently, of its average information content. Shannon entropy (SE) is one of the
    first and most basic measures of entropy and a foundational concept of information theory,
    introduced by Shannon (1948) to quantify the amount of information in a variable.

    .. math::

      ShanEn = -\\sum_{x \\in \\mathcal{X}} p(x) \\log_2 p(x)

    Shannon attempted to extend Shannon entropy in what has become known as Differential Entropy
    (see :func:`entropy_differential`).

    Because Shannon entropy was meant for symbolic sequences (discrete events such as
    ``["A", "B", "B", "A"]``), it does not do well with continuous signals. One option is to
    discretize (i.e., cut) the signal into a number of bins using for instance
    ``pd.cut(signal, bins=100, labels=False)``. This can be done automatically using the
    ``symbolize`` argument, which will be transferred to :func:`complexity_symbolize`.

    This function can be called either via ``entropy_shannon()`` or ``complexity_se()``.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values. Ignored if ``freq``
        is provided.
    base: float
        The logarithmic base to use, defaults to ``2``, giving a unit in *bits*. Note that
        ``scipy.stats.entropy()`` uses Euler's number (``np.e``) as default (the natural
        logarithm), giving a measure of information expressed in *nats*.
    symbolize : str
        Method to convert a continuous signal input into a symbolic (discrete) signal. ``None``
        by default, which skips the process (and assumes the input is already discrete). See
        :func:`complexity_symbolize` for details.
    show : bool
        If ``True``, will show the discretized signal.
    freq : np.array
        Instead of a signal, a vector of probabilities can be provided (used for instance in
        :func:`entropy_permutation`). Raw counts are accepted as well, as
        ``scipy.stats.entropy()`` normalizes its input so that it sums to 1.
    **kwargs
        Optional arguments. Not used for now.

    Returns
    --------
    shanen : float
        The Shannon entropy of the signal.
    info : dict
        A dictionary containing additional information regarding the parameters used
        to compute Shannon entropy (keys ``"Symbolization"`` and ``"Base"``).

    Raises
    ------
    ValueError
        If neither ``signal`` nor ``freq`` is provided.

    See Also
    --------
    entropy_differential, entropy_cumulativeresidual, entropy_tsallis, entropy_renyi,
    entropy_maximum

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk

      signal = [1, 1, 5, 5, 2, 8, 1]
      _, freq = np.unique(signal, return_counts=True)
      nk.entropy_shannon(freq=freq)

    .. ipython:: python

      # Simulate a Signal with Laplace Noise
      signal = nk.signal_simulate(duration=2, frequency=5, noise=0.01)

      # Compute Shannon's Entropy
      @savefig p_entropy_shannon1.png scale=100%
      shanen, info = nk.entropy_shannon(signal, symbolize=3, show=True)
      @suppress
      plt.close()

    .. ipython:: python

      shanen

    Compare with ``scipy`` (using the same base).

    .. ipython:: python

      import scipy.stats

      # Make the binning ourselves
      binned = pd.cut(signal, bins=3, labels=False)

      scipy.stats.entropy(pd.Series(binned).value_counts())
      shanen, info = nk.entropy_shannon(binned, base=np.e)
      shanen

    References
    -----------
    * Shannon, C. E. (1948). A mathematical theory of communication. The Bell system technical
      journal, 27(3), 379-423.

    """
    if freq is None:
        # Without precomputed frequencies a signal is required: fail loudly instead of
        # letting ``None`` travel through the symbol-counting helper.
        if signal is None:
            raise ValueError(
                "NeuroKit error: entropy_shannon(): either `signal` or `freq` must be provided."
            )
        # Only the counts are needed; the unique symbols themselves are discarded
        _, freq = _entropy_freq(signal, symbolize=symbolize, show=show)

    shanen = scipy.stats.entropy(freq, base=base)
    return shanen, {"Symbolization": symbolize, "Base": base}

# =============================================================================
# Compute frequencies (common to Shannon and Tsallis)
# =============================================================================
def _entropy_freq(signal, symbolize=None, show=False):
    """Count the occurrences of each distinct symbol of a (symbolized) signal.

    Parameters
    ----------
    signal : Union[str, list, np.array, pd.Series]
        A one-dimensional sequence of values. A string (e.g., ``"ABBA"``) is treated as a
        sequence of characters.
    symbolize : str
        Forwarded as ``method`` to ``complexity_symbolize()``.
    show : bool
        Forwarded as ``show`` to ``complexity_symbolize()``.

    Returns
    -------
    tuple
        The output of ``np.unique(signal, return_counts=True)``, i.e., the unique symbols and
        their respective counts.

    Raises
    ------
    ValueError
        If the input has more than one dimension once converted to an array.

    """
    # Check if string ('ABBA'), and convert each character to list (['A', 'B', 'B', 'A'])
    if isinstance(signal, str):
        signal = list(signal)

    # Force to array
    if not isinstance(signal, np.ndarray):
        signal = np.array(signal)

    # Sanity check. Done *after* the conversion so that nested lists/tuples and DataFrames
    # are rejected like 2D arrays, instead of being silently flattened by np.unique() below.
    if signal.ndim > 1:
        raise ValueError(
            "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet."
        )

    # Make discrete
    # NOTE(review): `signal` is an array at this point, for which np.isscalar() presumably
    # always returns False, so this guard looks always true -- confirm the intent.
    if np.isscalar(signal) is False:
        signal = complexity_symbolize(signal, method=symbolize, show=show)

    return np.unique(signal, return_counts=True)