Source code for neurokit2.complexity.information_mutual
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scipy.ndimage
import scipy.special
import scipy.stats
import sklearn.metrics
import sklearn.neighbors
def mutual_information(x, y, method="varoquaux", bins="default", **kwargs):
"""**Mutual Information (MI)**
Computes the mutual information (MI) between two vectors, either from a joint histogram or via a nearest-neighbor estimator.
The mutual information of two variables is a measure of the mutual dependence between them.
More specifically, it quantifies the "amount of information" obtained about one variable by
observing the other variable.
Different methods are available:
* **nolitsa**: Standard mutual information (a bit faster than the ``"sklearn"`` method).
* **varoquaux**: Applies a Gaussian filter on the joint-histogram. The smoothing amount can be
modulated via the ``sigma`` argument (by default, ``sigma=1``).
* **knn**: Non-parametric (i.e., not based on binning) estimation via nearest neighbors.
Additional parameters include ``k`` (by default, ``k=3``), the number of nearest neighbors
to use.
* **max**: Maximum Mutual Information coefficient, i.e., the largest (bin-normalized) MI
found across different combinations of numbers of bins.
Parameters
----------
x : Union[list, np.array, pd.Series]
A vector of values.
y : Union[list, np.array, pd.Series]
A vector of values.
method : str
The method to use.
bins : int
Number of bins to use while creating the histogram. Only used for ``"nolitsa"`` and
``"varoquaux"``. If ``"default"``, the number of bins is estimated following
Hacine-Gharbi (2018).
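Specifically (as implemented below), ``bins = round(sqrt((1 + sqrt(1 + 24 * n / (1 - r**2))) / 2))``,
where ``n`` is the series length and ``r`` the Pearson correlation between ``x`` and ``y``.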
**kwargs
Additional keyword arguments to pass to the chosen method.
Returns
-------
float
The computed mutual information.
See Also
--------
information_fisher
Examples
--------
**Example 1**: Simple case
.. ipython:: python
import neurokit2 as nk
x = [3, 3, 5, 1, 6, 3, 2, 8, 1, 2, 3, 5, 4, 0, 2]
y = [5, 3, 1, 3, 4, 5, 6, 4, 1, 3, 4, 6, 2, 1, 3]
nk.mutual_information(x, y, method="varoquaux")
nk.mutual_information(x, y, method="nolitsa")
nk.mutual_information(x, y, method="knn")
nk.mutual_information(x, y, method="max")
**Example 2**: Method comparison
.. ipython:: python
import numpy as np
import pandas as pd
x = np.random.normal(size=400)
y = x**2
data = pd.DataFrame()
for level in np.linspace(0.01, 3, 200):
noise = np.random.normal(scale=level, size=400)
rez = pd.DataFrame({"Noise": [level]})
rez["MI1"] = nk.mutual_information(x, y + noise, method="varoquaux", sigma=1)
rez["MI2"] = nk.mutual_information(x, y + noise, method="varoquaux", sigma=0)
rez["MI3"] = nk.mutual_information(x, y + noise, method="nolitsa")
rez["MI4"] = nk.mutual_information(x, y + noise, method="knn")
rez["MI5"] = nk.mutual_information(x, y + noise, method="max")
data = pd.concat([data, rez], axis=0)
data["MI1"] = nk.rescale(data["MI1"])
data["MI2"] = nk.rescale(data["MI2"])
data["MI3"] = nk.rescale(data["MI3"])
data["MI4"] = nk.rescale(data["MI4"])
data["MI5"] = nk.rescale(data["MI5"])
@savefig p_information_mutual1.png scale=100%
data.plot(x="Noise", y=["MI1", "MI2", "MI3", "MI4", "MI5"], kind="line")
@suppress
plt.close()
.. ipython:: python
# Computation time
# x = np.random.normal(size=10000)
# %timeit nk.mutual_information(x, x**2, method="varoquaux")
# %timeit nk.mutual_information(x, x**2, method="nolitsa")
# %timeit nk.mutual_information(x, x**2, method="sklearn")
# %timeit nk.mutual_information(x, x**2, method="knn", k=2)
# %timeit nk.mutual_information(x, x**2, method="knn", k=5)
# %timeit nk.mutual_information(x, x**2, method="max")
References
----------
* Studholme, C., Hawkes, D. J., & Hill, D. L. (1998, June). Normalized entropy measure for
multimodality image alignment. In Medical imaging 1998: image processing (Vol. 3338, pp.
132-143). SPIE.
* Hacine-Gharbi, A., & Ravier, P. (2018). A binning formula of bi-histogram for joint entropy
estimation using mean square error minimization. Pattern Recognition Letters, 101, 21-28.
"""
method = method.lower()
if method in ["max"] or isinstance(bins, (list, np.ndarray)):
# https://www.freecodecamp.org/news/
# how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/
if isinstance(bins, str):
bins = np.arange(2, np.ceil(len(x) ** 0.6) + 1).astype(int)
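# Grid search over candidate bin counts: for every pair (i, j) whose product does not
# exceed the largest candidate (approximately len(x) ** 0.6), discretize x and y,
# compute the MI of the binned labels, normalize it by log2(min(i, j)) so values are
# comparable across grid resolutions, and keep the largest value found.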
mi = 0
for i in bins:
for j in bins:
if i * j > np.max(bins):
continue
p_x = pd.cut(x, i, labels=False)
p_y = pd.cut(y, j, labels=False)
new_mi = _mutual_information_sklearn(p_x, p_y) / np.log2(np.min([i, j]))
if new_mi > mi:
mi = new_mi
else:
if isinstance(bins, str):
# Hacine-Gharbi (2018)
# https://stats.stackexchange.com/questions/179674/
# number-of-bins-when-computing-mutual-information
bins = 1 + np.sqrt(1 + (24 * len(x) / (1 - np.corrcoef(x, y)[0, 1] ** 2)))
bins = np.round((1 / np.sqrt(2)) * np.sqrt(bins)).astype(int)
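# Equivalent closed form of the two lines above:
# bins = round(sqrt((1 + sqrt(1 + 24 * n / (1 - r**2))) / 2)),
# with n = len(x) and r the Pearson correlation between x and y.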
if method in ["varoquaux"]:
mi = _mutual_information_varoquaux(x, y, bins=bins, **kwargs)
elif method in ["shannon", "nolitsa"]:
mi = _mutual_information_nolitsa(x, y, bins=bins)
elif method in ["sklearn"]:
mi = _mutual_information_sklearn(x, y, bins=bins)
elif method in ["knn"]:
mi = _mutual_information_knn(x, y, **kwargs)
else:
raise ValueError("NeuroKit error: mutual_information(): 'method' not recognized.")
return mi
# =============================================================================
# Methods
# =============================================================================
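# In the helper below, when `bins` is None the inputs are treated as already discrete
# labels and tabulated directly into a contingency table (this is the path used by the
# "max" method, which passes pd.cut() labels); otherwise a 2D histogram with the
# requested number of bins is built first.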
def _mutual_information_sklearn(x, y, bins=None):
if bins is None:
_, p_xy = scipy.stats.contingency.crosstab(x, y)
else:
p_xy = np.histogram2d(x, y, bins)[0]
return sklearn.metrics.mutual_info_score(None, None, contingency=p_xy)
def _mutual_information_varoquaux(x, y, bins=256, sigma=1, normalized=True):
"""Based on Gael Varoquaux's implementation:
https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429."""
jh = np.histogram2d(x, y, bins=bins)[0]
# smooth the jh with a gaussian filter of given sigma
scipy.ndimage.gaussian_filter(jh, sigma=sigma, mode="constant", output=jh)
# compute marginal histograms
jh = jh + np.finfo(float).eps
sh = np.sum(jh)
jh = jh / sh
s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))
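# At this point jh is the (smoothed) joint pmf and s1/s2 its marginals, so
# np.sum(jh * np.log(jh)) equals -H(X, Y) and np.sum(s * np.log(s)) equals -H(marginal).
# The normalized variant below is Studholme's (1998) ratio (H(X) + H(Y)) / H(X, Y),
# shifted by -1; the plain variant is the standard MI = H(X) + H(Y) - H(X, Y), in nats.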
if normalized:
mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2))) / np.sum(jh * np.log(jh))) - 1
else:
mi = np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1)) - np.sum(s2 * np.log(s2))
return mi
def _mutual_information_nolitsa(x, y, bins=256):
"""
Based on the nolitsa package:
https://github.com/manu-mannattil/nolitsa/blob/master/nolitsa/delay.py#L72
"""
p_x = np.histogram(x, bins)[0]
p_y = np.histogram(y, bins)[0]
p_xy = np.histogram2d(x, y, bins)[0].flatten()
# Convert frequencies into probabilities.
# In the limit p -> 0, p*log(p) -> 0, so zero-count bins are dropped beforehand.
p_x = p_x[p_x > 0] / np.sum(p_x)
p_y = p_y[p_y > 0] / np.sum(p_y)
p_xy = p_xy[p_xy > 0] / np.sum(p_xy)
# Calculate the corresponding (negative) Shannon entropy terms.
h_x = np.sum(p_x * np.log2(p_x))
h_y = np.sum(p_y * np.log2(p_y))
h_xy = np.sum(p_xy * np.log2(p_xy))
return h_xy - h_x - h_y
def _mutual_information_knn(x, y, k=3):
"""
Based on the NPEET package:
https://github.com/gregversteeg/NPEET
"""
points = np.array([x, y]).T
# Find nearest neighbors in joint space, p=inf means max-norm
dvec = sklearn.neighbors.KDTree(points, metric="chebyshev").query(points, k=k + 1)[0][:, k]
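# dvec[i] is the Chebyshev (max-norm) distance from point i to its k-th nearest
# neighbor in the joint (x, y) space (index 0 of the query is the point itself).
# Counting, for each point, how many samples fall within slightly less than that
# distance in each marginal space gives the neighbor counts used below.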
a = np.array([x]).T
a = sklearn.neighbors.KDTree(a, metric="chebyshev").query_radius(
a, dvec - 1e-15, count_only=True
)
a = np.mean(scipy.special.digamma(a))
b = np.array([y]).T
b = sklearn.neighbors.KDTree(b, metric="chebyshev").query_radius(
b, dvec - 1e-15, count_only=True
)
b = np.mean(scipy.special.digamma(b))
c = scipy.special.digamma(k)
d = scipy.special.digamma(len(x))
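# The expression below is psi(k) + psi(N) - mean(psi(n_x + 1)) - mean(psi(n_y + 1))
# (the radius counts above already include the query point, hence the "+ 1"), i.e.,
# the Kraskov et al. nearest-neighbor MI estimate in nats; dividing by log(2)
# converts the result to bits.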
return (-a - b + c + d) / np.log(2)