Source code for neurokit2.complexity.information_mutual

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scipy.ndimage
import scipy.special
import scipy.stats
import sklearn.metrics
import sklearn.neighbors


def mutual_information(x, y, method="varoquaux", bins="default", **kwargs):
    """**Mutual Information (MI)**

    Computes the mutual information (MI) between two vectors from a joint histogram.
    The mutual information of two variables is a measure of the mutual dependence between
    them. More specifically, it quantifies the "amount of information" obtained about one
    variable by observing the other variable.

    Different methods are available:

    * **nolitsa**: Standard mutual information (a bit faster than the ``"sklearn"`` method).
    * **varoquaux**: Applies a Gaussian filter on the joint-histogram. The smoothing amount
      can be modulated via the ``sigma`` argument (by default, ``sigma=1``).
    * **knn**: Non-parametric (i.e., not based on binning) estimation via nearest neighbors.
      Additional parameters include ``k`` (by default, ``k=3``), the number of nearest
      neighbors to use.
    * **max**: Maximum Mutual Information coefficient, i.e., the MI maximized over a range of
      bin-number combinations.

    Parameters
    ----------
    x : Union[list, np.array, pd.Series]
        A vector of values.
    y : Union[list, np.array, pd.Series]
        A vector of values.
    method : str
        The method to use.
    bins : int
        Number of bins to use while creating the histogram. Only used for ``"nolitsa"`` and
        ``"varoquaux"``. If ``"default"``, the number of bins is estimated following
        Hacine-Gharbi (2018).
    **kwargs
        Additional keyword arguments to pass to the chosen method.

    Returns
    -------
    float
        The computed similarity measure.

    See Also
    --------
    information_fisher

    Examples
    --------
    **Example 1**: Simple case

    .. ipython:: python

      import neurokit2 as nk

      x = [3, 3, 5, 1, 6, 3, 2, 8, 1, 2, 3, 5, 4, 0, 2]
      y = [5, 3, 1, 3, 4, 5, 6, 4, 1, 3, 4, 6, 2, 1, 3]

      nk.mutual_information(x, y, method="varoquaux")
      nk.mutual_information(x, y, method="nolitsa")
      nk.mutual_information(x, y, method="knn")
      nk.mutual_information(x, y, method="max")

    **Example 2**: Method comparison

    .. ipython:: python

      import numpy as np
      import pandas as pd

      x = np.random.normal(size=400)
      y = x**2

      data = pd.DataFrame()
      for level in np.linspace(0.01, 3, 200):
          noise = np.random.normal(scale=level, size=400)
          rez = pd.DataFrame({"Noise": [level]})
          rez["MI1"] = nk.mutual_information(x, y + noise, method="varoquaux", sigma=1)
          rez["MI2"] = nk.mutual_information(x, y + noise, method="varoquaux", sigma=0)
          rez["MI3"] = nk.mutual_information(x, y + noise, method="nolitsa")
          rez["MI4"] = nk.mutual_information(x, y + noise, method="knn")
          rez["MI5"] = nk.mutual_information(x, y + noise, method="max")
          data = pd.concat([data, rez], axis=0)

      data["MI1"] = nk.rescale(data["MI1"])
      data["MI2"] = nk.rescale(data["MI2"])
      data["MI3"] = nk.rescale(data["MI3"])
      data["MI4"] = nk.rescale(data["MI4"])
      data["MI5"] = nk.rescale(data["MI5"])

      @savefig p_information_mutual1.png scale=100%
      data.plot(x="Noise", y=["MI1", "MI2", "MI3", "MI4", "MI5"], kind="line")
      @suppress
      plt.close()

    .. ipython:: python

      # Computation time
      # x = np.random.normal(size=10000)
      # %timeit nk.mutual_information(x, x**2, method="varoquaux")
      # %timeit nk.mutual_information(x, x**2, method="nolitsa")
      # %timeit nk.mutual_information(x, x**2, method="sklearn")
      # %timeit nk.mutual_information(x, x**2, method="knn", k=2)
      # %timeit nk.mutual_information(x, x**2, method="knn", k=5)
      # %timeit nk.mutual_information(x, x**2, method="max")

    References
    ----------
    * Studholme, C., Hawkes, D. J., & Hill, D. L. (1998, June). Normalized entropy measure for
      multimodality image alignment. In Medical Imaging 1998: Image Processing (Vol. 3338,
      pp. 132-143). SPIE.
    * Hacine-Gharbi, A., & Ravier, P. (2018). A binning formula of bi-histogram for joint
      entropy estimation using mean square error minimization. Pattern Recognition Letters,
      101, 21-28.

    """
    method = method.lower()

    if method in ["max"] or isinstance(bins, (list, np.ndarray)):
        # https://www.freecodecamp.org/news/
        # how-machines-make-predictions-finding-correlations-in-complex-data-dfd9f0d87889/
        if isinstance(bins, str):
            bins = np.arange(2, np.ceil(len(x) ** 0.6) + 1).astype(int)
        mi = 0
        for i in bins:
            for j in bins:
                if i * j > np.max(bins):
                    continue
                p_x = pd.cut(x, i, labels=False)
                p_y = pd.cut(y, j, labels=False)
                new_mi = _mutual_information_sklearn(p_x, p_y) / np.log2(np.min([i, j]))
                if new_mi > mi:
                    mi = new_mi
    else:
        if isinstance(bins, str):
            # Hacine-Gharbi (2018)
            # https://stats.stackexchange.com/questions/179674/
            # number-of-bins-when-computing-mutual-information
            bins = 1 + np.sqrt(1 + (24 * len(x) / (1 - np.corrcoef(x, y)[0, 1] ** 2)))
            bins = np.round((1 / np.sqrt(2)) * np.sqrt(bins)).astype(int)

        if method in ["varoquaux"]:
            mi = _mutual_information_varoquaux(x, y, bins=bins, **kwargs)
        elif method in ["shannon", "nolitsa"]:
            mi = _mutual_information_nolitsa(x, y, bins=bins)
        elif method in ["sklearn"]:
            mi = _mutual_information_sklearn(x, y, bins=bins)
        elif method in ["knn"]:
            mi = _mutual_information_knn(x, y, **kwargs)
        else:
            raise ValueError("NeuroKit error: mutual_information(): 'method' not recognized.")

    return mi
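# -----------------------------------------------------------------------------
# Note (illustrative, not part of the library): a minimal sketch of the default
# bin-count rule applied above, assuming the Hacine-Gharbi (2018) formula used
# in the ``bins="default"`` branch. With n samples and Pearson correlation rho,
# xi = 1 + sqrt(1 + 24 * n / (1 - rho**2)) and bins = round(sqrt(xi / 2)), so
# the count grows with both the sample size and the strength of the correlation
# between x and y.
#
#   import numpy as np
#   rng = np.random.default_rng(42)
#   x = rng.normal(size=400)
#   y = x + rng.normal(scale=0.5, size=400)
#   rho = np.corrcoef(x, y)[0, 1]
#   xi = 1 + np.sqrt(1 + 24 * len(x) / (1 - rho**2))
#   bins = int(np.round(np.sqrt(xi / 2)))  # essentially what mutual_information() derives
# -----------------------------------------------------------------------------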
# =============================================================================
# Methods
# =============================================================================
def _mutual_information_sklearn(x, y, bins=None):
    if bins is None:
        _, p_xy = scipy.stats.contingency.crosstab(x, y)
    else:
        p_xy = np.histogram2d(x, y, bins)[0]
    return sklearn.metrics.mutual_info_score(None, None, contingency=p_xy)


def _mutual_information_varoquaux(x, y, bins=256, sigma=1, normalized=True):
    """Based on Gael Varoquaux's implementation:
    https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429."""
    jh = np.histogram2d(x, y, bins=bins)[0]

    # Smooth the joint histogram with a Gaussian filter of given sigma
    scipy.ndimage.gaussian_filter(jh, sigma=sigma, mode="constant", output=jh)

    # Compute marginal histograms
    jh = jh + np.finfo(float).eps
    sh = np.sum(jh)
    jh = jh / sh
    s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
    s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))

    if normalized:
        mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2))) / np.sum(jh * np.log(jh))) - 1
    else:
        mi = np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1)) - np.sum(s2 * np.log(s2))

    return mi


def _mutual_information_nolitsa(x, y, bins=256):
    """Based on the nolitsa package:
    https://github.com/manu-mannattil/nolitsa/blob/master/nolitsa/delay.py#L72
    """
    p_x = np.histogram(x, bins)[0]
    p_y = np.histogram(y, bins)[0]
    p_xy = np.histogram2d(x, y, bins)[0].flatten()

    # Convert frequencies into probabilities. Also, in the limit p -> 0,
    # p*log(p) is 0, so we take those bins out.
    p_x = p_x[p_x > 0] / np.sum(p_x)
    p_y = p_y[p_y > 0] / np.sum(p_y)
    p_xy = p_xy[p_xy > 0] / np.sum(p_xy)

    # Calculate the corresponding Shannon entropies.
    h_x = np.sum(p_x * np.log2(p_x))
    h_y = np.sum(p_y * np.log2(p_y))
    h_xy = np.sum(p_xy * np.log2(p_xy))

    return h_xy - h_x - h_y


def _mutual_information_knn(x, y, k=3):
    """Based on the NPEET package:
    https://github.com/gregversteeg/NPEET
    """
    points = np.array([x, y]).T

    # Find nearest neighbors in joint space, p=inf means max-norm
    dvec = sklearn.neighbors.KDTree(points, metric="chebyshev").query(points, k=k + 1)[0][:, k]

    a = np.array([x]).T
    a = sklearn.neighbors.KDTree(a, metric="chebyshev").query_radius(
        a, dvec - 1e-15, count_only=True
    )
    a = np.mean(scipy.special.digamma(a))

    b = np.array([y]).T
    b = sklearn.neighbors.KDTree(b, metric="chebyshev").query_radius(
        b, dvec - 1e-15, count_only=True
    )
    b = np.mean(scipy.special.digamma(b))

    c = scipy.special.digamma(k)
    d = scipy.special.digamma(len(x))
    return (-a - b + c + d) / np.log(2)
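# -----------------------------------------------------------------------------
# Note (illustrative, not part of the library): a minimal sanity check, assuming
# a bivariate Gaussian for which the true MI is known analytically,
# MI = -0.5 * log2(1 - rho**2) bits. Both _mutual_information_knn() (NPEET-based
# nearest-neighbor estimator) and _mutual_information_nolitsa() return values in
# bits, so they can be compared against this reference.
#
#   import numpy as np
#   import neurokit2 as nk
#   rng = np.random.default_rng(1)
#   rho = 0.8
#   x, y = rng.multivariate_normal([0, 0], [[1, rho], [rho, 1]], size=2000).T
#   true_mi = -0.5 * np.log2(1 - rho**2)  # about 0.74 bits
#   nk.mutual_information(x, y, method="knn", k=5)  # expected to be close to true_mi
#   nk.mutual_information(x, y, method="nolitsa")   # binned estimate, also in bits
# -----------------------------------------------------------------------------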