Source code for neurokit2.stats.hdi

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np

from ..misc import find_closest
from .density import density


[docs] def hdi(x, ci=0.95, show=False, **kwargs): """**Highest Density Interval (HDI)** Compute the Highest Density Interval (HDI) of a distribution. All points within this interval have a higher probability density than points outside the interval. The HDI can be used in the context of uncertainty characterisation of posterior distributions (in the Bayesian farmework) as Credible Interval (CI). Unlike equal-tailed intervals that typically exclude 2.5% from each tail of the distribution and always include the median, the HDI is not equal-tailed and therefore always includes the mode(s) of posterior distributions. Parameters ---------- x : Union[list, np.array, pd.Series] A vector of values. ci : float Value of probability of the (credible) interval - CI (between 0 and 1) to be estimated. Default to .95 (95%). show : bool If ``True``, the function will produce a figure. **kwargs : Line2D properties Other arguments to be passed to :func:`nk.density`. See Also -------- density Returns ---------- float(s) The HDI low and high limits. fig Distribution plot. Examples ---------- .. ipython:: python import numpy as np import neurokit2 as nk x = np.random.normal(loc=0, scale=1, size=100000) @savefig p_hdi1.png scale=100% ci_min, ci_high = nk.hdi(x, ci=0.95, show=True) @suppress plt.close() """ x_sorted = np.sort(x) window_size = np.ceil(ci * len(x_sorted)).astype("int") if window_size < 2: raise ValueError( "NeuroKit error: hdi(): `ci` is too small or x does not contain enough data points." ) nCIs = len(x_sorted) - window_size ciWidth = [0] * nCIs for i in np.arange(0, nCIs): ciWidth[i] = x_sorted[i + window_size] - x_sorted[i] hdi_low = x_sorted[ciWidth.index(np.min(ciWidth))] hdi_high = x_sorted[ciWidth.index(np.min(ciWidth)) + window_size] if show is True: _hdi_plot(x, hdi_low, hdi_high, **kwargs) return hdi_low, hdi_high
def _hdi_plot(vals, hdi_low, hdi_high, ci=0.95, **kwargs): x, y = density(vals, show=False, **kwargs) where = np.full(len(x), False) where[0 : find_closest(hdi_low, x, return_index=True)] = True where[find_closest(hdi_high, x, return_index=True) : :] = True fig, ax = plt.subplots() # pylint: disable=unused-variable ax.plot(x, y, color="white") ax.fill_between( x, y, where=where, color="#E91E63", label="CI {:.0%} [{:.2f}, {:.2f}]".format(ci, hdi_low, hdi_high), ) ax.fill_between(x, y, where=~where, color="#2196F3") ax.legend(loc="upper right")