Source code for neurokit2.stats.hdi
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from ..misc import find_closest
from .density import density
[docs]
def hdi(x, ci=0.95, show=False, **kwargs):
"""**Highest Density Interval (HDI)**
Compute the Highest Density Interval (HDI) of a distribution. All points within this interval
have a higher probability density than points outside the interval. The HDI can be used in the
context of uncertainty characterisation of posterior distributions (in the Bayesian farmework)
as Credible Interval (CI). Unlike equal-tailed intervals that typically exclude 2.5% from each
tail of the distribution and always include the median, the HDI is not equal-tailed and
therefore always includes the mode(s) of posterior distributions.
Parameters
----------
x : Union[list, np.array, pd.Series]
A vector of values.
ci : float
Value of probability of the (credible) interval - CI (between 0 and 1) to be estimated.
Default to .95 (95%).
show : bool
If ``True``, the function will produce a figure.
**kwargs : Line2D properties
Other arguments to be passed to :func:`nk.density`.
See Also
--------
density
Returns
----------
float(s)
The HDI low and high limits.
fig
Distribution plot.
Examples
----------
.. ipython:: python
import numpy as np
import neurokit2 as nk
x = np.random.normal(loc=0, scale=1, size=100000)
@savefig p_hdi1.png scale=100%
ci_min, ci_high = nk.hdi(x, ci=0.95, show=True)
@suppress
plt.close()
"""
x_sorted = np.sort(x)
window_size = np.ceil(ci * len(x_sorted)).astype("int")
if window_size < 2:
raise ValueError(
"NeuroKit error: hdi(): `ci` is too small or x does not contain enough data points."
)
nCIs = len(x_sorted) - window_size
ciWidth = [0] * nCIs
for i in np.arange(0, nCIs):
ciWidth[i] = x_sorted[i + window_size] - x_sorted[i]
hdi_low = x_sorted[ciWidth.index(np.min(ciWidth))]
hdi_high = x_sorted[ciWidth.index(np.min(ciWidth)) + window_size]
if show is True:
_hdi_plot(x, hdi_low, hdi_high, **kwargs)
return hdi_low, hdi_high
def _hdi_plot(vals, hdi_low, hdi_high, ci=0.95, **kwargs):
x, y = density(vals, show=False, **kwargs)
where = np.full(len(x), False)
where[0 : find_closest(hdi_low, x, return_index=True)] = True
where[find_closest(hdi_high, x, return_index=True) : :] = True
fig, ax = plt.subplots() # pylint: disable=unused-variable
ax.plot(x, y, color="white")
ax.fill_between(
x,
y,
where=where,
color="#E91E63",
label="CI {:.0%} [{:.2f}, {:.2f}]".format(ci, hdi_low, hdi_high),
)
ax.fill_between(x, y, where=~where, color="#2196F3")
ax.legend(loc="upper right")