Source code for neurokit2.stats.distance
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import scipy
import scipy.spatial
from .standardize import standardize
[docs]
def distance(X=None, method="mahalanobis"):
"""**Distance**
Compute distance using different metrics.
Parameters
----------
X : array or DataFrame
A dataframe of values.
method : str
The method to use. One of ``"mahalanobis"`` or ``"mean"`` for the average distance from the
mean.
Returns
-------
array
Vector containing the distance values.
Examples
---------
.. ipython:: python
import neurokit2 as nk
# Load the iris dataset
data = nk.data("iris").drop("Species", axis=1)
data["Distance"] = nk.distance(data, method="mahalanobis")
@savefig p_distance1.png scale=100%
fig = data.plot(x="Petal.Length", y="Petal.Width", s="Distance", c="Distance", kind="scatter")
@suppress
plt.close()
.. ipython:: python
data["DistanceZ"] = np.abs(nk.distance(data.drop("Distance", axis=1), method="mean"))
@savefig p_distance2.png scale=100%
fig = data.plot(x="Petal.Length", y="Sepal.Length", s="DistanceZ", c="DistanceZ", kind="scatter")
@suppress
plt.close()
"""
if not isinstance(X, pd.DataFrame):
X = pd.DataFrame(X)
method = method.lower() # remove capitalised letters
if method in ["mahalanobis"]:
dist = _distance_mahalanobis(X)
elif method in ["mean", "center", "average"]:
dist = _distance_mean(X)
else:
raise ValueError("NeuroKit error: distance(): 'method' should be one of 'mahalanobis'.")
return dist
# =============================================================================
# Methods
# =============================================================================
def _distance_mahalanobis(X=None):
cov = X.cov().values
cov = scipy.linalg.inv(cov)
col_means = X.mean().values
dist = np.full(len(X), np.nan)
for i in range(len(X)):
dist[i] = scipy.spatial.distance.mahalanobis(X.iloc[i, :].values, col_means, cov) ** 2
return dist
def _distance_mean(X=None):
Z = standardize(X)
dist = Z.mean(axis=1).values
return dist