Source code for neurokit2.stats.standardize

# -*- coding: utf-8 -*-
from warnings import warn

import numpy as np
import pandas as pd

from ..misc import NeuroKitWarning
from ..misc.check_type import is_string
from .mad import mad


def standardize(data, robust=False, window=None, **kwargs):
    """**Standardization of data**

    Performs a standardization of data (Z-scoring), i.e., centering and scaling, so that the data
    is expressed in terms of standard deviation (i.e., mean = 0, SD = 1) or Median Absolute
    Deviation (median = 0, MAD = 1).

    Parameters
    ----------
    data : Union[list, np.array, pd.Series, pd.DataFrame]
        Raw data.
    robust : bool
        If ``True``, centering is done by subtracting the median from the variables and dividing
        it by the median absolute deviation (MAD). If ``False``, variables are standardized by
        subtracting the mean and dividing it by the standard deviation (SD).
    window : int
        Perform a rolling window standardization, i.e., apply a standardization on a window of the
        specified number of samples that rolls along the main axis of the signal. Can be used for
        complex detrending.
    **kwargs : optional
        Other arguments to be passed to :func:`.pandas.rolling`.

    Returns
    ----------
    Union[list, np.array, pd.Series, pd.DataFrame]
        The standardized values, in the same kind of container as ``data``. String data is
        returned unchanged (with a :class:`NeuroKitWarning`). For data frames, string columns and
        all-NaN columns are left untouched and placed before the standardized columns.

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk
      import numpy as np
      import pandas as pd

      # Simple example
      nk.standardize([3, 1, 2, 4, 6, np.nan])
      nk.standardize([3, 1, 2, 4, 6, np.nan], robust=True)
      nk.standardize(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).T)
      nk.standardize(pd.DataFrame({"A": [3, 1, 2, 4, 6, np.nan],
                                   "B": [3, 1, 2, 4, 6, 5]}))

      # Rolling standardization of a signal
      signal = nk.signal_simulate(frequency=[0.1, 2], sampling_rate=200)
      z = nk.standardize(signal, window=200)
      @savefig p_standardize1.png scale=100%
      nk.signal_plot([signal, z], standardize=True)
      @suppress
      plt.close()

    """
    # Return appropriate type
    if isinstance(data, list):
        if any(is_string(data)):
            # The trailing space matters: the two literals are concatenated.
            warn(
                "The data is not standardized. "
                "Some elements in the list is of string type.",
                category=NeuroKitWarning,
            )
            return data
        return list(_standardize(np.array(data), robust=robust, window=window, **kwargs))

    if isinstance(data, pd.DataFrame):
        # Only standardize columns that are not string and are not entirely NaN
        string_cols = is_string(data)
        empty_cols = np.array(data.isnull().all())
        _data = data.loc[:, ~string_cols & ~empty_cols]
        to_append = data.loc[:, string_cols | empty_cols]

        values = _standardize(_data, robust=robust, window=window, **kwargs)
        if isinstance(values, pd.DataFrame):
            out = pd.DataFrame(values)
        else:
            # The rolling path returns a bare array: restore the original labels,
            # otherwise the concat below would misalign rows and rename columns.
            out = pd.DataFrame(values, index=_data.index, columns=_data.columns)
        return pd.concat([to_append, out], axis=1)

    if is_string(data):
        # Same handling for Series and any other container (e.g., arrays)
        warn(
            "The data is not standardized as it is of string type.",
            category=NeuroKitWarning,
        )
        return data

    values = _standardize(data, robust=robust, window=window, **kwargs)
    if isinstance(data, pd.Series):
        if isinstance(values, pd.Series):
            return pd.Series(values)
        # The rolling path returns a bare array: keep the original index and name.
        return pd.Series(values, index=data.index, name=data.name)
    return values
# =============================================================================
# Internals
# =============================================================================
def _standardize(data, robust=False, window=None, **kwargs):
    """Center and scale ``data``, either globally or over a rolling window.

    Parameters
    ----------
    data : Union[np.array, pd.Series, pd.DataFrame]
        Numeric data. Statistics are computed along axis 0.
    robust : bool
        If truthy, center on the median and scale by ``mad``; otherwise center on the mean and
        scale by the sample standard deviation (``ddof=1``). NaNs are ignored when computing the
        statistics.
    window : int
        If ``None``, standardize using the whole data. Otherwise, the size of the rolling window.
    **kwargs : optional
        Other arguments passed to the pandas ``rolling`` call.

    Returns
    ----------
    Union[np.array, pd.Series, pd.DataFrame]
        Without ``window``, the result of the arithmetic on ``data`` (so its type follows
        ``data``). With ``window``, a numpy array: 1D if the data has a single column,
        2D otherwise.

    """
    # Compute standardized on whole data
    if window is None:
        # Truthiness (rather than ``is False``) so that ``np.False_`` or ``0`` are not
        # mistakenly routed to the robust branch.
        if not robust:
            return (data - np.nanmean(data, axis=0)) / np.nanstd(data, axis=0, ddof=1)
        return (data - np.nanmedian(data, axis=0)) / mad(data)

    # Rolling standardization on windows
    df = pd.DataFrame(data)  # Force dataframe
    rolling = df.rolling(window, min_periods=0, **kwargs)
    if not robust:
        z = (df - rolling.mean()) / rolling.std(ddof=1)
    else:
        z = (df - rolling.median()) / rolling.apply(mad)

    # Fill the created nans (the first windows have too few samples for a spread estimate)
    z = z.bfill()

    # Restore to vector or array. Select the single column by position: its label is
    # not necessarily ``0`` (e.g., named Series or DataFrame columns).
    if z.shape[1] == 1:
        return z.iloc[:, 0].values
    return z.values