def standardize(data, robust=False, window=None, **kwargs):
    """**Standardization of data**

    Performs a standardization of data (Z-scoring), i.e., centering and scaling, so that the data
    is expressed in terms of standard deviation (i.e., mean = 0, SD = 1) or Median Absolute
    Deviation (median = 0, MAD = 1).

    Parameters
    ----------
    data : Union[list, np.array, pd.Series, pd.DataFrame]
        Raw data.
    robust : bool
        If ``True``, centering is done by subtracting the median from the variables and dividing
        it by the median absolute deviation (MAD). If ``False``, variables are standardized by
        subtracting the mean and dividing it by the standard deviation (SD).
    window : int
        Perform a rolling window standardization, i.e., apply a standardization on a window of the
        specified number of samples that rolls along the main axis of the signal. Can be used for
        complex detrending.
    **kwargs : optional
        Other arguments to be passed to :func:`.pandas.rolling`.

    Returns
    ----------
    Union[list, np.array, pd.Series, pd.DataFrame]
        The standardized values, in a container matching the input: a ``list`` for a list, a
        ``pd.Series`` for a Series, a ``pd.DataFrame`` for a DataFrame, and otherwise whatever the
        internal computation yields for the given input. Data detected as string is returned
        unchanged and a ``NeuroKitWarning`` is emitted. For a DataFrame, columns that are of string
        type or entirely missing are left untouched and placed before the standardized columns.

    Examples
    ----------
    .. ipython:: python

      import neurokit2 as nk
      import numpy as np
      import pandas as pd

      # Simple example
      nk.standardize([3, 1, 2, 4, 6, np.nan])
      nk.standardize([3, 1, 2, 4, 6, np.nan], robust=True)
      nk.standardize(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).T)
      nk.standardize(pd.DataFrame({"A": [3, 1, 2, 4, 6, np.nan],
                                   "B": [3, 1, 2, 4, 6, 5]}))

      # Rolling standardization of a signal
      signal = nk.signal_simulate(frequency=[0.1, 2], sampling_rate=200)
      z = nk.standardize(signal, window=200)

      @savefig p_standardize1.png scale=100%
      nk.signal_plot([signal, z], standardize=True)
      @suppress
      plt.close()

    """
    # Return appropriate type
    if isinstance(data, list):
        if any(is_string(data)):
            out = data
            warn(
                "The data is not standardized. Some elements in the list are of string type.",
                category=NeuroKitWarning,
            )
        else:
            out = list(_standardize(np.array(data), robust=robust, window=window, **kwargs))

    elif isinstance(data, pd.DataFrame):
        # Only standardize columns that are not string and are not entirely NaN
        skipped = is_string(data) | np.array(data.isnull().all())
        _data = data.loc[:, ~skipped]
        to_append = data.loc[:, skipped]

        z = _standardize(_data, robust=robust, window=window, **kwargs)
        if isinstance(z, pd.DataFrame):
            out = pd.DataFrame(z)
        else:
            # The rolling path hands back a bare array: re-attach the original labels so that
            # column names survive and the concat below aligns rows on the same index.
            out = pd.DataFrame(z, index=_data.index, columns=_data.columns)
        out = pd.concat([to_append, out], axis=1)

    elif isinstance(data, pd.Series):
        if is_string(data):
            out = data
            warn(
                "The data is not standardized as it is of string type.",
                category=NeuroKitWarning,
            )
        else:
            z = _standardize(data, robust=robust, window=window, **kwargs)
            if isinstance(z, pd.Series):
                out = pd.Series(z)
            else:
                # Same as above: keep the index and name when a bare array comes back
                out = pd.Series(z, index=data.index, name=data.name)

    else:
        if is_string(data):
            out = data
            warn(
                "The data is not standardized as it is of string type.",
                category=NeuroKitWarning,
            )
        else:
            out = _standardize(data, robust=robust, window=window, **kwargs)

    return out
# =============================================================================
# Internals
# =============================================================================
def _standardize(data, robust=False, window=None, **kwargs):
    """Standardize ``data`` either globally or over a rolling window.

    Parameters
    ----------
    data : Union[np.array, pd.Series, pd.DataFrame]
        Numeric data; statistics are computed along axis 0.
    robust : bool
        If falsy, center on the mean and scale by the sample standard deviation (``ddof=1``).
        Otherwise, center on the median and scale by ``mad``.
    window : int
        If ``None``, the statistics are computed once over the whole data. Otherwise they are
        computed with ``pandas`` rolling windows of that size (``min_periods=0``).
    **kwargs : optional
        Forwarded to ``DataFrame.rolling`` (only used when ``window`` is given).

    Returns
    ----------
    Union[np.array, pd.Series, pd.DataFrame]
        Without ``window``, the result of the arithmetic on ``data`` itself (so it keeps the
        container type of ``data``). With ``window``, always a NumPy array: 1-D when the data
        has a single column, 2-D otherwise.

    """
    # Compute standardized on whole data
    if window is None:
        if not robust:
            return (data - np.nanmean(data, axis=0)) / np.nanstd(data, axis=0, ddof=1)
        return (data - np.nanmedian(data, axis=0)) / mad(data)

    # Rolling standardization on windows
    df = pd.DataFrame(data)  # Force dataframe
    rolling = df.rolling(window, min_periods=0, **kwargs)

    if not robust:
        z = (df - rolling.mean()) / rolling.std(ddof=1)
    else:
        z = (df - rolling.median()) / rolling.apply(mad)

    # Fill the NaNs created at the start of the signal (e.g., the SD of a single sample)
    z = z.bfill()

    # Restore to vector or array. The single column is selected by position, because its label
    # is only ``0`` when the input was unnamed (a named Series or DataFrame column keeps its name).
    if z.shape[1] == 1:
        return z.iloc[:, 0].values
    return z.values