Source code for neurokit2.microstates.microstates_segment

# -*- coding: utf-8 -*-
import numpy as np

from ..misc import check_random_state
from ..stats import cluster
from ..stats.cluster_quality import _cluster_quality_gev
from .microstates_classify import microstates_classify
from .microstates_clean import microstates_clean


def microstates_segment(
    eeg,
    n_microstates=4,
    train="gfp",
    method="kmod",
    gfp_method="l1",
    sampling_rate=None,
    standardize_eeg=False,
    n_runs=10,
    max_iterations=1000,
    criterion="gev",
    random_state=None,
    optimize=False,
    **kwargs
):
    """**Segment M/EEG signal into Microstates**

    This function identifies and extracts the microstates from an M/EEG signal using different
    clustering algorithms. Several runs of the clustering algorithm are performed, using different
    random initializations. The run that resulted in the best segmentation, as measured by global
    explained variance (GEV), is used.

    * **kmod**: Modified k-means algorithm. It differs from a traditional k-means in that it is
      *polarity-invariant*, which means that samples with EEG potential distribution maps that are
      similar but have opposite polarity will be assigned the *same* microstate class.
    * **kmeans**: Normal k-means.
    * **kmedoids**: k-medoids clustering, a more stable version of k-means.
    * **pca**: Principal Component Analysis.
    * **ica**: Independent Component Analysis.
    * **aahc**: Atomize and Agglomerate Hierarchical Clustering. Computationally heavy.

    The microstates clustering is typically fitted on the EEG data at the global field power (GFP)
    peaks to maximize the signal-to-noise ratio and focus on moments of high global neuronal
    synchronization. It is assumed that the topography around a GFP peak remains stable and is at
    its highest signal-to-noise ratio at the GFP peak.

    Parameters
    ----------
    eeg : np.ndarray
        An array (channels, times) of M/EEG data or a Raw or Epochs object from MNE.
    n_microstates : int
        The number of unique microstates to find. Defaults to 4.
    train : Union[str, int, float]
        Method for selecting the timepoints on which to train the clustering algorithm. Can be
        ``"gfp"`` to use the peaks found in the global field power. Can be ``"all"``, in which
        case it will select all the datapoints. It can also be a number or a ratio, in which case
        it will select the corresponding number of evenly spread data points. For instance,
        ``train=10`` will select 10 equally spaced datapoints, whereas ``train=0.5`` will select
        half the data. See :func:`.microstates_peaks`.
    method : str
        The algorithm for clustering. Can be one of ``"kmod"`` (default), ``"kmeans"``,
        ``"kmedoids"``, ``"pca"``, ``"ica"``, or ``"aahc"``.
    gfp_method : str
        The GFP extraction method. Can be either ``"l1"`` (default) or ``"l2"`` to use the L1 or
        L2 norm. See :func:`.nk.eeg_gfp` for more details.
    sampling_rate : int
        The sampling frequency of the signal (in Hz, i.e., samples/second).
    standardize_eeg : bool
        Standardize (z-score) the data across time prior to GFP extraction using
        :func:`.nk.standardize`.
    n_runs : int
        The number of random initializations to use for the k-means algorithm. The best fitting
        segmentation across all initializations is used. Defaults to 10.
    max_iterations : int
        The maximum number of iterations to perform in the k-means algorithm. Defaults to 1000.
    criterion : str
        Which criterion to use to choose the best run for the modified k-means algorithm. Can be
        ``"gev"`` (default), which selects the best run based on the highest global explained
        variance, or ``"cv"``, which selects the best run based on the lowest cross-validation
        criterion. See :func:`.nk.microstates_gev` and :func:`.nk.microstates_crossvalidation`
        for more details.
    random_state : Union[int, numpy.random.RandomState]
        The seed or ``RandomState`` for the random number generator. Defaults to ``None``, in
        which case a different seed is chosen each time this function is called.
    optimize : bool
        Whether to use the optimized method described in Poulsen et al. (2018) for the modified
        *k*-means algorithm.

    Returns
    -------
    dict
        Contains information about the segmented microstates:

        * **Microstates**: The topographic maps of the found unique microstates, with shape
          (n_microstates, n_channels).
        * **Sequence**: For each sample, the index of the microstate to which the sample has been
          assigned.
        * **GEV**: The global explained variance of the microstates.
        * **GFP**: The global field power of the data.
        * **Cross-Validation Criterion**: The cross-validation value of the iteration.
        * **Explained Variance**: The explained variance of each cluster map generated by PCA.
        * **Total Explained Variance**: The total explained variance of the cluster maps generated
          by PCA.

    Examples
    ---------
    * **Example 1**: k-means Algorithm

    .. ipython:: python

      import neurokit2 as nk

      # Download data
      eeg = nk.mne_data("filt-0-40_raw")

      # Average rereference and band-pass filtering
      eeg = nk.eeg_rereference(eeg, 'average').filter(1, 30, verbose=False)

      # Cluster microstates
      microstates = nk.microstates_segment(eeg, method="kmeans")

      @savefig p_microstate_segment1.png scale=100%
      nk.microstates_plot(microstates, epoch=(500, 750))
      @suppress
      plt.close()

      # Modified k-means (currently commented out due to memory error)
      # out_kmod = nk.microstates_segment(eeg, method='kmod')
      # nk.microstates_plot(out_kmod, gfp=out_kmod["GFP"][0:500])

      # K-medoids (currently commented out due to memory error)
      # out_kmedoids = nk.microstates_segment(eeg, method='kmedoids')
      # nk.microstates_plot(out_kmedoids, gfp=out_kmedoids["GFP"][0:500])

    * **Example with PCA**

    .. ipython:: python

      out_pca = nk.microstates_segment(eeg, method='pca', standardize_eeg=True)

      @savefig p_microstate_segment2.png scale=100%
      nk.microstates_plot(out_pca, gfp=out_pca["GFP"][0:500])
      @suppress
      plt.close()

    * **Example with ICA**

    .. ipython:: python

      out_ica = nk.microstates_segment(eeg, method='ica', standardize_eeg=True)

      @savefig p_microstate_segment3.png scale=100%
      nk.microstates_plot(out_ica, gfp=out_ica["GFP"][0:500])
      @suppress
      plt.close()

    * **Example with AAHC**

    .. ipython:: python

      out_aahc = nk.microstates_segment(eeg, method='aahc')

      @savefig p_microstate_segment4.png scale=100%
      nk.microstates_plot(out_aahc, gfp=out_aahc["GFP"][0:500])
      @suppress
      plt.close()

    See Also
    --------
    eeg_gfp, microstates_peaks, microstates_gev, microstates_crossvalidation, microstates_classify

    References
    ----------
    * Poulsen, A. T., Pedroni, A., Langer, N., & Hansen, L. K. (2018). Microstate EEGlab toolbox:
      An introductory guide. BioRxiv, 289850.
    * Pascual-Marqui, R. D., Michel, C. M., & Lehmann, D. (1995). Segmentation of brain electrical
      activity into microstates: Model estimation and validation. IEEE Transactions on Biomedical
      Engineering.

    """
    # Sanitize input
    data, indices, gfp, info_mne = microstates_clean(
        eeg,
        train=train,
        sampling_rate=sampling_rate,
        standardize_eeg=standardize_eeg,
        gfp_method=gfp_method,
        **kwargs
    )

    # Run clustering algorithm
    if method in ["kmods", "kmod", "kmeans modified", "modified kmeans"]:
        # Seed the random generator for reproducible results
        rng = check_random_state(random_state)
        # Generate one random integer for each run
        random_state = rng.choice(n_runs * 1000, n_runs, replace=False)

        # Initialize values
        gev = 0
        cv = np.inf
        microstates = None
        segmentation = None
        polarity = None
        gev_all = None
        info = None

        # Do several runs of the k-means algorithm, keep track of the best segmentation
        for run in range(n_runs):
            # Run clustering on the subset of data (e.g., the GFP peaks)
            _, _, current_info = cluster(
                data[:, indices].T,
                method="kmod",
                n_clusters=n_microstates,
                random_state=random_state[run],
                max_iterations=max_iterations,
                threshold=1e-6,
                optimize=optimize,
            )
            current_microstates = current_info["clusters_normalized"]
            current_residual = current_info["residual"]

            # Run segmentation on the whole dataset
            s, p, g, g_all = _microstates_segment_runsegmentation(
                data, current_microstates, gfp, n_microstates=n_microstates
            )

            if criterion == "gev":
                # If better (i.e., higher GEV), keep this segmentation
                if g > gev:
                    microstates, segmentation, polarity, gev = current_microstates, s, p, g
                    gev_all = g_all
                    info = current_info

            elif criterion == "cv":
                # If better (i.e., lower CV), keep this segmentation
                # R2 and residual are proportional, use residual instead of R2
                if current_residual < cv:
                    microstates, segmentation, polarity = current_microstates, s, p
                    cv, gev, gev_all = current_residual, g, g_all
                    info = current_info

    else:
        # Run clustering algorithm on the subset of data
        _, microstates, info = cluster(
            data[:, indices].T,
            method=method,
            n_clusters=n_microstates,
            random_state=random_state,
            **kwargs
        )

        # Run segmentation on the whole dataset
        segmentation, polarity, gev, gev_all = _microstates_segment_runsegmentation(
            data, microstates, gfp, n_microstates=n_microstates
        )

    # Reorder the microstates and relabel the segmentation accordingly
    segmentation, microstates = microstates_classify(segmentation, microstates)

    # Clustering quality
    # quality = cluster_quality(data, segmentation, clusters=microstates, info=info, n_random=10, sd=gfp)

    # Output
    out = {
        "Microstates": microstates,
        "Sequence": segmentation,
        "GEV": gev,
        "GEV_per_microstate": gev_all,
        "GFP": gfp,
        "Polarity": polarity,
        "Info_algorithm": info,
        "Info": info_mne,
    }

    return out
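
# -----------------------------------------------------------------------------
# Usage sketch (not part of the library): a minimal, hedged example of calling
# microstates_segment() on a plain (channels, times) NumPy array, which the
# docstring states is an accepted input. The simulated data below is random
# noise, so the resulting microstates are meaningless; the sketch only shows
# the expected input shape and a couple of keys of the returned dictionary.
# Such code would normally live in a user script or notebook, not in this
# module; it is guarded here so it never runs on import.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np
    import neurokit2 as nk

    # Simulate a small (channels, times) array: 8 channels, 10 s at 250 Hz
    rng_demo = np.random.default_rng(42)
    fake_eeg = rng_demo.standard_normal((8, 2500))

    # Fit 4 microstates with plain k-means, trained on the GFP peaks
    out_demo = nk.microstates_segment(
        fake_eeg, n_microstates=4, method="kmeans", sampling_rate=250, train="gfp"
    )

    print(out_demo["Microstates"].shape)  # expected: (4, 8), one map per microstate
    print(out_demo["Sequence"].shape)     # expected: (2500,), one label per sample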
# =============================================================================
# Utils
# =============================================================================
def _microstates_segment_runsegmentation(data, microstates, gfp, n_microstates):
    # Find the microstate corresponding to each datapoint
    activation = microstates.dot(data)
    segmentation = np.argmax(np.abs(activation), axis=0)
    polarity = np.sign(np.choose(segmentation, activation))

    # Get Global Explained Variance (GEV)
    gev, gev_all = _cluster_quality_gev(
        data.T, microstates, segmentation, sd=gfp, n_clusters=n_microstates
    )
    return segmentation, polarity, gev, gev_all
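
# -----------------------------------------------------------------------------
# Illustration sketch (not part of the library): the polarity-invariant
# backfitting used above, on toy numbers. Two template maps over 3 channels
# are fitted to 3 samples; the third sample equals the first template with
# its sign flipped, yet it is still assigned to microstate 0, with polarity
# -1. All names below are hypothetical and for demonstration only.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    toy_templates = np.array([[1.0, -1.0, 0.0],   # microstate 0
                              [0.0, 1.0, -1.0]])  # microstate 1
    toy_data = np.array([[1.0, 0.1, -1.0],        # channel 1
                         [-1.0, 1.1, 1.0],        # channel 2
                         [0.0, -1.0, 0.0]])       # channel 3 (columns are samples)

    toy_activation = toy_templates.dot(toy_data)  # shape (2 states, 3 samples)
    toy_segmentation = np.argmax(np.abs(toy_activation), axis=0)
    toy_polarity = np.sign(np.choose(toy_segmentation, toy_activation))

    print(toy_segmentation)  # [0 1 0]: the last sample still matches template 0
    print(toy_polarity)      # [ 1.  1. -1.]: but with opposite polarity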