# -*- coding: utf-8 -*-
import numpy as np
from ..misc import check_random_state
from ..stats import cluster
from ..stats.cluster_quality import _cluster_quality_gev
from .microstates_classify import microstates_classify
from .microstates_clean import microstates_clean
[docs]
def microstates_segment(
eeg,
n_microstates=4,
train="gfp",
method="kmod",
gfp_method="l1",
sampling_rate=None,
standardize_eeg=False,
n_runs=10,
max_iterations=1000,
criterion="gev",
random_state=None,
optimize=False,
**kwargs
):
"""**Segment M/EEG signal into Microstates**
This functions identifies and extracts the microstates from an M/EEG signal using different
clustering algorithms. Several runs of the clustering algorithm are performed, using different
random initializations.The run that resulted in the best segmentation, as measured by global
explained variance (GEV), is used.
* **kmod**: Modified k-means algorithm. It differs from a traditional k-means in that it is
*polarity-invariant*, which means that samples with EEG potential distribution maps that are
similar but have opposite polarity will be assigned the *same* microstate class.
* **kmeans**: Normal k-means.
* **kmedoids**: k-medoids clustering, a more stable version of k-means.
* **pca**: Principal Component Analysis.
* **ica**: Independent Component Analysis.
* **aahc**: Atomize and Agglomerate Hierarchical Clustering. Computationally heavy.
The microstates clustering is typically fitted on the EEG data at the global field power (GFP)
peaks to maximize the signal to noise ratio and focus on moments of high global neuronal
synchronization. It is assumed that the topography around a GFP peak remains stable and is at
its highest signal-to-noise ratio at the GFP peak.
Parameters
----------
eeg : np.ndarray
An array (channels, times) of M/EEG data or a Raw or Epochs object from MNE.
n_microstates : int
The number of unique microstates to find. Defaults to 4.
train : Union[str, int, float]
Method for selecting the timepoints how which to train the clustering algorithm. Can be
``"gfp"`` to use the peaks found in the Peaks in the global field power. Can be ``"all"``,
in which case it will select all the datapoints. It can also be a number or a ratio, in
which case it will select the corresponding number of evenly spread data points. For
instance, ``train=10`` will select 10 equally spaced datapoints, whereas ``train=0.5`` will
select half the data. See :func:`.microstates_peaks`.
method : str
The algorithm for clustering. Can be one of ``"kmod"`` (default), ``"kmeans"``,
``"kmedoids"``, ``"pca"``, ``"ica"``, or ``"aahc"``.
gfp_method : str
The GFP extraction method, can be either ``"l1"`` (default) or ``"l2"`` to use the L1 or L2
norm. See :func:`nk.eeg_gfp` for more details.
sampling_rate : int
The sampling frequency of the signal (in Hz, i.e., samples/second).
standardize_eeg : bool
Standardized (z-score) the data across time prior to GFP extraction
using :func:`.nk.standardize`.
n_runs : int
The number of random initializations to use for the k-means algorithm. The best fitting
segmentation across all initializations is used. Defaults to 10.
max_iterations : int
The maximum number of iterations to perform in the k-means algorithm.
Defaults to 1000.
criterion : str
Which criterion to use to choose the best run for modified k-means algorithm,
can be ``"gev"`` (default) which selects the best run based on the highest global explained
variance, or ``"cv"`` which selects the best run based on the lowest cross-validation
criterion. See :func:`.nk.microstates_gev` and :func:`.nk.microstates_crossvalidation` for
more details respectively.
random_state : Union[int, numpy.random.RandomState]
The seed or ``RandomState`` for the random number generator. Defaults to ``None``, in which
case a different seed is chosen each time this function is called.
optimize : bool
Optimized method in Poulsen et al. (2018) for the *k*-means modified method.
Returns
-------
dict
Contains information about the segmented microstates:
* **Microstates**: The topographic maps of the found unique microstates which has a shape of
n_channels x n_states
* **Sequence**: For each sample, the index of the microstate to which the sample has been
assigned.
* **GEV**: The global explained variance of the microstates.
* **GFP**: The global field power of the data.
* **Cross-Validation Criterion**: The cross-validation value of the iteration.
* **Explained Variance**: The explained variance of each cluster map generated by PCA.
* **Total Explained Variance**: The total explained variance of the cluster maps generated
by PCA.
Examples
---------
* **Example 1**: k-means Algorithm
.. ipython:: python
import neurokit2 as nk
# Download data
eeg = nk.mne_data("filt-0-40_raw")
# Average rereference and band-pass filtering
eeg = nk.eeg_rereference(eeg, 'average').filter(1, 30, verbose=False)
# Cluster microstates
microstates = nk.microstates_segment(eeg, method="kmeans")
@savefig p_microstate_segment1.png scale=100%
nk.microstates_plot(microstates , epoch=(500, 750))
@suppress
plt.close()
# Modified kmeans (currently comment out due to memory error)
#out_kmod = nk.microstates_segment(eeg, method='kmod')
# nk.microstates_plot(out_kmod, gfp=out_kmod["GFP"][0:500])
# K-medoids (currently comment out due to memory error)
#out_kmedoids = nk.microstates_segment(eeg, method='kmedoids')
#nk.microstates_plot(out_kmedoids, gfp=out_kmedoids["GFP"][0:500])
* **Example with PCA**
.. ipython:: python
out_pca = nk.microstates_segment(eeg, method='pca', standardize_eeg=True)
@savefig p_microstate_segment2.png scale=100%
nk.microstates_plot(out_pca, gfp=out_pca["GFP"][0:500])
@suppress
plt.close()
* **Example with ICA**
.. ipython:: python
out_ica = nk.microstates_segment(eeg, method='ica', standardize_eeg=True)
@savefig p_microstate_segment3.png scale=100%
nk.microstates_plot(out_ica, gfp=out_ica["GFP"][0:500])
@suppress
plt.close()
* **Example with AAHC**
.. ipython:: python
out_aahc = nk.microstates_segment(eeg, method='aahc')
@savefig p_microstate_segment4.png scale=100%
nk.microstates_plot(out_aahc, gfp=out_aahc["GFP"][0:500])
@suppress
plt.close()
See Also
--------
eeg_gfp, microstates_peaks, microstates_gev, microstates_crossvalidation, microstates_classify
References
----------
* Poulsen, A. T., Pedroni, A., Langer, N., & Hansen, L. K. (2018). Microstate EEGlab toolbox:
an introductory guide. BioRxiv, (289850).
* Pascual-Marqui, R. D., Michel, C. M., & Lehmann, D. (1995). Segmentation of brain
electrical activity into microstates: model estimation and validation. IEEE Transactions
on Biomedical Engineering.
"""
# Sanitize input
data, indices, gfp, info_mne = microstates_clean(
eeg,
train=train,
sampling_rate=sampling_rate,
standardize_eeg=standardize_eeg,
gfp_method=gfp_method,
**kwargs
)
# Run clustering algorithm
if method in ["kmods", "kmod", "kmeans modified", "modified kmeans"]:
# Seed the random generator for reproducible results
rng = check_random_state(random_state)
# Generate one random integer for each run
random_state = rng.choice(n_runs * 1000, n_runs, replace=False)
# Initialize values
gev = 0
cv = np.inf
microstates = None
segmentation = None
polarity = None
info = None
# Do several runs of the k-means algorithm, keep track of the best segmentation.
for run in range(n_runs):
# Run clustering on subset of data
_, _, current_info = cluster(
data[:, indices].T,
method="kmod",
n_clusters=n_microstates,
random_state=random_state[run],
max_iterations=max_iterations,
threshold=1e-6,
optimize=optimize,
)
current_microstates = current_info["clusters_normalized"]
current_residual = current_info["residual"]
# Run segmentation on the whole dataset
s, p, g, g_all = _microstates_segment_runsegmentation(
data, current_microstates, gfp, n_microstates=n_microstates
)
if criterion == "gev":
# If better (i.e., higher GEV), keep this segmentation
if g > gev:
microstates, segmentation, polarity, gev = (
current_microstates,
s,
p,
g,
)
gev_all = g_all
info = current_info
elif criterion == "cv":
# If better (i.e., lower CV), keep this segmentation
# R2 and residual are proportional, use residual instead of R2
if current_residual < cv:
microstates, segmentation, polarity = current_microstates, s, p
cv, gev, gev_all = current_residual, g, g_all
info -= current_info
else:
# Run clustering algorithm on subset
_, microstates, info = cluster(
data[:, indices].T, method=method, n_clusters=n_microstates, random_state=random_state, **kwargs
)
# Run segmentation on the whole dataset
segmentation, polarity, gev, gev_all = _microstates_segment_runsegmentation(
data, microstates, gfp, n_microstates=n_microstates
)
# Reorder
segmentation, microstates = microstates_classify(segmentation, microstates)
# CLustering quality
# quality = cluster_quality(data, segmentation, clusters=microstates, info=info, n_random=10, sd=gfp)
# Output
info = {
"Microstates": microstates,
"Sequence": segmentation,
"GEV": gev,
"GEV_per_microstate": gev_all,
"GFP": gfp,
"Polarity": polarity,
"Info_algorithm": info,
"Info": info_mne,
}
return info
# =============================================================================
# Utils
# =============================================================================
def _microstates_segment_runsegmentation(data, microstates, gfp, n_microstates):
# Find microstate corresponding to each datapoint
activation = microstates.dot(data)
segmentation = np.argmax(np.abs(activation), axis=0)
polarity = np.sign(np.choose(segmentation, activation))
# Get Global Explained Variance (GEV)
gev, gev_all = _cluster_quality_gev(
data.T, microstates, segmentation, sd=gfp, n_clusters=n_microstates
)
return segmentation, polarity, gev, gev_all