Source code for neurokit2.markov.transition_matrix

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..misc import as_vector


def transition_matrix(sequence, order=1, adjust=True, show=False):
    """**Transition Matrix**

    A **Transition Matrix** (also known as a stochastic matrix or a **Markov matrix**) is a
    convenient way of representing and describing a sequence of (discrete) states, also known as
    **discrete Markov chains**. Each of its entries is a probability of transitioning from one
    state to the other.

    .. note::

        This function is fairly new and hasn't been tested extensively. Please help us by
        double-checking the code and letting us know if everything is correct.

    Parameters
    ----------
    sequence : Union[list, np.array, pd.Series]
        A list of discrete states.
    order : int
        The order of the Markov chain (must be at least 1). With ``order=k``, the probability of
        the next state is conditioned on the ``k`` preceding states.
    adjust : bool
        If ``True``, the transition matrix will be adjusted to ensure that the sum of each row is
        equal to 1: rows for which no transition was observed are filled with a uniform
        distribution. If ``False``, these rows are left filled with zeros. This is useful when the
        transition matrix is used to represent a probability distribution.
    show : bool
        Displays the transition matrix heatmap (only supported for ``order=1``).

    See Also
    --------
    markov_simulate, markov_test_random, markov_test_symmetry

    Returns
    -------
    pd.DataFrame
        The empirical (observed) transition matrix. If ``order`` is greater than 1, a
        ``np.ndarray`` with ``order + 1`` dimensions is returned instead, in which the last axis
        indexes the state that is transitioned to.
    dict
        A dictionary containing additional information, such as the Frequency Matrix (**fm**;
        accessible via the key ``"Occurrences"``), useful for some tests, and the unique states
        (accessible via the key ``"States"``).

    Raises
    ------
    ValueError
        If ``order`` is smaller than 1, or if ``show=True`` is requested with ``order > 1``.

    Examples
    --------
    .. ipython:: python

      import neurokit2 as nk

      sequence = ["A", "A", "C", "B", "B", "B", "C", "A", "A", "D"]

      @savefig p_transition_matrix1.png scale=100%
      tm, _ = nk.transition_matrix(sequence, show=True)
      @suppress
      plt.close()

    .. ipython:: python

      tm

    In this example, the transition from D is unknown (it is the last element), resulting in an
    absence of transitioning probability. As this can cause issues, unknown probabilities are
    replaced by a uniform distribution, but this can be turned off using the ``adjust`` argument.

    .. ipython:: python

      tm, _ = nk.transition_matrix(sequence, adjust=False)
      tm

    Transition matrix of higher order

    .. ipython:: python

      sequence = ["A", "A", "A", "B", "A", "A", "B", "A", "A", "B"]
      tm, _ = nk.transition_matrix(sequence, order=2)
      tm

    """
    # Fail fast on unsupported arguments, before doing any work
    if order < 1:
        raise ValueError(
            "NeuroKit error: transition_matrix(): 'order' must be an integer greater or equal to 1."
        )
    if show is True and order > 1:
        raise ValueError(
            "Visualization of order > 1 not supported yet. "
            "Consider helping us to implement it!"
        )

    sequence = as_vector(sequence)

    # Unique states (sorted) and, for each observation, the position of its state in `states`
    states, codes = np.unique(sequence, return_inverse=True)
    codes = np.reshape(codes, -1)
    n_states = len(states)

    # Observed frequencies: one axis per state of each run of (order + 1) consecutive states
    freqs = np.zeros((n_states,) * (order + 1))
    n_windows = max(len(codes) - order, 0)
    windows = tuple(codes[lag : lag + n_windows] for lag in range(order + 1))
    # np.add.at accumulates correctly when the same run occurs several times
    np.add.at(freqs, windows, 1)

    # Contexts never followed by any state (e.g., the last element) have unknown transitions
    unknown = freqs.sum(axis=-1) == 0

    # Give them uniform counts so that the normalization below never divides by zero
    filled = freqs.copy()
    filled[unknown] = 1

    # Convert to probabilities by normalizing along the last axis (the "next state" axis).
    # keepdims makes the totals broadcast against the right contexts for any order.
    tm = filled / filled.sum(axis=-1, keepdims=True)

    # If no adjustment, unknown transitions are reported as zeros
    if adjust is False:
        tm[unknown] = 0

    # Convert to DataFrame (only possible for 2D matrices)
    if order == 1:
        tm = pd.DataFrame(tm, index=states, columns=states)
        freqs = pd.DataFrame(freqs, index=states, columns=states)

    if show is True:
        fig, ax = plt.subplots()
        ax.imshow(tm, cmap="Reds", interpolation="nearest")
        ax.set_xticks(np.arange(len(tm)))
        ax.set_yticks(np.arange(len(tm)))
        ax.set_xticklabels(tm.columns)
        ax.set_yticklabels(tm.index)

        # Annotate each cell with its probability (white text on dark cells for legibility)
        for i in range(tm.shape[0]):
            for j in range(tm.shape[1]):
                value = tm.iat[i, j]
                color = "white" if value > 0.5 else "black"
                ax.text(j, i, f"{value:.2f}", ha="center", va="center", color=color)

        ax.set_title("Transition Matrix")
        fig.tight_layout()

    return tm, {"Occurrences": freqs, "States": states}
# =============================================================================
# Utils
# =============================================================================
def _sanitize_tm_input(tm, probs=True):
    """Make sure the input is a transition (or frequency) matrix.

    Parameters
    ----------
    tm : Union[pd.DataFrame, list, np.array, pd.Series]
        Either a square DataFrame (taken to be a transition or frequency matrix already), or
        anything else, which is then passed to ``transition_matrix()`` as a sequence of states.
    probs : bool
        If ``True``, a matrix of probabilities is expected and returned. Otherwise, a frequency
        matrix (containing counts) is expected and returned.

    Returns
    -------
    pd.DataFrame
        The probability matrix (``probs=True``) or the frequency matrix (otherwise).

    Raises
    ------
    ValueError
        If a square DataFrame is passed but its content does not match ``probs``.

    """
    # If square dataframe, then surely a transition matrix
    if isinstance(tm, pd.DataFrame) and tm.shape[1] == tm.shape[0]:
        # Heuristic: any value above 1 means the matrix holds counts rather than probabilities.
        # NOTE: a frequency matrix whose counts are all 0 or 1 is mistaken for probabilities.
        has_counts = tm.values.max() > 1
        wants_probs = probs is True

        if has_counts and wants_probs:
            raise ValueError(
                "Transition matrix must be a probability matrix (all probabilities must be"
                " <= 1)."
            )
        if not has_counts and not wants_probs:
            raise ValueError(
                "Transition matrix must be a frequency matrix containing counts and not"
                " probabilities. Please pass the `info['Occurrences']` object instead of"
                " the transition matrix."
            )
        return tm

    # Otherwise, treat the input as a sequence of states and convert it. transition_matrix()
    # returns a (matrix, info) tuple, so pick the object that matches what was asked for.
    tm, info = transition_matrix(tm)
    if probs is True:
        return tm
    return info["Occurrences"]


# def transition_matrix_plot(tm):
#     """Graph of Transition Matrix

#     Abandoned for now because networkx gives ugly results. Please do help!
#     """
#     try:
#         import networkx as nx
#     except ImportError:
#         raise ImportError(
#             "NeuroKit error: transition_matrix_plot(): the 'networkx' module is required for this ",
#             "function to run. Please install it first (`pip install networkx`).",
#         )

#     # create graph object
#     G = nx.MultiDiGraph(tm)

#     edge_labels = {}
#     for col in tm.columns:
#         for row in tm.index:
#             G.add_edge(row, col, weight=tm.loc[row, col])
#             edge_labels[(row, col)] = label = "{:.02f}".format(tm.loc[row, col])

#     pos = nx.circular_layout(G)
#     nx.draw_networkx_edges(G, pos, width=2.0, alpha=0.5)
#     nx.draw_networkx_edge_labels(G, pos, edge_labels)
#     nx.draw_networkx(G, pos)