Source code for neurokit2.markov.transition_matrix

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..misc import as_vector



[docs]
def transition_matrix(sequence, order=1, adjust=True, show=False):
    """**Transition Matrix**

    A **Transition Matrix** (also known as a stochastic matrix or a **Markov matrix**) is a
    convenient way of representing and describing a sequence of (discrete) states, also known as
    **discrete Markov chains**. Each of its entries is a probability of transitioning from one
    state to the other.

    .. note::

        This function is fairly new and hasn't be tested extensively. Please help us by
        double-checking the code and letting us know if everything is correct.

    Parameters
    ----------
    sequence : Union[list, np.array, pd.Series]
        A list of discrete states.
    order : int
        The order of the Markov chain.
    adjust : bool
        If ``True``, the transition matrix will be adjusted to ensure that the sum of each row is
        equal to 1. This is useful when the transition matrix is used to represent a probability
        distribution.
    show : bool
        Displays the transition matrix heatmap.

    See Also
    --------
    markov_simulate, markov_test_random, markov_test_symmetry

    Returns
    -------
    pd.DataFrame
        The empirical (observed) transition matrix.
    dict
        A dictionnary containing additional information, such as the Frequency Matrix (**fm**;
        accessible via the key ``"Occurrences"``), useful for some tests.

    Examples
    --------
    .. ipython:: python

      import neurokit2 as nk

      sequence = ["A", "A", "C", "B", "B", "B", "C", "A", "A", "D"]

      @savefig p_transition_matrix1.png scale=100%
      tm, _ = nk.transition_matrix(sequence, show=True)
      @suppress
      plt.close()

    .. ipython:: python

      tm

    In this example, the transition from D is unknown (it is the last element), resulting in an
    absence of transitioning probability. As this can cause issues, unknown probabilities are
    replaced by a uniform distribution, but this can be turned off using the ``adjust`` argument.

    .. ipython:: python

      tm, _ = nk.transition_matrix(sequence, adjust=False)
      tm

    Transition matrix of higher order

    .. ipython:: python

      sequence = ["A", "A", "A", "B", "A", "A", "B", "A", "A", "B"]
      tm, _ = nk.transition_matrix(sequence, order=2)
      tm

    """
    sequence = as_vector(sequence)

    # Observed transition matrix
    states = np.unique(sequence)
    n_states = len(states)

    # Get observed transition matrix
    freqs = np.zeros((n_states,) * (order + 1))
    for idx in zip(*[sequence[i:] for i in range(order + 1)]):
        idx = tuple([np.argwhere(states == k)[0][0] for k in idx])
        freqs[idx] += 1
    freqs

    # Find rows containing zeros (unknown transition)
    idx = freqs.sum(axis=-1) == 0

    # Fillit with uniform probability to avoid problem in division
    freqs[idx, :] = 1

    # Convert to probabilities
    tm = (freqs.T / freqs.sum(axis=-1)).T

    # If no adjustment, revert to 0
    freqs[idx, :] = 0
    if adjust is False:
        tm[idx, :] = 0

    # Convert to DataFrame
    if order == 1:
        tm = pd.DataFrame(tm, index=states, columns=states)
        freqs = pd.DataFrame(freqs, index=states, columns=states)

    if show is True:
        if order > 1:
            raise ValueError(
                "Visualization of order > 1 not supported yet. "
                "Consider helping us to implement it!"
            )
        fig, ax = plt.subplots()
        ax.imshow(tm, cmap="Reds", interpolation="nearest")
        ax.set_xticks(np.arange(len(tm)))
        ax.set_yticks(np.arange(len(tm)))
        ax.set_xticklabels(tm.columns)
        ax.set_yticklabels(tm.index)

        # Loop over data dimensions and create text annotations.
        for i, row in enumerate(tm.index):
            for j, col in enumerate(tm.columns):
                if tm.loc[row, col] > 0.5:
                    color = "white"
                else:
                    color = "black"
                ax.text(j, i, f"{tm.loc[row, col]:.2f}", ha="center", va="center", color=color)
        ax.set_title("Transition Matrix")
        fig.tight_layout()

    return tm, {"Occurrences": freqs, "States": states}



# =============================================================================
# Utils
# =============================================================================


def _sanitize_tm_input(tm, probs=True):
    # If symmetric dataframe, then surely a transition matrix
    if isinstance(tm, pd.DataFrame) and tm.shape[1] == tm.shape[0]:
        if tm.values.max() > 1:
            if probs is True:
                raise ValueError(
                    "Transition matrix must be a probability matrix (all probabilities must be"
                    " < 1)."
                )
            else:
                return tm
        else:
            if probs is True:
                return tm
            else:
                raise ValueError(
                    "Transition matrix must be a frequency matrix containing counts and not"
                    " probabilities. Please pass the `info['Occurrences']` object instead of"
                    " the transition matrix."
                )

    # Otherwise, conver to TM
    else:
        return transition_matrix(tm)


# def transition_matrix_plot(tm):
#     """Graph of Transition Matrix

#     Abandonned for now because networkx gives ugly results. Please do help!
#     """
#     try:
#         import networkx as nx
#     except ImportError:
#         raise ImportError(
#             "NeuroKit error: transition_matrix_plot(): the 'networkx' module is required for this ",
#             "function to run. Please install it first (`pip install networkx`).",
#         )

#     # create graph object
#     G = nx.MultiDiGraph(tm)

#     edge_labels = {}
#     for col in tm.columns:
#         for row in tm.index:
#             G.add_edge(row, col, weight=tm.loc[row, col])
#             edge_labels[(row, col)] = label = "{:.02f}".format(tm.loc[row, col])

#     pos = nx.circular_layout(G)
#     nx.draw_networkx_edges(G, pos, width=2.0, alpha=0.5)
#     nx.draw_networkx_edge_labels(G, pos, edge_labels)
#     nx.draw_networkx(G, pos)