# Source code for quantecon.markov.estimate

import numpy as np
from numba import njit
from .core import MarkovChain
from .._gridtools import cartesian_nearest_index, cartesian

[docs]def estimate_mc(X):
r"""
Estimate the Markov chain associated with a time series :math:X =
(X_0, \ldots, X_{T-1}) assuming that the state space is the finite
set :math:\{X_0, \ldots, X_{T-1}\} (duplicates removed). The
estimation is by maximum likelihood. The estimated transition
probabilities are given by the matrix :math:P such that
:math:P[i, j] = N_{ij} / N_i, where :math:N_{ij} =
\sum_{t=0}^{T-1} 1_{\{X_t=s_i, X_{t+1}=s_j\}}, the number of
transitions from state :math:s_i to state :math:s_j, while
:math:N_i is the total number of visits to :math:s_i. The result
is returned as a MarkovChain instance.

Parameters
----------
X : array_like
A time series of state values, from which the transition matrix
will be estimated, where X[t] contains the t-th observation.

Returns
-------
mc : MarkovChain
A MarkovChain instance where mc.P is a stochastic matrix
estimated from the data X and mc.state_values is an array of
values that appear in X (sorted in ascending order).

"""
X = np.asarray(X)
axis = 0 if X.ndim > 1 else None
state_values, indices = np.unique(X, return_inverse=True, axis=axis)

n = len(state_values)
P = np.zeros((n, n))  # dtype=float to modify in place upon normalization
P = _count_transition_frequencies(indices, P)
P /= P.sum(1)[:, np.newaxis]

mc = MarkovChain(P, state_values=state_values)
return mc

@njit(cache=True)
def _count_transition_frequencies(index_series, trans_counter):
T = len(index_series)
i = index_series[0]
for t in range(1, T):
j = index_series[t]
trans_counter[i, j] += 1
i = j
return trans_counter

[docs]def fit_discrete_mc(X, grids, order='C'):
r"""
Function that takes an arbitrary time series :math: (X_t)_{t=0}^{T-1} in
:math: \mathbb R^n plus a set of grid points in each dimension and converts
it to a MarkovChain by first applying discretization onto the grid
and then estimation of the Markov chain.

Parameters
----------

X: array_like(ndim=2)
Time-series such that the t-th row is :math:x_t.
It should be of the shape T x n, where n is the number of dimensions.

grids: array_like(array_like(ndim=1))
Array of n sorted arrays. Set of grid points in each dimension

Examples
--------

>>> grids = (np.arange(3), np.arange(2))
>>> X = [(-0.1, 1.2), (2, 0), (0.6, 0.4), (1.0, 0.1)]
>>> mc = fit_discrete_mc(X, grids)
>>> mc.state_values
array([[0, 1],
[1, 0],
[2, 0]])
>>> mc.P
array([[0., 0., 1.],
[0., 1., 0.],
[0., 1., 0.]])

Returns
-------

mc: MarkovChain
An instance of the MarkovChain class constructed after discretization
onto the grid.
"""
X_indices = cartesian_nearest_index(X, grids, order=order)
mc = estimate_mc(X_indices)
# Assign the visited states in the cartesian product as the state values
prod = cartesian(grids, order=order)
mc.state_values = prod[mc.state_values]
return mc