# Source code for quantecon.game_theory.lemke_howson

"""
Compute mixed Nash equilibria of a 2-player normal form game by the
Lemke-Howson algorithm.

"""
import numbers
import numpy as np
from numba import jit
from .utilities import NashResult
from ..optimize.pivoting import _pivoting, _lex_min_ratio_test

[docs]def lemke_howson(g, init_pivot=0, max_iter=10**6, capping=None,
full_output=False):
"""
Find one mixed-action Nash equilibrium of a 2-player normal form
game by the Lemke-Howson algorithm _, implemented with
"complementary pivoting" (see, e.g., von Stengel _ for details).

Parameters
----------
g : NormalFormGame
NormalFormGame instance with 2 players.

init_pivot : scalar(int), optional(default=0)
Initial pivot, an integer k such that 0 <= k < m+n, where
integers 0, ..., m-1 and m, ..., m+n-1 correspond to the actions
of players 0 and 1, respectively.

max_iter : scalar(int), optional(default=10**6)
Maximum number of pivoting steps.

capping : scalar(int), optional(default=None)
If supplied, the routine is executed with the heuristics
proposed by Codenotti et al. _; see Notes below for details.

full_output : bool, optional(default=False)
If False, only the computed Nash equilibrium is returned. If
True, the return value is (NE, res), where NE is the Nash
equilibrium and res is a NashResult object.

Returns
-------
NE : tuple(ndarray(float, ndim=1))
Tuple of computed Nash equilibrium mixed actions.

res : NashResult
Object containing information about the computation. Returned
only when full_output is True. See NashResult for details.

Examples
--------
Consider the following game from von Stengel _:

>>> np.set_printoptions(precision=4)  # Reduce the digits printed
>>> bimatrix = [[(3, 3), (3, 2)],
...             [(2, 2), (5, 6)],
...             [(0, 3), (6, 1)]]
>>> g = NormalFormGame(bimatrix)

Obtain a Nash equilibrium of this game by lemke_howson with player
0's action 1 (out of the three actions 0, 1, and 2) as the initial
pivot:

>>> lemke_howson(g, init_pivot=1)
(array([ 0.    ,  0.3333,  0.6667]), array([ 0.3333,  0.6667]))
>>> g.is_nash(_)
True

Additional information is returned if full_output is set True:

>>> NE, res = lemke_howson(g, init_pivot=1, full_output=True)
>>> res.converged  # Whether the routine has converged
True
>>> res.num_iter  # Number of pivoting steps performed
4

Notes
-----
* This routine is implemented with floating point arithmetic and
thus is subject to numerical instability.

* If capping is set to a positive integer, the routine is executed
with the heuristics proposed by _:

* For k = init_pivot, init_pivot + 1, ..., init_pivot +
(m+n-2), (modulo m+n), the Lemke-Howson algorithm is executed
with k as the initial pivot and capping as the maximum number
of pivoting steps. If the algorithm converges during this loop,
then the Nash equilibrium found is returned.

* Otherwise, the Lemke-Howson algorithm is executed with
init_pivot + (m+n-1) (modulo m+n) as the initial pivot, with a
limit max_iter on the total number of pivoting steps.

Accoding to the simulation results for *uniformly random games*,
for medium- to large-size games this heuristics outperforms the
basic Lemke-Howson algorithm with a fixed initial pivot, where
_ suggests that capping be set to 10.

References
----------
..  B. Codenotti, S. De Rossi, and M. Pagan, "An Experimental
Analysis of Lemke-Howson Algorithm," arXiv:0811.3247, 2008.

..  C. E. Lemke and J. T. Howson, "Equilibrium Points of Bimatrix
Games," Journal of the Society for Industrial and Applied
Mathematics (1964), 413-423.

..  B. von Stengel, "Equilibrium Computation for Two-Player Games
in Strategic and Extensive Form," Chapter 3, N. Nisan, T.
Roughgarden, E. Tardos, and V. Vazirani eds., Algorithmic Game
Theory, 2007.

"""
try:
N = g.N
except:
raise TypeError('g must be a 2-player NormalFormGame')
if N != 2:
raise NotImplementedError('Implemented only for 2-player games')

payoff_matrices = g.payoff_arrays
nums_actions = g.nums_actions
total_num = sum(nums_actions)

msg = 'init_pivot must be an integer k' + \
'such that 0 <= k < {0}'.format(total_num)

if not isinstance(init_pivot, numbers.Integral):
raise TypeError(msg)

if not (0 <= init_pivot < total_num):
raise ValueError(msg)

if capping is None:
capping = max_iter

tableaux = tuple(
np.empty((nums_actions[1-i], total_num+1)) for i in range(N)
)
bases = tuple(np.empty(nums_actions[1-i], dtype=int) for i in range(N))

converged, num_iter, init_pivot_used = \
_lemke_howson_capping(payoff_matrices, tableaux, bases, init_pivot,
max_iter, capping)
NE = _get_mixed_actions(tableaux, bases)

if not full_output:
return NE

res = NashResult(NE=NE,
converged=converged,
num_iter=num_iter,
max_iter=max_iter,
init=init_pivot_used)

return NE, res

@jit(nopython=True, cache=True)
def _lemke_howson_capping(payoff_matrices, tableaux, bases, init_pivot,
max_iter, capping):
"""
Execute the Lemke-Howson algorithm with the heuristics proposed by
Codenotti et al.

Parameters
----------
payoff_matrices : tuple(ndarray(ndim=2))
Tuple of two arrays representing payoff matrices, of shape
(m, n) and (n, m), respectively.

tableaux : tuple(ndarray(float, ndim=2))
Tuple of two arrays to be used to store the tableaux, of shape
(n, m+n+1) and (m, m+n+1), respectively. Modified in place.

bases : tuple(ndarray(int, ndim=1))
Tuple of two arrays to be used to store the bases, of shape (n,)
and (m,), respectively. Modified in place.

init_pivot : scalar(int)
Integer k such that 0 <= k < m+n.

max_iter : scalar(int)
Maximum number of pivoting steps.

capping : scalar(int)
Value for capping. If set equal to max_iter, then the routine
is equivalent to the standard Lemke-Howson algorithm.

"""
m, n = tableaux.shape, tableaux.shape
init_pivot_curr = init_pivot
max_iter_curr = max_iter
total_num_iter = 0

for k in range(m+n-1):
capping_curr = min(max_iter_curr, capping)

_initialize_tableaux(payoff_matrices, tableaux, bases)
converged, num_iter = \
_lemke_howson_tbl(tableaux, bases, init_pivot_curr, capping_curr)

total_num_iter += num_iter

if converged or total_num_iter >= max_iter:
return converged, total_num_iter, init_pivot_curr

init_pivot_curr += 1
if init_pivot_curr >= m + n:
init_pivot_curr -= m + n
max_iter_curr -= num_iter

_initialize_tableaux(payoff_matrices, tableaux, bases)
converged, num_iter = \
_lemke_howson_tbl(tableaux, bases, init_pivot_curr, max_iter_curr)
total_num_iter += num_iter

return converged, total_num_iter, init_pivot_curr

@jit(nopython=True, cache=True)
def _initialize_tableaux(payoff_matrices, tableaux, bases):
"""
Given a tuple of payoff matrices, initialize the tableau and basis
arrays in place.

For each player i, if payoff_matrices[i].min() is non-positive,
then stored in the tableau are payoff values incremented by
abs(payoff_matrices[i].min()) + 1 (to ensure for the tableau not
to have a negative entry or a column identically zero).

Suppose that the players 0 and 1 have m and n actions, respectively.

* tableaux has n rows and m+n+1 columns, where columns 0, ...,
m-1 and m, ..., m+n-1 correspond to the non-slack and slack
variables, respectively.

* tableaux has m rows and m+n+1 columns, where columns 0, ...,
m-1 and m, ..., m+n-1 correspond to the slack and non-slack
variables, respectively.

* In each tableaux[i], column m+n contains the values of the basic
variables (which are initially 1).

* bases and bases contain basic variable indices, which
are initially m, ..., m+n-1 and 0, ..., m-1, respectively.

Parameters
----------
payoff_matrices : tuple(ndarray(ndim=2))
Tuple of two arrays representing payoff matrices, of shape
(m, n) and (n, m), respectively.

tableaux : tuple(ndarray(float, ndim=2))
Tuple of two arrays to be used to store the tableaux, of shape
(n, m+n+1) and (m, m+n+1), respectively. Modified in place.

bases : tuple(ndarray(int, ndim=1))
Tuple of two arrays to be used to store the bases, of shape (n,)
and (m,), respectively. Modified in place.

Returns
-------
tableaux : tuple(ndarray(float, ndim=2))
View to tableaux.

bases : tuple(ndarray(int, ndim=1))
View to bases.

Examples
--------
>>> A = np.array([[3, 3], [2, 5], [0, 6]])
>>> B = np.array([[3, 2, 3], [2, 6, 1]])
>>> m, n = A.shape
>>> tableaux = (np.empty((n, m+n+1)), np.empty((m, m+n+1)))
>>> bases = (np.empty(n, dtype=int), np.empty(m, dtype=int))
>>> tableaux, bases = _initialize_tableaux((A, B), tableaux, bases)
>>> tableaux
array([[ 3.,  2.,  3.,  1.,  0.,  1.],
[ 2.,  6.,  1.,  0.,  1.,  1.]])
>>> tableaux
array([[ 1.,  0.,  0.,  4.,  4.,  1.],
[ 0.,  1.,  0.,  3.,  6.,  1.],
[ 0.,  0.,  1.,  1.,  7.,  1.]])
>>> bases
(array([3, 4]), array([0, 1, 2]))

"""
nums_actions = payoff_matrices.shape

consts = np.zeros(2)  # To be added to payoffs if min <= 0
for pl in range(2):
min_ = payoff_matrices[pl].min()
if min_ <= 0:
consts[pl] = min_ * (-1) + 1

for pl, (py_start, sl_start) in enumerate(zip((0, nums_actions),
(nums_actions, 0))):
for i in range(nums_actions[1-pl]):
for j in range(nums_actions[pl]):
tableaux[pl][i, py_start+j] = \
payoff_matrices[1-pl][i, j] + consts[1-pl]
for j in range(nums_actions[1-pl]):
if j == i:
tableaux[pl][i, sl_start+j] = 1
else:
tableaux[pl][i, sl_start+j] = 0
tableaux[pl][i, -1] = 1

for i in range(nums_actions[1-pl]):
bases[pl][i] = sl_start + i

return tableaux, bases

@jit(nopython=True, cache=True)
def _lemke_howson_tbl(tableaux, bases, init_pivot, max_iter):
"""
Main body of the Lemke-Howson algorithm implementation.

Perform the complementary pivoting. Modify tablaux and bases in
place.

Parameters
----------
tableaux : tuple(ndarray(float, ndim=2))
Tuple of two arrays containing the tableaux, of shape (n, m+n+1)
and (m, m+n+1), respectively. Modified in place.

bases : tuple(ndarray(int, ndim=1))
Tuple of two arrays containing the bases, of shape (n,) and
(m,), respectively. Modified in place.

init_pivot : scalar(int)
Integer k such that 0 <= k < m+n.

max_iter : scalar(int)
Maximum number of pivoting steps.

Returns
-------
converged : bool
Whether the pivoting terminated before max_iter was reached.

num_iter : scalar(int)
Number of pivoting steps performed.

Examples
--------
>>> np.set_printoptions(precision=4)  # Reduce the digits printed
>>> A = np.array([[3, 3], [2, 5], [0, 6]])
>>> B = np.array([[3, 2, 3], [2, 6, 1]])
>>> m, n = A.shape
>>> tableaux = (np.empty((n, m+n+1)), np.empty((m, m+n+1)))
>>> bases = (np.empty(n, dtype=int), np.empty(m, dtype=int))
>>> tableaux, bases = _initialize_tableaux((A, B), tableaux, bases)
>>> _lemke_howson_tbl(tableaux, bases, 1, 10)
(True, 4)
>>> tableaux
array([[ 0.875 ,  0.    ,  1.    ,  0.375 , -0.125 ,  0.25  ],
[ 0.1875,  1.    ,  0.    , -0.0625,  0.1875,  0.125 ]])
>>> tableaux
array([[ 1.    , -1.6   ,  0.8   ,  0.    ,  0.    ,  0.2   ],
[ 0.    ,  0.4667, -0.4   ,  1.    ,  0.    ,  0.0667],
[ 0.    , -0.0667,  0.2   ,  0.    ,  1.    ,  0.1333]])
>>> bases
(array([2, 1]), array([0, 3, 4]))

The outputs indicate that in the Nash equilibrium obtained, player
0's mixed action plays actions 2 and 1 with positive weights 0.25
and 0.125, while player 1's mixed action plays actions 0 and 1
(labeled as 3 and 4) with positive weights 0.0667 and 0.1333.

"""
init_player = 0
for k in bases:
if k == init_pivot:
init_player = 1
break
pls = [init_player, 1 - init_player]

pivot = init_pivot

m, n = tableaux.shape, tableaux.shape
slack_starts = (m, 0)

# Array to store row indices in lex_min_ratio_test
argmins = np.empty(max(m, n), dtype=np.int_)

converged = False
num_iter = 0

while True:
for pl in pls:
# Determine the leaving variable
_, row_min = _lex_min_ratio_test(tableaux[pl], pivot,
slack_starts[pl], argmins)

# Pivoting step: modify tableau in place
_pivoting(tableaux[pl], pivot, row_min)

# Update the basic variables and the pivot
bases[pl][row_min], pivot = pivot, bases[pl][row_min]

num_iter += 1

if pivot == init_pivot:
converged = True
break
if num_iter >= max_iter:
break
else:
continue
break

return converged, num_iter

@jit(nopython=True, cache=True)
def _get_mixed_actions(tableaux, bases):
"""
From tableaux and bases, extract non-slack basic variables and
return a tuple of the corresponding, normalized mixed actions.

Parameters
----------
tableaux : tuple(ndarray(float, ndim=2))
Tuple of two arrays containing the tableaux, of shape (n, m+n+1)
and (m, m+n+1), respectively.

bases : tuple(ndarray(int, ndim=1))
Tuple of two arrays containing the bases, of shape (n,) and
(m,), respectively.

Returns
-------
tuple(ndarray(float, ndim=1))
Tuple of mixed actions as given by the non-slack basic variables
in the tableaux.

"""
nums_actions = tableaux.shape, tableaux.shape
num = nums_actions + nums_actions
out = np.zeros(num)

for pl, (start, stop) in enumerate(zip((0, nums_actions),
(nums_actions, num))):
sum_ = 0.
for i in range(nums_actions[1-pl]):
k = bases[pl][i]
if start <= k < stop:
out[k] = tableaux[pl][i, -1]
sum_ += tableaux[pl][i, -1]
if sum_ != 0:
out[start:stop] /= sum_

return out[:nums_actions], out[nums_actions:]