"""
Compute mixed Nash equilibria of an N-player normal form game by
applying the imitation game algorithm by McLennan and Tourky to the best
response correspondence.
"""
import numbers
import numpy as np
from .._compute_fp import _compute_fixed_point_ig
from .normal_form_game import pure2mixed
from .utilities import NashResult
[docs]def mclennan_tourky(g, init=None, epsilon=1e-3, max_iter=200,
full_output=False):
r"""
Find one mixed-action epsilon-Nash equilibrium of an N-player normal
form game by the fixed point computation algorithm by McLennan and
Tourky [1]_.
Parameters
----------
g : NormalFormGame
NormalFormGame instance.
init : array_like(int or array_like(float, ndim=1)), optional
Initial action profile, an array of N objects, where each object
must be an iteger (pure action) or an array of floats (mixed
action). If None, default to an array of zeros (the zero-th
action for each player).
epsilon : scalar(float), optional(default=1e-3)
Value of epsilon-optimality.
max_iter : scalar(int), optional(default=100)
Maximum number of iterations.
full_output : bool, optional(default=False)
If False, only the computed Nash equilibrium is returned. If
True, the return value is `(NE, res)`, where `NE` is the Nash
equilibrium and `res` is a `NashResult` object.
Returns
-------
NE : tuple(ndarray(float, ndim=1))
Tuple of computed Nash equilibrium mixed actions.
res : NashResult
Object containing information about the computation. Returned
only when `full_output` is True. See `NashResult` for details.
Examples
--------
Consider the following version of 3-player "anti-coordination" game,
where action 0 is a safe action which yields payoff 1, while action
1 yields payoff :math:`v` if no other player plays 1 and payoff 0
otherwise:
>>> N = 3
>>> v = 2
>>> payoff_array = np.empty((2,)*n)
>>> payoff_array[0, :] = 1
>>> payoff_array[1, :] = 0
>>> payoff_array[1].flat[0] = v
>>> g = NormalFormGame((Player(payoff_array),)*N)
>>> print(g)
3-player NormalFormGame with payoff profile array:
[[[[ 1., 1., 1.], [ 1., 1., 2.]],
[[ 1., 2., 1.], [ 1., 0., 0.]]],
[[[ 2., 1., 1.], [ 0., 1., 0.]],
[[ 0., 0., 1.], [ 0., 0., 0.]]]]
This game has a unique symmetric Nash equilibrium, where the
equilibrium action is given by :math:`(p^*, 1-p^*)` with :math:`p^*
= 1/v^{1/(N-1)}`:
>>> p_star = 1/(v**(1/(N-1)))
>>> [p_star, 1 - p_star]
[0.7071067811865475, 0.29289321881345254]
Obtain an approximate Nash equilibrium of this game by
`mclennan_tourky`:
>>> epsilon = 1e-5 # Value of epsilon-optimality
>>> NE = mclennan_tourky(g, epsilon=epsilon)
>>> print(NE[0], NE[1], NE[2], sep='\n')
[ 0.70710754 0.29289246]
[ 0.70710754 0.29289246]
[ 0.70710754 0.29289246]
>>> g.is_nash(NE, tol=epsilon)
True
Additional information is returned if `full_output` is set True:
>>> NE, res = mclennan_tourky(g, epsilon=epsilon, full_output=True)
>>> res.converged
True
>>> res.num_iter
18
References
----------
.. [1] A. McLennan and R. Tourky, "From Imitation Games to
Kakutani," 2006.
"""
try:
N = g.N
except AttributeError:
raise TypeError('g must be a NormalFormGame')
if N < 2:
raise NotImplementedError('Not implemented for 1-player games')
if init is None:
init = (0,) * N
try:
l = len(init)
except TypeError:
raise TypeError('init must be array_like')
if l != N:
raise ValueError(
'init must be of length {N}'.format(N=N)
)
indptr = np.empty(N+1, dtype=int)
indptr[0] = 0
indptr[1:] = np.cumsum(g.nums_actions)
x_init = _flatten_action_profile(init, indptr)
is_approx_fp = lambda x: _is_epsilon_nash(x, g, epsilon, indptr)
x_star, converged, num_iter = \
_compute_fixed_point_ig(_best_response_selection, x_init, max_iter,
verbose=0, print_skip=1,
is_approx_fp=is_approx_fp,
g=g, indptr=indptr)
NE = _get_action_profile(x_star, indptr)
if not full_output:
return NE
res = NashResult(NE=NE,
converged=converged,
num_iter=num_iter,
max_iter=max_iter,
init=init,
epsilon=epsilon)
return NE, res
def _best_response_selection(x, g, indptr=None):
"""
Selection of the best response correspondence of `g` that selects
the best response action with the smallest index when there are
ties, where the input and output are flattened action profiles.
Parameters
----------
x : array_like(float, ndim=1)
Array of flattened mixed action profile of length equal to n_0 +
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
i's mixed action.
g : NormalFormGame
indptr : array_like(int, ndim=1), optional(default=None)
Array of index pointers of length N+1, where `indptr[0] = 0` and
`indptr[i+1] = indptr[i] + n_i`. Created internally if None.
Returns
-------
out : ndarray(float, ndim=1)
Array of flattened mixed action profile of length equal to n_0 +
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
i's mixed action representation of his pure best response.
"""
N = g.N
if indptr is None:
indptr = np.empty(N+1, dtype=int)
indptr[0] = 0
indptr[1:] = np.cumsum(g.nums_actions)
out = np.zeros(indptr[-1])
if N == 2:
for i in range(N):
opponent_action = x[indptr[1-i]:indptr[1-i+1]]
pure_br = g.players[i].best_response(opponent_action)
out[indptr[i]+pure_br] = 1
else:
for i in range(N):
opponent_actions = tuple(
x[indptr[(i+j)%N]:indptr[(i+j)%N+1]] for j in range(1, N)
)
pure_br = g.players[i].best_response(opponent_actions)
out[indptr[i]+pure_br] = 1
return out
def _is_epsilon_nash(x, g, epsilon, indptr=None):
"""
Determine whether `x` is an `epsilon`-Nash equilibrium of `g`.
Parameters
----------
x : array_like(float, ndim=1)
Array of flattened mixed action profile of length equal to n_0 +
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
i's mixed action.
g : NormalFormGame
epsilon : scalar(float)
indptr : array_like(int, ndim=1), optional(default=None)
Array of index pointers of length N+1, where `indptr[0] = 0` and
`indptr[i+1] = indptr[i] + n_i`. Created internally if None.
Returns
-------
bool
"""
if indptr is None:
indptr = np.empty(g.N+1, dtype=int)
indptr[0] = 0
indptr[1:] = np.cumsum(g.nums_actions)
action_profile = _get_action_profile(x, indptr)
return g.is_nash(action_profile, tol=epsilon)
def _get_action_profile(x, indptr):
"""
Obtain a tuple of mixed actions from a flattened action profile.
Parameters
----------
x : array_like(float, ndim=1)
Array of flattened mixed action profile of length equal to n_0 +
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
i's mixed action.
indptr : array_like(int, ndim=1)
Array of index pointers of length N+1, where `indptr[0] = 0` and
`indptr[i+1] = indptr[i] + n_i`.
Returns
-------
action_profile : tuple(ndarray(float, ndim=1))
Tuple of N mixed actions, each of length n_i.
"""
N = len(indptr) - 1
action_profile = tuple(x[indptr[i]:indptr[i+1]] for i in range(N))
return action_profile
def _flatten_action_profile(action_profile, indptr):
"""
Flatten the given action profile.
Parameters
----------
action_profile : array_like(int or array_like(float, ndim=1))
Profile of actions of the N players, where each player i' action
is a pure action (int) or a mixed action (array_like of floats
of length n_i).
indptr : array_like(int, ndim=1)
Array of index pointers of length N+1, where `indptr[0] = 0` and
`indptr[i+1] = indptr[i] + n_i`.
Returns
-------
out : ndarray(float, ndim=1)
Array of flattened mixed action profile of length equal to n_0 +
... + n_N-1, where `out[indptr[i]:indptr[i+1]]` contains player
i's mixed action.
"""
N = len(indptr) - 1
out = np.empty(indptr[-1])
for i in range(N):
if isinstance(action_profile[i], numbers.Integral): # pure action
num_actions = indptr[i+1] - indptr[i]
mixed_action = pure2mixed(num_actions, action_profile[i])
else: # mixed action
mixed_action = action_profile[i]
out[indptr[i]:indptr[i+1]] = mixed_action
return out