Source code for quantecon.game_theory.howson_lcp

"""
Compute a Nash Equilibrium of a Polymatrix Game.

Examples
--------

Find a Nash Equilibrium of a Matching Pennies Game.
(This NE is unique so this works reliably.)

>>> matrices = {
...     (0, 1): [[1., -1.], [-1., 1.]],
...     (1, 0): [[-1., 1.], [1., -1.]]
... }
>>> polymg = PolymatrixGame(matrices)
>>> result = polym_lcp_solver(polymg, full_output=True)
>>> print(result[0])
(array([0.5, 0.5]), array([0.5, 0.5]))
>>> print(result[1])
        NE: (array([0.5, 0.5]), array([0.5, 0.5]))
 converged: True
      init: {0: 0, 1: 0}
  max_iter: -1
  num_iter: 4

"""

import numpy as np
from .polymatrix_game import PolymatrixGame
from ..optimize.pivoting import _pivoting, _lex_min_ratio_test
from ..optimize.lcp_lemke import _get_solution
from .utilities import NashResult
from typing import Sequence, Union, Optional
from numpy.typing import NDArray


[docs]def polym_lcp_solver(
        polymg: PolymatrixGame,
        starting_player_actions: Optional[Sequence[int]] = None,
        max_iter: int = -1,
        full_output: bool = False,
) -> Union[tuple[NDArray, ...], tuple[tuple[NDArray, ...], NashResult]]:
    """
    Finds a Nash Equilbrium of a Polymatrix Game.

    Uses Howson's algorithm which utilises
    linear complementarity [1]_.

    Parameters
    ----------
    polymg : PolymatrixGame
        Polymatrix game to solve.

    starting_player_actions : Sequence[int], optional
        Pure actions for each player at which the algorithm begins.
        Defaults to each player playing their first action.

    max_iter : int, optional
        Maximum number of iterations of the complementary
        pivoting before giving up. Howson proves that with enough
        iterations, it will reach a Nash Equilibrium.
        Defaults to never giving up.

    full_output : bool, optional
        When True, adds information about the run to the output
        actions and puts them in a NashResult. Defaults to False.

    Returns
    -------
    NE : tuple(ndarray(float, ndim=1))
        Tuple of computed Nash equilibrium mixed actions. A Nash
        Equilibrium if not stopped early by reaching `max_iter`.

    res : NashResult
        Object containing information about the computation: the number
        of iterations, whether it has converged, and the initial
        conditions of the algorithm. Returned only when `full_output`
        is True. See `NashResult` for details.

    References
    ----------
    .. [1] Howson, Joseph T. “Equilibria of Polymatrix Games.”
       Management Science 18, no. 5 (1972): 312–18.
       http://www.jstor.org/stable/2634798.

    """
    LOW_AVOIDER = 2.0
    # makes all of the costs greater than 0
    positive_cost_maker = polymg.range_of_payoffs()[1] + LOW_AVOIDER

    eye_N = np.eye(polymg.N)

    # Construct the LCP like Howson:
    M = np.vstack([
        np.hstack([
            np.zeros(
                (polymg.nums_actions[player], polymg.nums_actions[player])
            ) if p2 == player
            else (positive_cost_maker - polymg.polymatrix[(player, p2)])
            for p2 in range(polymg.N)
        ] + [
            -np.outer(np.ones(
                polymg.nums_actions[player]), eye_N[player])
        ])
        for player in range(polymg.N)
    ] + [
        np.hstack([
            np.hstack([
                np.outer(eye_N[player], np.ones(
                    polymg.nums_actions[player]))
                for player in range(polymg.N)
            ]
            ),
            np.zeros((polymg.N, polymg.N))
        ])
    ]
    )
    total_actions = sum(polymg.nums_actions)
    q = np.hstack([np.zeros(total_actions), -np.ones(polymg.N)])

    n = np.shape(M)[0]
    tableau = np.hstack([
        np.eye(n),
        -M,
        np.reshape(q, (-1, 1))
    ])

    basis = np.array(range(n))
    z = np.empty(n)

    if starting_player_actions is None:
        starting_player_actions = {
            player: 0
            for player in range(polymg.N)
        }
    else:
        valid_start = (
            len(starting_player_actions) == polymg.N and
            all(a < max_a for a, max_a in zip(
                starting_player_actions, polymg.nums_actions))
        )
        assert valid_start, "Invalid starting pure actions."

    for player in range(polymg.N):
        row = sum(polymg.nums_actions) + player
        col = n + sum(polymg.nums_actions[:player]) + \
            starting_player_actions[player]
        _pivoting(tableau, col, row)
        # These pivots do not count as iters
        basis[row] = col

    num_iter = 0
    converging = True

    # Array to store row indices in lex_min_ratio_test
    argmins = np.empty(n + polymg.N, dtype=np.int_)
    p = 0
    retro = False
    while p < polymg.N and converging:
        finishing_v = sum(polymg.nums_actions) + n + p
        finishing_x = n + \
            sum(polymg.nums_actions[:p]) + starting_player_actions[p]
        finishing_y = finishing_x - n

        pivcol = (
            finishing_v if not retro
            else finishing_x if finishing_y in basis
            else finishing_y
        )

        retro = False

        while True:

            if num_iter == max_iter:
                converging = False
                break
            num_iter += 1

            _, pivrow = _lex_min_ratio_test(
                tableau, pivcol, 0, argmins,
            )

            _pivoting(tableau, pivcol, pivrow)
            basis[pivrow], leaving_var = pivcol, basis[pivrow]

            if leaving_var == finishing_x or leaving_var == finishing_y:
                p += 1
                break
            elif leaving_var == finishing_v:
                # print("entering the backtracking case")
                p -= 1
                retro = True
                break
            elif leaving_var < n:
                pivcol = leaving_var + n
            else:
                pivcol = leaving_var - n

    combined_solution = _get_solution(tableau, basis, z)

    # might not actually be Nash Equilibrium if we hit max iter
    NE = tuple(
        combined_solution[
            sum(polymg.nums_actions[:player]):
            sum(polymg.nums_actions[:player + 1])
        ]
        for player in range(polymg.N)
    )

    if not full_output:
        return NE

    res = NashResult(NE=NE,
                     converged=converging,
                     num_iter=num_iter,
                     max_iter=max_iter,
                     init=starting_player_actions)

    return NE, res