# Source code for quantecon.game_theory.repeated_game

```"""
Tools for repeated game.

"""

import numpy as np
from scipy.spatial import ConvexHull
from numba import njit

[docs]class RepeatedGame:
"""
Class representing an N-player repeated game.

Parameters
----------
stage_game : NormalFormGame
The stage game used to create the repeated game.

delta : scalar(float)
The common discount rate at which all players discount the future.

Attributes
----------
sg : NormalFormGame
The stage game. See Parameters.

delta : scalar(float)
See Parameters.

N : scalar(int)
The number of players.

nums_actions : tuple(int)
Tuple of the numbers of actions, one for each player.
"""
def __init__(self, stage_game, delta):
self.sg = stage_game
self.delta = delta
self.N = stage_game.N
self.nums_actions = stage_game.nums_actions

[docs]    def equilibrium_payoffs(self, method=None, options=None):
"""
Compute the set of payoff pairs of all pure-strategy subgame-perfect
equilibria with public randomization for any repeated two-player games
with perfect monitoring and discounting.

Parameters
----------
method : str, optional
The method for solving the equilibrium payoff set.

options : dict, optional
A dictionary of method options. For example, 'abreu_sannikov'
method accepts the following options:

tol : scalar(float)
Tolerance for convergence checking.
max_iter : scalar(int)
Maximum number of iterations.
u_init : ndarray(float, ndim=1)
The initial guess of threat points.

Notes
-----
Here lists all the implemented methods. The default method
is 'abreu_sannikov'.

1. 'abreu_sannikov'
"""
if method is None:
method = 'abreu_sannikov'

if options is None:
options = {}

if method in ('abreu_sannikov', 'AS'):
return _equilibrium_payoffs_abreu_sannikov(self, **options)
else:
msg = f"method {method} not supported."
raise NotImplementedError(msg)

def _equilibrium_payoffs_abreu_sannikov(rpg, tol=1e-12, max_iter=500,
u_init=np.zeros(2)):
"""
Using 'abreu_sannikov' algorithm to compute the set of payoff pairs
of all pure-strategy subgame-perfect equilibria with public randomization
for any repeated two-player games with perfect monitoring and
discounting, following Abreu and Sannikov (2014).

Parameters
----------
rpg : RepeatedGame
Two player repeated game.

tol : scalar(float), optional(default=1e-12)
Tolerance for convergence checking.

max_iter : scalar(int), optional(default=500)
Maximum number of iterations.

u_init : ndarray(float, ndim=1), optional(default=np.zeros(2))
The initial guess of threat points.

Returns
-------
hull : scipy.spatial.ConvexHull
The convex hull of equilibrium payoff pairs.

References
----------
..  Abreu, Dilip, and Yuliy Sannikov. "An algorithm for
two‐player repeated games with perfect monitoring." Theoretical
Economics 9.2 (2014): 313-338.
"""
sg, delta = rpg.sg, rpg.delta

if sg.N != 2:
msg = "this algorithm only applies to repeated two-player games."
raise NotImplementedError(msg)

best_dev_gains = _best_dev_gains(rpg)
IC = np.empty(2)
action_profile_payoff = np.empty(2)
# auxiliary array for checking if payoff is inside the convex hull
# first two entries for payoff point, and the last entry is 1.
extended_payoff = np.ones(3)
# array to store new points of C in each intersection
# at most 4 new points will be generated
new_pts = np.empty((4, 2))
# array to store the points of W
# the length of v is limited by |A1|*|A2|*4
W_new = np.empty((np.prod(sg.nums_actions)*4, 2))
W_old = np.empty((np.prod(sg.nums_actions)*4, 2))
# count the new points generated in each iteration
n_new_pt = 0

# copy the threat points
u = np.copy(u_init)

# initialization
payoff_pts = \
sg.payoff_profile_array.reshape(np.prod(sg.nums_actions), 2)
W_new[:np.prod(sg.nums_actions)] = payoff_pts
n_new_pt = np.prod(sg.nums_actions)

n_iter = 0
while True:
W_old[:n_new_pt] = W_new[:n_new_pt]
n_old_pt = n_new_pt
hull = ConvexHull(W_old[:n_old_pt])

W_new, n_new_pt = \
_R(delta, sg.nums_actions, sg.payoff_arrays,
best_dev_gains, hull.points, hull.vertices,
hull.equations, u, IC, action_profile_payoff,
extended_payoff, new_pts, W_new)

n_iter += 1
if n_iter >= max_iter:
break

# check convergence
if n_new_pt == n_old_pt:
if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol:
break

# update threat points
_update_u(u, W_new[:n_new_pt])

hull = ConvexHull(W_new[:n_new_pt])

return hull

def _best_dev_gains(rpg):
"""
Calculate the normalized payoff gains from deviating from the current
action to the best response for each player.

Parameters
----------
rpg : RepeatedGame
Two player repeated game.

Returns
-------
best_dev_gains : tuple(ndarray(float, ndim=2))
The normalized best deviation payoff gain arrays.
best_dev_gains[i][ai, a-i] is normalized payoff gain
player i can get if originally players are choosing
ai and a-i, and player i deviates to the best response action.
"""
sg, delta = rpg.sg, rpg.delta

best_dev_gains = ((1-delta)/delta *
(np.max(sg.payoff_arrays[i], 0) - sg.payoff_arrays[i])
for i in range(2))

return tuple(best_dev_gains)

@njit
def _R(delta, nums_actions, payoff_arrays, best_dev_gains, points,
vertices, equations, u, IC, action_profile_payoff,
extended_payoff, new_pts, W_new, tol=1e-10):
"""
Updating the payoff convex hull by iterating all action pairs.
Using the R operator proposed by Abreu and Sannikov 2014.

Parameters
----------
delta : scalar(float)
The common discount rate at which all players discount
the future.

nums_actions : tuple(int)
Tuple of the numbers of actions, one for each player.

payoff_arrays : tuple(ndarray(float, ndim=2))
Tuple of the payoff arrays, one for each player.

best_dev_gains : tuple(ndarray(float, ndim=2))
Tuple of the normalized best deviation payoff gain arrays.
best_dev_gains[i][ai, a-i] is payoff gain player i
can get if originally players are choosing ai and a-i,
and player i deviates to the best response action.

points : ndarray(float, ndim=2)
Coordinates of the points in the W, which construct a
feasible payoff convex hull.

vertices : ndarray(float, ndim=1)
Indices of points forming the vertices of the convex hull,
which are in counterclockwise order.

equations : ndarray(float, ndim=2)
[normal, offset] forming the hyperplane equation of the facet

u : ndarray(float, ndim=1)
The threat points.

IC : ndarray(float, ndim=1)
The minimum IC continuation values.

action_profile_payoff : ndarray(float, ndim=1)
Array of payoff for one action profile.

extended_payoff : ndarray(float, ndim=2)
The array [payoff0, payoff1, 1] for checking if
[payoff0, payoff1] is in the feasible payoff convex hull.

new_pts : ndarray(float, ndim=1)
The 4 by 2 array for storing the generated potential
extreme points of one action profile. One action profile
can only generate at most 4 points.

W_new : ndarray(float, ndim=2)
Array for storing the coordinates of the generated potential
extreme points that construct a new feasible payoff convex hull.

tol: scalar(float), optional(default=1e-10)
The tolerance for checking if two values are equal.

Returns
-------
W_new : ndarray(float, ndim=2)
The coordinates of the generated potential extreme points
that construct a new feasible payoff convex hull.

n_new_pt : scalar(int)
The number of points in W_new that construct the feasible
payoff convex hull.
"""
n_new_pt = 0
for a0 in range(nums_actions):
for a1 in range(nums_actions):
action_profile_payoff = payoff_arrays[a0, a1]
action_profile_payoff = payoff_arrays[a1, a0]
IC = u + best_dev_gains[a0, a1]
IC = u + best_dev_gains[a1, a0]

# check if payoff is larger than IC
if (action_profile_payoff >= IC).all():
# check if payoff is inside the convex hull
extended_payoff[:2] = action_profile_payoff
if (np.dot(equations, extended_payoff) <= tol).all():
W_new[n_new_pt] = action_profile_payoff
n_new_pt += 1
continue

new_pts, n = _find_C(new_pts, points, vertices, equations,
extended_payoff, IC, tol)

for i in range(n):
W_new[n_new_pt] = \
delta * new_pts[i] + (1-delta) * action_profile_payoff
n_new_pt += 1

return W_new, n_new_pt

@njit
def _find_C(C, points, vertices, equations, extended_payoff, IC, tol):
"""
Find all the intersection points between the current convex hull
and the two IC constraints. It is done by iterating simplex
counterclockwise.

Parameters
----------
C : ndarray(float, ndim=2)
The 4 by 2 array for storing the generated potential
extreme points of one action profile. One action profile
can only generate at most 4 points.

points : ndarray(float, ndim=2)
Coordinates of the points in the W, which construct a
feasible payoff convex hull.

vertices : ndarray(float, ndim=1)
Indices of points forming the vertices of the convex hull,
which are in counterclockwise order.

equations : ndarray(float, ndim=2)
[normal, offset] forming the hyperplane equation of the facet

extended_payoff : ndarray(float, ndim=1)
The array [payoff0, payoff1, 1] for checking if
[payoff0, payoff1] is in the feasible payoff convex hull.

IC : ndarray(float, ndim=1)
The minimum IC continuation values.

tol : scalar(float)
The tolerance for checking if two values are equal.

Returns
-------
C : ndarray(float, ndim=2)
The generated potential extreme points.

n : scalar(int)
The number of found intersection points.
"""
n = 0
weights = np.empty(2)
# vertices is ordered counterclockwise
for i in range(len(vertices)-1):
n = _intersect(C, n, weights, IC,
points[vertices[i]],
points[vertices[i+1]], tol)

n = _intersect(C, n, weights, IC,
points[vertices[-1]],
points[vertices], tol)

# check the case that IC is an interior point of the convex hull
extended_payoff[:2] = IC
if (np.dot(equations, extended_payoff) <= tol).all():
C[n, :] = IC
n += 1

return C, n

@njit
def _intersect(C, n, weights, IC, pt0, pt1, tol):
"""
Find the intersection points of a half-closed simplex
(pt0, pt1] and IC constraints.

Parameters
----------
C : ndarray(float, ndim=2)
The 4 by 2 array for storing the generated points of
one action profile. One action profile can only
generate at most 4 points.

n : scalar(int)
The number of intersection points that have been found.

weights : ndarray(float, ndim=1)
The size 2 array for storing the weights when calculate
the intersection point of simplex and IC constraints.

IC : ndarray(float, ndim=1)
The minimum IC continuation values.

pt0 : ndarray(float, ndim=1)
Coordinates of the starting point of the simplex.

pt1 : ndarray(float, ndim=1)
Coordinates of the ending point of the simplex.

tol : scalar(float)
The tolerance for checking if two values are equal.

Returns
-------
n : scalar(int)
The updated number of found intersection points.
"""
for i in range(2):
if (abs(pt0[i] - pt1[i]) < tol):
if (abs(pt1[i] - IC[i]) < tol):
x = pt1[1-i]
else:
continue
else:
weights[i] = (pt0[i] - IC[i]) / (pt0[i] - pt1[i])
# pt0 is not included to avoid duplication
# weights in (0, 1]
if (0 < weights[i] <= 1):
x = (1 - weights[i]) * pt0[1-i] + weights[i] * pt1[1-i]
else:
continue
# x has to be strictly higher than IC[1-j]
# if it is equal, then it means IC is one of the vertex
# it will be added to C in below
if x - IC[1-i] > tol:
C[n, i] = IC[i]
C[n, 1-i] = x
n += 1
elif x - IC[1-i] > -tol:
# to avoid duplication when IC is a vertex
break

return n

@njit
def _update_u(u, W):
"""
Update the threat points if it not feasible in the new W,
by the minimum of new feasible payoffs.

Parameters
----------
u : ndarray(float, ndim=1)
The threat points.

W : ndarray(float, ndim=1)
The points that construct the feasible payoff convex hull.

Returns
-------
u : ndarray(float, ndim=1)
The updated threat points.
"""
for i in range(2):
W_min = W[:, i].min()
if u[i] < W_min:
u[i] = W_min

return u
```