# Source code for quantecon.game_theory.repeated_game

"""
Tools for repeated games.

"""

import numpy as np
from scipy.spatial import ConvexHull
from numba import njit


class RepeatedGame:
    """
    Class representing an N-player repeated game.

    Parameters
    ----------
    stage_game : NormalFormGame
        The stage game used to create the repeated game.

    delta : scalar(float)
        The common discount rate at which all players discount the
        future.

    Attributes
    ----------
    sg : NormalFormGame
        The stage game. See Parameters.

    delta : scalar(float)
        See Parameters.

    N : scalar(int)
        The number of players.

    nums_actions : tuple(int)
        Tuple of the numbers of actions, one for each player.

    """
    def __init__(self, stage_game, delta):
        self.sg = stage_game
        self.delta = delta
        # Mirror the stage game's dimensions so callers need not reach
        # through `sg` for them.
        self.N = stage_game.N
        self.nums_actions = stage_game.nums_actions

    def equilibrium_payoffs(self, method=None, options=None):
        """
        Compute the set of payoff pairs of all pure-strategy subgame-perfect
        equilibria with public randomization for any repeated two-player
        games with perfect monitoring and discounting.

        Parameters
        ----------
        method : str, optional
            The method for solving the equilibrium payoff set. Either
            'abreu_sannikov' or its alias 'AS'. If None, 'abreu_sannikov'
            is used.

        options : dict, optional
            A dictionary of method options, forwarded to the solver as
            keyword arguments. For example, 'abreu_sannikov' method
            accepts the following options:

                tol : scalar(float)
                    Tolerance for convergence checking.
                max_iter : scalar(int)
                    Maximum number of iterations.
                u_init : ndarray(float, ndim=1)
                    The initial guess of threat points.

        Returns
        -------
        The value returned by the selected solver; for 'abreu_sannikov'
        this is the convex hull of the equilibrium payoff pairs.

        Raises
        ------
        NotImplementedError
            If `method` is not one of the implemented methods.

        Notes
        -----
        Here lists all the implemented methods. The default method
        is 'abreu_sannikov'.

            1. 'abreu_sannikov' (alias 'AS')

        """
        if method is None:
            method = 'abreu_sannikov'

        if options is None:
            options = {}

        if method in ('abreu_sannikov', 'AS'):
            return _equilibrium_payoffs_abreu_sannikov(self, **options)

        msg = f"method {method} not supported."
        raise NotImplementedError(msg)
def _equilibrium_payoffs_abreu_sannikov(rpg, tol=1e-12, max_iter=500,
                                        u_init=np.zeros(2)):
    """
    Using 'abreu_sannikov' algorithm to compute the set of payoff pairs
    of all pure-strategy subgame-perfect equilibria with public
    randomization for any repeated two-player games with perfect
    monitoring and discounting, following Abreu and Sannikov (2014).

    Parameters
    ----------
    rpg : RepeatedGame
        Two player repeated game.

    tol : scalar(float), optional(default=1e-12)
        Tolerance for convergence checking.

    max_iter : scalar(int), optional(default=500)
        Maximum number of iterations.

    u_init : ndarray(float, ndim=1), optional(default=np.zeros(2))
        The initial guess of threat points. It is copied before use, so
        the argument (and the shared default array) is never modified.

    Returns
    -------
    hull : scipy.spatial.ConvexHull
        The convex hull of equilibrium payoff pairs.

    Raises
    ------
    NotImplementedError
        If the stage game does not have exactly two players.

    Notes
    -----
    The iteration stops either when two successive point sets have the
    same size and differ by less than `tol` in norm, or after `max_iter`
    iterations. In the latter case the current hull is returned without
    any indication that convergence was not reached.

    References
    ----------
    .. [1] Abreu, Dilip, and Yuliy Sannikov. "An algorithm for
       two-player repeated games with perfect monitoring." Theoretical
       Economics 9.2 (2014): 313-338.

    """
    sg, delta = rpg.sg, rpg.delta

    if sg.N != 2:
        msg = "this algorithm only applies to repeated two-player games."
        raise NotImplementedError(msg)

    best_dev_gains = _best_dev_gains(rpg)

    # Scratch buffers, allocated once and reused by `_R` in every
    # iteration.
    IC = np.empty(2)
    action_profile_payoff = np.empty(2)
    # auxiliary array for checking if payoff is inside the convex hull
    # first two entries for payoff point, and the last entry is 1.
    extended_payoff = np.ones(3)
    # array to store new points of C in each intersection
    # at most 4 new points will be generated
    new_pts = np.empty((4, 2))

    # number of pure action profiles, |A1|*|A2|
    n_profiles = np.prod(sg.nums_actions)

    # arrays to store the points of W
    # the number of points is limited by |A1|*|A2|*4
    W_new = np.empty((n_profiles*4, 2))
    W_old = np.empty((n_profiles*4, 2))

    # copy the threat points
    u = np.copy(u_init)

    # initialization: start from the stage-game payoff profiles
    payoff_pts = sg.payoff_profile_array.reshape(n_profiles, 2)
    W_new[:n_profiles] = payoff_pts
    # count of the points generated in the latest iteration
    n_new_pt = n_profiles

    n_iter = 0
    while True:
        # `_R` overwrites W_new in place, so keep a snapshot of the
        # previous points for the convergence check below.
        W_old[:n_new_pt] = W_new[:n_new_pt]
        n_old_pt = n_new_pt
        hull = ConvexHull(W_old[:n_old_pt])

        W_new, n_new_pt = \
            _R(delta, sg.nums_actions, sg.payoff_arrays,
               best_dev_gains, hull.points, hull.vertices,
               hull.equations, u, IC, action_profile_payoff,
               extended_payoff, new_pts, W_new)

        n_iter += 1
        if n_iter >= max_iter:
            break

        # check convergence
        if n_new_pt == n_old_pt:
            if np.linalg.norm(W_new[:n_new_pt] - W_old[:n_new_pt]) < tol:
                break

        # update threat points
        _update_u(u, W_new[:n_new_pt])

    hull = ConvexHull(W_new[:n_new_pt])

    return hull


def _best_dev_gains(rpg):
    """
    Calculate the normalized payoff gains from deviating from the current
    action to the best response for each player.

    Parameters
    ----------
    rpg : RepeatedGame
        Two player repeated game.

    Returns
    -------
    best_dev_gains : tuple(ndarray(float, ndim=2))
        The normalized best deviation payoff gain arrays.
        best_dev_gains[i][ai, a-i] is normalized payoff gain
        player i can get if originally players are choosing
        ai and a-i, and player i deviates to the best response action.

    """
    sg, delta = rpg.sg, rpg.delta

    # normalization factor applied to every one-shot deviation gain
    scale = (1-delta)/delta

    # The maximum over axis 0 (player i's own action) is the best-response
    # payoff against each opponent action.
    best_dev_gains = (
        scale * (np.max(sg.payoff_arrays[i], 0) - sg.payoff_arrays[i])
        for i in range(2)
    )

    return tuple(best_dev_gains)


@njit
def _R(delta, nums_actions, payoff_arrays, best_dev_gains, points,
       vertices, equations, u, IC, action_profile_payoff,
       extended_payoff, new_pts, W_new, tol=1e-10):
    """
    Updating the payoff convex hull by iterating all action pairs.
    Using the R operator proposed by Abreu and Sannikov 2014.

    The arrays `IC`, `action_profile_payoff`, `extended_payoff`,
    `new_pts` and `W_new` are work buffers that are overwritten in place.

    Parameters
    ----------
    delta : scalar(float)
        The common discount rate at which all players discount
        the future.

    nums_actions : tuple(int)
        Tuple of the numbers of actions, one for each player.

    payoff_arrays : tuple(ndarray(float, ndim=2))
        Tuple of the payoff arrays, one for each player.

    best_dev_gains : tuple(ndarray(float, ndim=2))
        Tuple of the normalized best deviation payoff gain arrays.
        best_dev_gains[i][ai, a-i] is payoff gain player i
        can get if originally players are choosing ai and a-i,
        and player i deviates to the best response action.

    points : ndarray(float, ndim=2)
        Coordinates of the points in the W, which construct a
        feasible payoff convex hull.

    vertices : ndarray(int, ndim=1)
        Indices of points forming the vertices of the convex hull,
        which are in counterclockwise order.

    equations : ndarray(float, ndim=2)
        [normal, offset] forming the hyperplane equation of the facet

    u : ndarray(float, ndim=1)
        The threat points.

    IC : ndarray(float, ndim=1)
        The minimum IC continuation values.

    action_profile_payoff : ndarray(float, ndim=1)
        Array of payoff for one action profile.

    extended_payoff : ndarray(float, ndim=1)
        The array [payoff0, payoff1, 1] for checking if
        [payoff0, payoff1] is in the feasible payoff convex hull.

    new_pts : ndarray(float, ndim=2)
        The 4 by 2 array for storing the generated potential
        extreme points of one action profile. One action profile
        can only generate at most 4 points.

    W_new : ndarray(float, ndim=2)
        Array for storing the coordinates of the generated potential
        extreme points that construct a new feasible payoff convex hull.

    tol: scalar(float), optional(default=1e-10)
        The tolerance for checking if two values are equal.

    Returns
    -------
    W_new : ndarray(float, ndim=2)
        The coordinates of the generated potential extreme points
        that construct a new feasible payoff convex hull.

    n_new_pt : scalar(int)
        The number of points in W_new that construct the feasible
        payoff convex hull.

    """
    n_new_pt = 0
    for a0 in range(nums_actions[0]):
        for a1 in range(nums_actions[1]):
            # Each player's arrays are indexed [own action, opponent's
            # action], hence the swapped indices for player 1.
            action_profile_payoff[0] = payoff_arrays[0][a0, a1]
            action_profile_payoff[1] = payoff_arrays[1][a1, a0]
            IC[0] = u[0] + best_dev_gains[0][a0, a1]
            IC[1] = u[1] + best_dev_gains[1][a1, a0]

            # check if payoff is larger than IC
            if (action_profile_payoff >= IC).all():
                # check if payoff is inside the convex hull
                extended_payoff[:2] = action_profile_payoff
                if (np.dot(equations, extended_payoff) <= tol).all():
                    # the stage payoff itself is kept as a point of W
                    W_new[n_new_pt] = action_profile_payoff
                    n_new_pt += 1
                    continue

            new_pts, n = _find_C(new_pts, points, vertices, equations,
                                 extended_payoff, IC, tol)

            # combine each continuation point with the stage payoff
            for i in range(n):
                W_new[n_new_pt] = \
                    delta * new_pts[i] + (1-delta) * action_profile_payoff
                n_new_pt += 1

    return W_new, n_new_pt


@njit
def _find_C(C, points, vertices, equations, extended_payoff, IC, tol):
    """
    Find all the intersection points between the current convex hull
    and the two IC constraints. It is done by iterating simplex
    counterclockwise.

    `C` and the first two entries of `extended_payoff` are overwritten
    in place.

    Parameters
    ----------
    C : ndarray(float, ndim=2)
        The 4 by 2 array for storing the generated potential
        extreme points of one action profile. One action profile
        can only generate at most 4 points.

    points : ndarray(float, ndim=2)
        Coordinates of the points in the W, which construct a
        feasible payoff convex hull.

    vertices : ndarray(int, ndim=1)
        Indices of points forming the vertices of the convex hull,
        which are in counterclockwise order.

    equations : ndarray(float, ndim=2)
        [normal, offset] forming the hyperplane equation of the facet

    extended_payoff : ndarray(float, ndim=1)
        The array [payoff0, payoff1, 1] for checking if
        [payoff0, payoff1] is in the feasible payoff convex hull.

    IC : ndarray(float, ndim=1)
        The minimum IC continuation values.

    tol : scalar(float)
        The tolerance for checking if two values are equal.

    Returns
    -------
    C : ndarray(float, ndim=2)
        The generated potential extreme points.

    n : scalar(int)
        The number of found intersection points.

    """
    n = 0
    weights = np.empty(2)
    # vertices is ordered counterclockwise
    for i in range(len(vertices)-1):
        n = _intersect(C, n, weights, IC,
                       points[vertices[i]],
                       points[vertices[i+1]], tol)

    # closing edge, from the last vertex back to the first
    n = _intersect(C, n, weights, IC,
                   points[vertices[-1]],
                   points[vertices[0]], tol)

    # check the case that IC is an interior point of the convex hull
    extended_payoff[:2] = IC
    if (np.dot(equations, extended_payoff) <= tol).all():
        C[n, :] = IC
        n += 1

    return C, n


@njit
def _intersect(C, n, weights, IC, pt0, pt1, tol):
    """
    Find the intersection points of a half-closed simplex
    (pt0, pt1] and IC constraints.

    Found points are written into `C` starting at row `n`.

    Parameters
    ----------
    C : ndarray(float, ndim=2)
        The 4 by 2 array for storing the generated points of
        one action profile. One action profile can only
        generate at most 4 points.

    n : scalar(int)
        The number of intersection points that have been found.

    weights : ndarray(float, ndim=1)
        The size 2 array for storing the weights when calculate
        the intersection point of simplex and IC constraints.

    IC : ndarray(float, ndim=1)
        The minimum IC continuation values.

    pt0 : ndarray(float, ndim=1)
        Coordinates of the starting point of the simplex.

    pt1 : ndarray(float, ndim=1)
        Coordinates of the ending point of the simplex.

    tol : scalar(float)
        The tolerance for checking if two values are equal.

    Returns
    -------
    n : scalar(int)
        The updated number of found intersection points.

    """
    for i in range(2):
        if (abs(pt0[i] - pt1[i]) < tol):
            # The edge is constant in coordinate i: it can only meet the
            # line {coordinate i == IC[i]} if it lies on that line.
            if (abs(pt1[i] - IC[i]) < tol):
                x = pt1[1-i]
            else:
                continue
        else:
            weights[i] = (pt0[i] - IC[i]) / (pt0[i] - pt1[i])
            # pt0 is not included to avoid duplication
            # weights in (0, 1]
            if (0 < weights[i] <= 1):
                x = (1 - weights[i]) * pt0[1-i] + weights[i] * pt1[1-i]
            else:
                continue
        # x has to be strictly higher than IC[1-i]
        # if it is equal, then it means IC is one of the vertex
        # it will be added to C in below
        if x - IC[1-i] > tol:
            C[n, i] = IC[i]
            C[n, 1-i] = x
            n += 1
        elif x - IC[1-i] > -tol:
            # to avoid duplication when IC is a vertex
            break

    return n


@njit
def _update_u(u, W):
    """
    Update the threat points in place: any threat point that lies below
    the minimum payoff of the new W (and is therefore not feasible in W)
    is raised to that minimum.

    Parameters
    ----------
    u : ndarray(float, ndim=1)
        The threat points.

    W : ndarray(float, ndim=2)
        The points that construct the feasible payoff convex hull,
        one point per row.

    Returns
    -------
    u : ndarray(float, ndim=1)
        The updated threat points (the same array that was passed in).

    """
    for i in range(2):
        W_min = W[:, i].min()
        if u[i] < W_min:
            u[i] = W_min

    return u