def FPI_transformation_once(strategies_profile, payoff_matrices, rate):
    """Apply one fixed-point-iteration (FPI) step to a 2-player profile.

    Parameters
    ----------
    strategies_profile : tuple
        (str_row, str_col) — the two players' mixed-strategy vectors.
    payoff_matrices : tuple
        (payoff_matrix_row, payoff_matrix_col) — the players' payoff matrices.
    rate : float
        Update rate passed through to utils.vector_update.

    Returns
    -------
    tuple
        (str_row, str_col) — the updated mixed strategies.
    """
    str_row, str_col = strategies_profile
    payoff_matrix_row, payoff_matrix_col = payoff_matrices

    # Row player: expected payoff of the current mixed strategy, and the
    # payoff of each pure strategy (vertex).  The original multiplied by
    # np.identity(rows) here, but I @ M == M, so the identity product is
    # dropped (and the now-unused rows/cols locals with it).
    payoff_row = str_row.dot(payoff_matrix_row).dot(str_col.T)
    vertex_payoff_row = payoff_matrix_row.dot(str_col.T)
    payoff_gain_row = vertex_payoff_row - payoff_row
    # Vertex-gain vector: keep only the strictly positive gains.
    lambda_row = np.where(payoff_gain_row > 0, payoff_gain_row, 0)

    # Column player: symmetric computation on the transposed column matrix.
    payoff_col = str_col.dot(payoff_matrix_col.T).dot(str_row.T)
    vertex_payoff_col = payoff_matrix_col.T.dot(str_row.T)
    payoff_gain_col = vertex_payoff_col - payoff_col
    lambda_col = np.where(payoff_gain_col > 0, payoff_gain_col, 0)

    # The original contained a dead `if False:` branch that scaled the rate
    # by a random factor; it could never execute and has been removed —
    # the caller-supplied rate is used as given.
    str_row = utils.vector_update(str_row, lambda_row, rate)
    str_col = utils.vector_update(str_col, lambda_col, rate)
    return (str_row, str_col)
def converge(self, rate, iterations):
    """Run FPI for `iterations` steps, tracking the best profile seen.

    Each step computes both players' vertex-gain vectors (VGV), remembers
    the strategy profile with the smallest vertex-gain sum (VGS) seen so
    far as the equilibrium estimate (self.eqpt_*), collects stats, and
    updates both mixed strategies.

    Parameters:
        rate: scalar update rate, or a list/tuple (rate_row, rate_col)
              giving one rate per player.
        iterations: number of FPI steps to run.

    Side effects: mutates self.str_row / self.str_col, self.lambda_row /
    self.lambda_col, the self.eqpt_* snapshot attributes, and whatever
    self.init_stats() / self.collect_stats() record; prints a summary line.
    """
    # Sentinel larger than any realistic VGS so the first iteration
    # always records a snapshot.
    vgss_old = 10**15
    start_time = datetime.datetime.now()
    self.init_stats()
    for i in range(iterations):
        # Row player: expected payoff vs. per-vertex (pure-strategy)
        # payoffs.  Multiplying by np.identity leaves the matrix
        # unchanged mathematically; it only shapes the result as one
        # payoff per vertex.
        payoff_row = self.str_row.dot(self.payoff_matrix_row).dot(
            self.str_col.T)
        vertex_payoff_row = np.identity(self.rows).dot(
            self.payoff_matrix_row).dot(self.str_col.T)
        payoff_gain_row = vertex_payoff_row - payoff_row
        # VGV: positive part of the payoff gains.
        self.lambda_row = np.where(payoff_gain_row > 0, payoff_gain_row,
                                   0)
        # Column player: symmetric computation on the transposed matrix.
        payoff_col = self.str_col.dot(self.payoff_matrix_col.T).dot(
            self.str_row.T)
        vertex_payoff_col = np.identity(self.cols).dot(
            self.payoff_matrix_col.T).dot(self.str_row.T)
        payoff_gain_col = vertex_payoff_col - payoff_col
        self.lambda_col = np.where(payoff_gain_col > 0, payoff_gain_col,
                                   0)
        # Snapshot the profile with the smallest VGS seen so far as the
        # current equilibrium estimate.
        vgss = self.lambda_row.sum() + self.lambda_col.sum()
        if vgss < vgss_old:
            self.eqpt_row = self.str_row
            self.eqpt_col = self.str_col
            self.eqpt_lambda_row = self.lambda_row
            self.eqpt_lambda_col = self.lambda_col
            vgss_old = vgss
        # collect stats to return
        self.collect_stats()
        '''
        # if random rate is used, it is not FPI anymore since there is no fixed function to be considered.
        if self.use_random_rate:
            rate_in_use = rate * np.random.rand() * 100
        else:
            rate_in_use = rate
        '''
        # Apply the alpha function to each VGV entrywise before updating.
        lambda_row_in_use = np.apply_along_axis(self.alpha, 0,
                                                self.lambda_row)
        lambda_col_in_use = np.apply_along_axis(self.alpha, 0,
                                                self.lambda_col)
        # Update mixed strategies; a list/tuple rate gives each player
        # its own rate, otherwise both share the scalar rate.
        if type(rate) is list or type(rate) is tuple:
            self.str_row = utils.vector_update(self.str_row,
                                               lambda_row_in_use, rate[0])
            self.str_col = utils.vector_update(self.str_col,
                                               lambda_col_in_use, rate[1])
        else:
            self.str_row = utils.vector_update(self.str_row,
                                               lambda_row_in_use, rate)
            self.str_col = utils.vector_update(self.str_col,
                                               lambda_col_in_use, rate)
    stop_time = datetime.datetime.now()
    print('*** rate: %s iters: %s time: %s' % (rate, iterations,
                                               stop_time - start_time))
def run_one_iteration(self, game, rate):
    """Perform one FPI step for this player.

    Evaluates the expected payoff of the current mixed strategy, the
    payoff of every pure strategy (vertex), derives the positive-gain
    target vector, and moves the mixed strategy toward it.

    Side effects: sets self.payoff and self.target, replaces
    self.mixed_strategy, and appends the vertex-gain sum to self.vgs_l.
    """
    # Expected payoff under the current joint probability distribution.
    self.payoff = self.payoff_vector.dot(game.compute_prob_dist())

    # Column j of `basis` is the j-th pure strategy; applying the game's
    # distribution function column-wise yields one joint distribution per
    # vertex, hence one payoff per vertex.
    basis = np.identity(self.pure_strategies_num)
    per_vertex_dists = np.apply_along_axis(game.compute_prob_dist, 0,
                                           basis, self)
    vertex_payoff = self.payoff_vector.dot(per_vertex_dists)

    # Target: the positive part of the per-vertex payoff gain.
    gain = vertex_payoff - self.payoff
    self.target = np.where(gain > 0, gain, 0)

    # Move toward the gaining vertices and record the vertex-gain sum.
    self.mixed_strategy = utils.vector_update(self.mixed_strategy,
                                              self.target, rate)
    self.vgs_l.append(self.target.sum())
def FPI(self):
    """Run fixed-point iteration from self.init_strategy for self.iters steps.

    Side effects: stores the final strategy in self.strategy and the
    last iteration's vertex-gain vector, payoff, strategy/VGV angle and
    gamma term in self.vertex_gain / self.payoff / self.angle /
    self.gamma.
    """
    s = self.init_strategy
    for i in range(self.iters):
        payoff = s.dot(self.vertex_payoff)
        # Vertex-gain vector: positive part of the per-vertex payoff gain.
        # Computed once instead of twice inside np.where.
        gain = self.vertex_payoff - payoff
        vertex_gain = np.where(gain > 0, gain, 0)
        # The original piped vertex_gain through
        # np.apply_along_axis(lambda x: x, ...) — an identity no-op left
        # over from experimenting with alpha functions (e.g. 10**4 * x * x);
        # removed.  Reinstate a real alpha transform here if needed.
        s = utils.vector_update(s, vertex_gain, self.rate)
    self.vertex_gain = vertex_gain
    self.strategy = s
    self.angle = utils.vector_angle(self.strategy, self.vertex_gain)
    self.payoff = payoff
    # Gamma: step-gain term derived from the VGV and the rate.
    self.gamma = vertex_gain.dot(vertex_gain) / (1 / self.rate +
                                                 vertex_gain.sum())
def run_one_iteration(self, game, rate, player_index):
    """One FPI step for player `player_index` in an n-player game.

    Time complexity: (n-1)g^n multiplications, where g is the average
    number of pure strategies per player.

    Side effects: sets self.VGV, appends to self.vgs_l and self.path_l,
    and replaces self.mixed_strategy.
    """
    # Step 1: vertex payoff vector \vec{v} — the expected payoff of each
    # of this player's pure strategies against the others' strategies.
    v = [game.compute_joint_dist_on_vertex(player_index,
                                           j).dot(self.payoff_vector)
         for j in np.arange(self.pure_strategies_num)]

    # Step 2: expected payoff of the current mixed strategy.
    payoff = self.mixed_strategy.dot(v)

    # Step 3: VGV — the positive part of the per-vertex payoff gain.
    gain = v - payoff
    self.VGV = np.where(gain > 0, gain, 0)

    # Step 4: stats — vertex-gain sum and the strategy path.
    self.vgs_l.append(self.VGV.sum())
    self.path_l.append(self.mixed_strategy)

    # Step 5: move the mixed strategy toward the gaining vertices.
    self.mixed_strategy = utils.vector_update(self.mixed_strategy,
                                              self.VGV, rate)
# Track how the distance between two independently initialised 2-simplex
# strategies evolves under the PF12 strengthen function, printing whether
# each step grew (+) or shrank (-) the distance.
# NOTE(review): `r` (the update rate) is defined outside this chunk.
size = 2
iterations = 1 * 10**4
lambda_function = strengthen_functions.PF12
s1 = utils.randomize_mixed_strategy(size)
s2 = utils.randomize_mixed_strategy(size)
print('initial strategy:', s1)
print('initial strategy:', s2)

distance_l = []
distance_old = 10**8
for i in range(iterations):
    # Target distribution for each strategy from its first component.
    t1 = np.array([lambda_function(s1[0]), 1 - lambda_function(s1[0])])
    t2 = np.array([lambda_function(s2[0]), 1 - lambda_function(s2[0])])
    s1 = utils.vector_update(s1, t1, r)
    s2 = utils.vector_update(s2, t2, r)
    distance = np.linalg.norm(s1 - s2)
    # Record the contraction ratio rather than the raw distance.
    distance_l.append(distance / distance_old)
    marker = '+' if distance > distance_old else '-'
    print(i, marker, distance_old, distance)
    distance_old = distance

print(distance_l[-100:])
print('final strategy:', s1.round(2), s2.round(2))
# Empirically check the FPI map behaves as a contraction: the distance
# between two trajectories must shrink every step (abort otherwise), and
# the per-step contraction ratio is expected not to decrease.
# NOTE(review): s1, s2, vertex_payoff, r and iterations come from outside
# this chunk.
distance_ratio_l = []
distance_old = 10**8
distance_ratio_old = 0
s1_l = []
s2_l = []
for i in range(iterations):
    # Record both trajectories before stepping.
    s1_l.append(s1)
    s2_l.append(s2)
    distance = np.linalg.norm(s1 - s2)

    # One FPI step per trajectory: positive part of the vertex gain.
    payoff_1 = s1.dot(vertex_payoff)
    payoff_2 = s2.dot(vertex_payoff)
    diff_1 = vertex_payoff - payoff_1
    diff_2 = vertex_payoff - payoff_2
    vertex_gain_1 = np.where(diff_1 > 0, diff_1, 0)
    vertex_gain_2 = np.where(diff_2 > 0, diff_2, 0)
    s1 = utils.vector_update(s1, vertex_gain_1, r)
    s2 = utils.vector_update(s2, vertex_gain_2, r)

    distance_ratio = distance / distance_old
    distance_ratio_l.append(distance_ratio)
    if distance >= distance_old:
        print(i, 'Lipschitz K>=1, wrong: %s >= %s' % (distance,
                                                      distance_old))
        sys.exit(1)
    if distance_ratio <= distance_ratio_old:
        # Observed but tolerated: only report, do not abort.
        print(i, 'Lipschitz K decreasing, unexpected!')
    distance_old = distance
    distance_ratio_old = distance_ratio

print('final strategy:', s1, s2)
import numpy as np
import utils

# Experiment: FPI with a squared vertex gain (alpha(x) = x**2), printing
# at each step the realized payoff change minus the gamma term.
r = 10**-4
size = 6
# NOTE(review): `iterations` is never used — the loop below runs 1000
# steps; confirm which count was intended.
iterations = 1 * 10**4
s = utils.randomize_mixed_strategy(size)
vertex_payoff = utils.randomize_payoff_vector(size)
# vertex_payoff = np.array([0.1, 0.7, 0.7, 0.7, -2, -1])
vertex_payoff *= 1000
print('initial strategy:', s)
print('payoff:', vertex_payoff)
for i in range(1000):
    payoff = s.dot(vertex_payoff)
    # Positive part of the per-vertex payoff gain.
    gain = vertex_payoff - payoff
    vertex_gain_origin = np.where(gain > 0, gain, 0)
    # Entrywise alpha(x) = x**2.  The original routed this through
    # np.apply_along_axis(lambda x: x**2, ...), which computes the same
    # values one column at a time; the vectorized form is equivalent and
    # far faster.  (An unused `payoff_old` local was also removed.)
    vertex_gain = vertex_gain_origin ** 2
    s = utils.vector_update(s, vertex_gain, r)
    payoff_new = s.dot(vertex_payoff)
    # Gamma term compared against the realized payoff change below.
    gamma = vertex_gain.dot(vertex_gain_origin) / (1.0 / r +
                                                   vertex_gain.sum())
    print(payoff_new - payoff - gamma)
print('final strategy:', s)
print('final VG:', vertex_gain)
# t = s * p # t = strengthen_functions.PF12(s) t = strengthen_functions.PF30(s) # t = s * s * s + s * s + s # t = t / t.sum() return t angle_old = -1 angle_l = [] target_l = [] for i in range(iters): target = target_function(s) # print(s, target) angle = utils.vector_angle(s, target) s = utils.vector_update(s, target, r) angle_l.append(angle) target_l.append((target**2).sum()) if angle >= angle_old: print('+', angle_old, angle) # print('s', s) # print('p', p) # print('target', target) else: print('-', angle_old, angle) # print('s', s) # print('p', p) # print('target', target) # break angle_old = angle
import numpy as np
import strengthen_functions
import utils

# Start from a uniformly random point on the 1-simplex.
p = np.random.rand()
s = np.array([p, 1 - p])
print(s)

# Iterate the PF80 strengthen-function update and keep the whole path.
s_l = []
for _ in range(10 ** 5):
    t = strengthen_functions.PF80(s[0])
    s = utils.vector_update(s, np.array([t, 1 - t]), 10 ** -4)
    s_l.append(s)

# Plotting import deferred, as in the original, until after the loop.
import matplotlib.pyplot as plt
print(s_l[-1])
plt.scatter([pt[0] for pt in s_l], [pt[1] for pt in s_l])
plt.show()