def __init__(self,
             game_file_path,
             portfolio_strategy_files_paths,
             exp3g_gamma=0.02,
             exp3g_eta=0.025,
             utility_estimator_class=SimpleUtilityEstimator,
             utility_estimator_args=None):
    super().__init__()
    self.portfolio_size = len(portfolio_strategy_files_paths)
    self.bandit_algorithm = Exp3G(exp3g_gamma, exp3g_eta, self.portfolio_size)
    game = acpc.read_game_file(game_file_path)
    if utility_estimator_args is None:
        self.utility_estimator = utility_estimator_class(game, True)
    else:
        self.utility_estimator = utility_estimator_class(game, True, **utility_estimator_args)
    self.portfolio_trees = []
    self.portfolio_dicts = []
    for portfolio_strategy_file_path in portfolio_strategy_files_paths:
        strategy_tree, strategy_dict = read_strategy_from_file(
            game_file_path, portfolio_strategy_file_path)
        self.portfolio_trees += [strategy_tree]
        self.portfolio_dicts += [strategy_dict]
    self.portfolio_strategies_mixture = StrategiesWeightedMixture(game, self.portfolio_trees)
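# Usage sketch (paths and port are illustrative, not taken from the repository):
# this constructor belongs to ImplicitModellingAgent, which loads a portfolio of
# response strategies and lets Exp3G pick among them online.
#
# agent = ImplicitModellingAgent(
#     'games/kuhn.limit.2p.game',
#     ['portfolio/a-response.strategy', 'portfolio/b-response.strategy'],
#     utility_estimator_class=SimpleUtilityEstimator)
# acpc.Client('games/kuhn.limit.2p.game', '127.0.1.1', '18791').play(agent)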
def read_strategy_from_file(game, strategy_file_path):
    strategy = {}
    with open(strategy_file_path, 'r') as strategy_file:
        for line in strategy_file:
            if not line.strip() or line.strip().startswith('#'):
                continue
            line_split = line.split(' ')
            strategy[line_split[0]] = [float(prob_str) for prob_str in line_split[1:4]]
    if not game:
        return strategy

    game_instance = acpc.read_game_file(game) if isinstance(game, str) else game
    strategy_tree = GameTreeBuilder(game_instance, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            nonlocal strategy
            node_strategy = np.array(strategy[str(node)])
            np.copyto(node.strategy, node_strategy)

    walk_trees(on_node, strategy_tree)
    return strategy_tree, strategy
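# Usage sketch: each non-comment line of a strategy file is assumed to be
# "<node key> <p0> <p1> <p2>", i.e. a node identifier followed by three action
# probabilities (in the fold/call/raise order implied elsewhere in this code base).
# When the game argument is falsy, only the raw dictionary is returned.
#
# strategy_tree, strategy_dict = read_strategy_from_file(
#     KUHN_POKER_GAME_FILE_PATH, 'strategies/kuhn.limit.2p-equilibrium.strategy')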
def train_portfolio_responses(game_file_path,
                              opponent_strategy_trees,
                              rnr_params,
                              callback=None,
                              log=False,
                              parallel=False):
    num_opponents = len(opponent_strategy_trees)
    game = acpc.read_game_file(game_file_path)

    if log:
        print()

    responses = [None] * num_opponents
    params = [(i, num_opponents, log, parallel, game, rnr_params, opponent_strategy_trees)
              for i in range(num_opponents)]
    if parallel:
        with multiprocessing.Pool(max(int(multiprocessing.cpu_count() / 2), 2)) as p:
            for i, result in enumerate(p.imap_unordered(_train_response, params)):
                response_index, response_strategy = result
                responses[response_index] = response_strategy
                if callback:
                    callback(response_index, response_strategy)
                if log:
                    print('Progress: %s/%s' % (i + 1, num_opponents))
    else:
        for response_index, response_strategy in map(_train_response, params):
            if callback:
                callback(response_index, response_strategy)
            responses[response_index] = response_strategy
    return responses
def test_build_portfolio_not_crashing(self):
    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)

    def on_node_always_call(node):
        if isinstance(node, ActionNode):
            node.strategy[1] = 1

    def on_node_always_fold(node):
        if isinstance(node, ActionNode):
            if 0 in node.children:
                node.strategy[0] = 1
            else:
                node.strategy[1] = 1

    def on_node_uniform(node):
        if isinstance(node, ActionNode):
            action_count = len(node.children)
            action_probability = 1 / action_count
            for a in node.children:
                node.strategy[a] = action_probability

    opponents = [
        self.create_strategy(game, on_node_always_call),
        self.create_strategy(game, on_node_always_fold),
        self.create_strategy(game, on_node_uniform),
    ]

    opponent_responses = train_portfolio_responses(
        KUHN_POKER_GAME_FILE_PATH, opponents, [(100, 800, 10, 2, 2)] * len(opponents))
    portfolio_strategies, opponent_indices = optimize_portfolio(
        KUHN_POKER_GAME_FILE_PATH, opponents, opponent_responses)

    self.assertGreaterEqual(len(portfolio_strategies), 1)
    self.assertEqual(len(portfolio_strategies), len(opponent_indices))
def test_kuhn_cfr_checkpointing(self):
    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    cfr = Cfr(game, show_progress=False)

    checkpoints_count = 0

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        nonlocal checkpoints_count
        self.assertTrue(game_tree is not None)
        self.assertEqual(checkpoint_index, checkpoints_count)
        checkpoints_count += 1

    cfr.train(60, weight_delay=15, checkpoint_iterations=15,
              checkpoint_callback=checkpoint_callback)
    # Presumably (60 - 15) / 15 = 3 checkpoints remain after the weight delay
    self.assertEqual(checkpoints_count, 3)
def test_strategy_writing_and_reading(self):
    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    strategy_tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            for a in range(3):
                if a in node.children:
                    node.strategy[a] = 0.5
                else:
                    node.strategy[a] = 7

    walk_trees(on_node, strategy_tree)
    write_strategy_to_file(strategy_tree, 'test/io_test_dummy.strategy')
    read_strategy_tree, _ = read_strategy_from_file(
        KUHN_POKER_GAME_FILE_PATH, 'test/io_test_dummy.strategy')
    self.assertTrue(is_strategies_equal(strategy_tree, read_strategy_tree))
def test_read_game_file(self):
    game = acpc.read_game_file('test.game')
    self.assertEqual(game.get_num_players(), 3)
    self.assertEqual(game.get_num_rounds(), 4)
    self.assertEqual(game.get_blind(0), 5)
    self.assertEqual(game.get_blind(1), 10)
    self.assertEqual(game.get_blind(2), 0)
    self.assertEqual(game.get_num_hole_cards(), 2)
    self.assertEqual(game.get_num_board_cards(0), 0)
    self.assertEqual(game.get_num_board_cards(1), 3)
    self.assertEqual(game.get_num_board_cards(2), 1)
    self.assertEqual(game.get_num_board_cards(3), 1)
    self.assertEqual(game.get_num_ranks(), 13)
    self.assertEqual(game.get_num_suits(), 4)
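# For reference, a game definition consistent with the assertions above would look
# roughly like this ACPC gamedef (illustrative; the actual test.game may differ):
#   GAMEDEF
#   limit
#   numPlayers = 3
#   numRounds = 4
#   blind = 5 10 0
#   numHoleCards = 2
#   numBoardCards = 0 3 1 1
#   numRanks = 13
#   numSuits = 4
#   END GAMEDEF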
def train_and_show_results(self, test_spec):
    game_file_path = test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)

    base_strategy, _ = read_strategy_from_file(game_file_path, test_spec['base_strategy_path'])
    opponent = test_spec['opponent']
    opponent_strategy = create_agent_strategy_from_trained_strategy(
        game_file_path, base_strategy, opponent[1], opponent[2], opponent[3])

    strategy, exploitability, p = RnrParameterOptimizer(game).train(
        opponent_strategy, test_spec['exploitability'], test_spec['max_delta'])

    self.assertIsNotNone(strategy)
    self.assertTrue(is_correct_strategy(strategy))
    print('Final exploitability is %s with p of %s' % (exploitability, p))
def create_agent_strategy(game_file_path,
                          tilt_action,
                          tilt_type,
                          tilt_probability,
                          cfr_iterations=2000,
                          cfr_weight_delay=700,
                          show_progress=True):
    game = acpc.read_game_file(game_file_path)
    cfr = Cfr(game, show_progress=show_progress)
    cfr.train(cfr_iterations, cfr_weight_delay)
    return create_agent_strategy_from_trained_strategy(
        game_file_path, cfr.game_tree, tilt_action, tilt_type, tilt_probability, True)
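# Usage sketch (values illustrative): train a CFR strategy and tilt its CALL
# probability up by an additive 0.2 at every decision point.
#
# tilted_strategy = create_agent_strategy(
#     'games/kuhn.limit.2p.game', Action.CALL, TiltType.ADD, 0.2,
#     cfr_iterations=1000, cfr_weight_delay=500, show_progress=False)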
def test_leduc_rnr_works(self):
    game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)
    opponent_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

    def on_node(node):
        if isinstance(node, ActionNode):
            action_count = len(node.children)
            action_probability = 1 / action_count
            for a in node.children:
                node.strategy[a] = action_probability

    walk_trees(on_node, opponent_strategy)

    rnr = RestrictedNashResponse(game, opponent_strategy, 0.5, show_progress=False)
    rnr.train(10, 5)
def test_kuhn_action_minus_tilted_agent(self):
    kuhn_equilibrium, _ = read_strategy_from_file(
        KUHN_POKER_GAME_FILE_PATH, 'strategies/kuhn.limit.2p-equilibrium.strategy')
    game = acpc.read_game_file(KUHN_POKER_GAME_FILE_PATH)
    exploitability = Exploitability(game)

    tilted_agent_strategy = create_agent_strategy_from_trained_strategy(
        KUHN_POKER_GAME_FILE_PATH, kuhn_equilibrium, Action.CALL, TiltType.ADD, -0.5)
    self.assertTrue(is_correct_strategy(tilted_agent_strategy))
    self.assertFalse(is_strategies_equal(kuhn_equilibrium, tilted_agent_strategy))

    equilibrium_exploitability = exploitability.evaluate(kuhn_equilibrium)
    call_minus_tilted_exploitability = exploitability.evaluate(tilted_agent_strategy)
    self.assertGreater(call_minus_tilted_exploitability, equilibrium_exploitability)
def create_agent_strategy_from_trained_strategy(game_file_path,
                                                strategy_tree,
                                                tilt_action,
                                                tilt_type,
                                                tilt_probability,
                                                in_place=False):
    tilt_action_index = tilt_action.value

    def on_node(node):
        # Guard against non-action nodes, mirroring the other walk_trees callbacks
        if not isinstance(node, ActionNode) or tilt_action_index not in node.children:
            return
        original_tilt_action_probability = node.strategy[tilt_action_index]
        new_tilt_action_probability = None
        if tilt_type == TiltType.ADD:
            new_tilt_action_probability = np.clip(
                original_tilt_action_probability + tilt_probability, 0, 1)
        elif tilt_type == TiltType.MULTIPLY:
            new_tilt_action_probability = np.clip(
                original_tilt_action_probability
                + original_tilt_action_probability * tilt_probability, 0, 1)
        node.strategy[tilt_action_index] = new_tilt_action_probability

        diff = new_tilt_action_probability - original_tilt_action_probability
        other_actions_probability = 1 - original_tilt_action_probability
        if diff != 0 and other_actions_probability == 0:
            # The tilted action held all the probability mass: spread the change evenly
            other_action_probability_diff = diff / (len(node.children) - 1)
            for a in filter(lambda a: a != tilt_action_index, node.children):
                node.strategy[a] -= other_action_probability_diff
        elif diff != 0:
            # Rescale the remaining actions proportionally to their original mass
            for a in filter(lambda a: a != tilt_action_index, node.children):
                node.strategy[a] -= diff * (node.strategy[a] / other_actions_probability)

    result_strategy = None
    if in_place:
        result_strategy = strategy_tree
    else:
        game = acpc.read_game_file(game_file_path)
        result_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()
        copy_strategy(result_strategy, strategy_tree)

    walk_trees(on_node, result_strategy)
    return result_strategy
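# Worked example (illustrative numbers) of the redistribution above: with
# node.strategy == [0.2, 0.5, 0.3], tilt_action CALL (index 1), TiltType.ADD and
# tilt_probability 0.2:
#   new call probability = clip(0.5 + 0.2, 0, 1) = 0.7, so diff = 0.2
#   the other actions hold 1 - 0.5 = 0.5 and are scaled down proportionally:
#     index 0: 0.2 - 0.2 * (0.2 / 0.5) = 0.12
#     index 2: 0.3 - 0.2 * (0.3 / 0.5) = 0.18
#   the tilted strategy [0.12, 0.7, 0.18] still sums to 1.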
def read_log_file(game_file_path, log_file_path, player_names, player_trees=None):
    game = acpc.read_game_file(game_file_path)
    num_players = game.get_num_players()
    if len(player_names) != num_players:
        raise AttributeError('Wrong number of player names provided')
    if game.get_betting_type() != acpc.BettingType.LIMIT:
        raise AttributeError('Only limit betting games are supported')

    players = {}
    for i in range(num_players):
        player_name = player_names[i]
        player_tree = None
        if player_trees and player_name in player_trees:
            player_tree = player_trees[player_name]
        else:
            player_tree = GameTreeBuilder(game, SamplesTreeNodeProvider()).build_tree()
        players[player_name] = player_tree

    with open(log_file_path, 'r') as strategy_file:
        for line in strategy_file:
            if not line.strip() or line.strip().startswith('#') or len(line.split(':')) == 3:
                continue
            # Player names appear after the last colon of a state line, separated by '|'
            line_player_names = [name.strip() for name in line.split(':')[-1].split('|')]
            state = acpc.parse_state(game_file_path, line)
            current_player_trees = [players[name] for name in line_player_names]
            _add_state_to_sample_trees(game, state, current_player_trees, 0, 0)

    return players
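# Usage sketch (file names illustrative): build per-player sample trees from an
# ACPC match log; the result maps each player name to its samples tree.
#
# players = read_log_file(
#     'games/kuhn.limit.2p.game',
#     'logs/match_0/normal.log',
#     ['agent', 'opponent'])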
def train_and_show_results(self, test_spec):
    game_file_path = test_spec['game_file_path']
    portfolio_name = test_spec['portfolio_name']
    agent_specs = test_spec['opponent_tilt_types']

    if not _check_agent_names_unique(agent_specs):
        raise AttributeError('Agents must be unique so that they have unique names')

    strategies_directory_base = '%s/%s' % (TEST_OUTPUT_DIRECTORY, portfolio_name)
    strategies_directory = strategies_directory_base
    if 'overwrite_portfolio_path' not in test_spec or not test_spec['overwrite_portfolio_path']:
        counter = 1
        while os.path.exists(strategies_directory):
            strategies_directory = '%s(%s)' % (strategies_directory_base, counter)
            counter += 1
    if not os.path.exists(strategies_directory):
        os.makedirs(strategies_directory)

    game = acpc.read_game_file(game_file_path)
    exp = Exploitability(game)

    # Delete results since they will be generated again
    for file in os.listdir(strategies_directory):
        absolute_path = '/'.join([strategies_directory, file])
        if os.path.isfile(absolute_path):
            os.remove(absolute_path)

    base_strategy, _ = read_strategy_from_file(game_file_path, test_spec['base_strategy_path'])

    num_opponents = len(agent_specs)
    opponents = []
    for agent in agent_specs:
        opponent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path, base_strategy, agent[0], agent[1], agent[2])
        opponents += [opponent_strategy]

    parallel = test_spec['parallel'] if 'parallel' in test_spec else False

    response_paths = [
        '%s/responses/%s-response.strategy' % (strategies_directory, _get_agent_name(agent))
        for agent in agent_specs
    ]

    opponent_responses = [None] * num_opponents
    responses_to_train_indices = []
    responses_to_train_opponents = []
    responses_to_train_params = []
    for i in range(num_opponents):
        if os.path.exists(response_paths[i]):
            response_strategy, _ = read_strategy_from_file(game_file_path, response_paths[i])
            opponent_responses[i] = response_strategy
        else:
            responses_to_train_indices += [i]
            responses_to_train_opponents += [opponents[i]]
            responses_to_train_params += [agent_specs[i][3]]

    def on_response_trained(response_index, response_strategy):
        output_file_path = response_paths[responses_to_train_indices[response_index]]
        output_file_dir = os.path.dirname(output_file_path)
        if not os.path.exists(output_file_dir):
            os.makedirs(output_file_dir)

        # response_index refers to the trained subset, not to the full opponent list
        opponent_strategy = responses_to_train_opponents[response_index]
        opponent_exploitability = exp.evaluate(opponent_strategy)
        response_exploitability = exp.evaluate(response_strategy)
        response_utility_vs_opponent = exp.evaluate(opponent_strategy, response_strategy)
        write_strategy_to_file(response_strategy, output_file_path, [
            'Opponent exploitability: %s' % opponent_exploitability,
            'Response exploitability: %s' % response_exploitability,
            'Response value vs opponent: %s' % response_utility_vs_opponent,
        ])

    print('%s responses need to be trained' % len(responses_to_train_opponents))

    responses_to_train_strategies = train_portfolio_responses(
        game_file_path,
        responses_to_train_opponents,
        responses_to_train_params,
        log=True,
        parallel=parallel,
        callback=on_response_trained)
    for i, j in enumerate(responses_to_train_indices):
        opponent_responses[j] = responses_to_train_strategies[i]

    if 'portfolio_cut_improvement_threshold' in test_spec:
        portfolio_strategies, response_indices = optimize_portfolio(
            game_file_path,
            opponents,
            opponent_responses,
            portfolio_cut_improvement_threshold=test_spec['portfolio_cut_improvement_threshold'],
            log=True,
            output_directory=strategies_directory)
    else:
        portfolio_strategies, response_indices = optimize_portfolio(
            game_file_path,
            opponents,
            opponent_responses,
            log=True,
            output_directory=strategies_directory)

    portfolio_size = len(portfolio_strategies)

    agent_names = [
        _get_agent_name(agent)
        for agent in np.take(agent_specs, response_indices, axis=0)
    ]

    print()
    for a in agent_specs:
        print(_get_agent_name(a))

    response_strategy_file_names = []
    for i, strategy in enumerate(portfolio_strategies):
        agent_name = agent_names[i]

        opponent_strategy = opponents[response_indices[i]]
        opponent_exploitability = exp.evaluate(opponent_strategy)
        response_exploitability = exp.evaluate(strategy)
        response_utility_vs_opponent = exp.evaluate(opponent_strategy, strategy)

        # Save portfolio response strategy
        response_strategy_output_file_path = '%s/%s-response.strategy' % (
            strategies_directory, agent_name)
        response_strategy_file_names += [response_strategy_output_file_path.split('/')[-1]]
        write_strategy_to_file(strategy, response_strategy_output_file_path, [
            'Opponent exploitability: %s' % opponent_exploitability,
            'Response exploitability: %s' % response_exploitability,
            'Response value vs opponent: %s' % response_utility_vs_opponent,
        ])

        # Save opponent strategy
        opponent_strategy_file_name = '%s-opponent.strategy' % agent_name
        opponent_strategy_output_file_path = '%s/%s' % (
            strategies_directory, opponent_strategy_file_name)
        write_strategy_to_file(opponent_strategy, opponent_strategy_output_file_path)

        # Generate opponent ACPC script
        opponent_script_path = '%s/%s.sh' % (strategies_directory, agent_name)
        shutil.copy(BASE_OPPONENT_SCRIPT_PATH, opponent_script_path)
        _replace_in_file(opponent_script_path, OPPONENT_SCRIPT_REPLACE_STRINGS, [
            WARNING_COMMENT,
            game_file_path,
            opponent_strategy_output_file_path.split('/')[-1],
        ])

    for utility_estimation_method in UTILITY_ESTIMATION_METHODS:
        agent_name_method_name = ('' if utility_estimation_method == UTILITY_ESTIMATION_METHODS[0]
                                  else '-%s' % utility_estimation_method)
        agent_script_path = '%s/%s%s.sh' % (
            strategies_directory, portfolio_name, agent_name_method_name)
        shutil.copy(BASE_AGENT_SCRIPT_PATH, agent_script_path)

        strategies_replacement = ''
        for i in range(portfolio_size):
            strategies_replacement += ' "${SCRIPT_DIR}/%s"' % response_strategy_file_names[i]
            if i < (portfolio_size - 1):
                strategies_replacement += ' \\\n'
        _replace_in_file(agent_script_path, AGENT_SCRIPT_REPLACE_STRINGS, [
            WARNING_COMMENT,
            game_file_path,
            '"%s"' % utility_estimation_method,
            strategies_replacement,
        ])
def run_tournament(self, test_spec):
    workspace_dir = os.getcwd()

    game_file_path = workspace_dir + '/' + test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)
    if game.get_num_players() != 2:
        raise AttributeError('Only games with 2 players are supported')

    tournament_name = test_spec['name']
    confidence = test_spec['confidence']
    max_confidence_interval_half_size = test_spec['max_confidence_interval_half_size']

    logs_base_dir = get_new_path('%s/%s/%s-%s+-%s' % (
        workspace_dir,
        FILES_PATH,
        tournament_name,
        int(confidence * 100),
        int(max_confidence_interval_half_size * 1000)))
    if not os.path.exists(logs_base_dir):
        os.makedirs(logs_base_dir)

    row_agents = test_spec['row_agents']
    row_num_agents = len(row_agents)
    row_agent_scripts_paths = [workspace_dir + '/' + agent[2] for agent in row_agents]

    column_agents = test_spec['column_agents']
    column_num_agents = len(column_agents)
    column_agent_scripts_paths = [workspace_dir + '/' + agent[2] for agent in column_agents]

    seeds = []
    seeds_file_path = '%s/%s/seeds.log' % (workspace_dir, FILES_PATH)
    if not os.path.exists(seeds_file_path):
        max_seed = (2 ** 30) - 1
        for _ in range(5000):
            seeds += [random.randint(1, max_seed)]
        with open(seeds_file_path, 'w') as file:
            for seed in seeds:
                file.write(str(seed) + '\n')
    else:
        with open(seeds_file_path, 'r') as seeds_file:
            for seed in seeds_file:
                seeds += [int(float(seed))]

    scores_table = [[None for j in range(column_num_agents)] for i in range(row_num_agents)]
    agent_pairs_evaluated = []

    env = os.environ.copy()
    env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

    for i in range(row_num_agents):
        for j in range(column_num_agents):
            row_agent_name = row_agents[i][0]
            column_agent_name = column_agents[j][0]
            if row_agent_name == column_agent_name:
                continue
            agent_pair_key = tuple(sorted([row_agent_name, column_agent_name]))
            if agent_pair_key in agent_pairs_evaluated:
                continue

            row_agent_script_path = row_agent_scripts_paths[i]
            column_agent_script_path = column_agent_scripts_paths[j]

            match_name = '%s-vs-%s' % (row_agent_name, column_agent_name)
            match_name_reversed = '%s-vs-%s' % (column_agent_name, row_agent_name)
            match_logs_dir = ('%s/%s' % (logs_base_dir, match_name)).replace('\n', '')

            print()
            print('Evaluating %s' % match_name)

            best_confidence_interval_half_size = float('inf')
            row_player_mean_utility = -1
            run_counter = 0
            log_readings = []
            while best_confidence_interval_half_size > max_confidence_interval_half_size:
                run_counter += 1
                run_logs_dir = '%s/run_%s' % (match_logs_dir, run_counter)
                os.makedirs(run_logs_dir)

                if len(seeds) < run_counter:
                    seeds += [int(datetime.now().timestamp())]
                seed = seeds[run_counter - 1]

                normal_order_logs_name = '%s/%s' % (run_logs_dir, match_name)
                proc = subprocess.Popen(
                    [
                        MATCH_SCRIPT,
                        normal_order_logs_name,
                        game_file_path,
                        str(NUM_TOURNAMENT_HANDS),
                        str(seed),
                        row_agent_name,
                        row_agent_script_path,
                        column_agent_name,
                        column_agent_script_path,
                    ],
                    cwd=ACPC_INFRASTRUCTURE_DIR,
                    env=env,
                    stdout=subprocess.PIPE)
                proc.stdout.readline().decode('utf-8').strip()
                log_readings += [get_player_utilities_from_log_file(normal_order_logs_name + '.log')]

                reversed_order_logs_name = '%s/%s' % (run_logs_dir, match_name_reversed)
                proc = subprocess.Popen(
                    [
                        MATCH_SCRIPT,
                        reversed_order_logs_name,
                        game_file_path,
                        str(NUM_TOURNAMENT_HANDS),
                        str(seed),
                        column_agent_name,
                        column_agent_script_path,
                        row_agent_name,
                        row_agent_script_path,
                    ],
                    cwd=ACPC_INFRASTRUCTURE_DIR,
                    env=env,
                    stdout=subprocess.PIPE)
                proc.stdout.readline().decode('utf-8').strip()
                log_readings += [get_player_utilities_from_log_file(reversed_order_logs_name + '.log')]

                data, player_names = get_logs_data(*log_readings)
                means, interval_half_size, _, _ = calculate_confidence_interval(data, confidence)
                print('Run %s, current confidence interval half size: %s' % (
                    run_counter, interval_half_size[0]))

                best_confidence_interval_half_size = interval_half_size[0]
                row_player_index = player_names.index(row_agent_name)
                row_player_mean_utility = means[row_player_index]

            scores_table[i][j] = row_player_mean_utility
            agent_pairs_evaluated += [agent_pair_key]

    print()
    print()

    scores_copy = copy.deepcopy(scores_table)
    for i in range(row_num_agents):
        scores_copy[i] = [row_agents[i][1]] + [
            None if score is None else score * 1000 for score in scores_copy[i]
        ]
    column_agent_names = [agent[1] for agent in column_agents]
    avg_results_table_string = tabulate(scores_copy, headers=column_agent_names, tablefmt='grid')
    print(avg_results_table_string)

    confidence_line = 'Confidence interval: %s%% +- %s' % (
        int(confidence * 100), int(max_confidence_interval_half_size * 1000))
    print(confidence_line)

    with open('%s/results.log' % logs_base_dir, 'w') as file:
        file.write(avg_results_table_string)
        file.write('\n')
        file.write('All utilities in mbb/g\n')
        file.write(confidence_line)
        file.write('\n')
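# Sketch only: calculate_confidence_interval is assumed here to return per-player
# means together with the half-size of a normal-approximation confidence interval,
# i.e. half = z * std / sqrt(n). A minimal stand-in under that assumption:
#
# import numpy as np
# from scipy.stats import norm
#
# def confidence_interval_half_size_sketch(data, confidence):
#     z = norm.ppf(0.5 + confidence / 2)  # two-sided z-score
#     data = np.asarray(data, dtype=float)
#     return z * data.std(axis=0, ddof=1) / np.sqrt(data.shape[0])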
def run_evaluation(self, test_spec):
    print()
    workspace_dir = os.getcwd()

    game_file_path = workspace_dir + '/' + test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)
    if game.get_num_players() != 2:
        raise AttributeError('Only games with 2 players are supported')

    agents = test_spec['agents']
    num_matches = test_spec['num_matches']
    num_match_hands = test_spec['num_match_hands']
    if game.get_num_players() != len(agents):
        raise AttributeError('Wrong number of players')

    game_name = game_file_path.split('/')[-1][:-len('.game')]
    test_directory = '%s/%s/test-%s-[%s]-%sx%s' % (
        workspace_dir,
        FILES_PATH,
        game_name,
        ';'.join(map(lambda a: a[0], agents)),
        num_matches,
        num_match_hands)
    test_data_directory = '%s/data' % test_directory

    force_recreate_data = (test_spec['force_recreate_data']
                           if 'force_recreate_data' in test_spec else False)

    data_created = True
    if not force_recreate_data:
        if os.path.exists(test_directory):
            for i in range(num_matches):
                if not os.path.exists('%s/match_%s' % (test_data_directory, i)):
                    data_created = False
                    break
        else:
            data_created = False

    if not data_created or force_recreate_data:
        if os.path.exists(test_data_directory):
            shutil.rmtree(test_data_directory)

        for i in range(num_matches):
            match_data_dir = '%s/match_%s' % (test_data_directory, i)
            if not os.path.exists(match_data_dir):
                os.makedirs(match_data_dir)

            seed = int(datetime.now().timestamp())
            env = os.environ.copy()
            env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

            proc = subprocess.Popen(
                [
                    MATCH_SCRIPT,
                    '%s/normal' % match_data_dir,
                    game_file_path,
                    str(num_match_hands),
                    str(seed),
                    agents[0][0],
                    workspace_dir + '/' + agents[0][1],
                    agents[1][0],
                    workspace_dir + '/' + agents[1][1],
                ],
                cwd=ACPC_INFRASTRUCTURE_DIR,
                env=env,
                stdout=subprocess.PIPE)
            proc.stdout.readline().decode('utf-8').strip()

            proc = subprocess.Popen(
                [
                    MATCH_SCRIPT,
                    '%s/reversed' % match_data_dir,
                    game_file_path,
                    str(num_match_hands),
                    str(seed),
                    agents[1][0],
                    workspace_dir + '/' + agents[1][1],
                    agents[0][0],
                    workspace_dir + '/' + agents[0][1],
                ],
                cwd=ACPC_INFRASTRUCTURE_DIR,
                env=env,
                stdout=subprocess.PIPE)
            proc.stdout.readline().decode('utf-8').strip()

        print('Data created')

    log_file_paths = []
    for i in range(num_matches):
        log_file_paths += [
            '%s/match_%s/normal.log' % (test_data_directory, i),
            '%s/match_%s/reversed.log' % (test_data_directory, i),
        ]

    agent_strategies = {}
    for agent in agents:
        if len(agent) >= 3:
            strategy, _ = read_strategy_from_file(game_file_path, agent[2])
            agent_strategies[agent[0]] = strategy

    utility_estimators = test_spec['utility_estimators']
    output_table = [[None for j in range(3)] for i in range(len(utility_estimators))]
    for i, utility_estimator_spec in enumerate(utility_estimators):
        utility_estimator_name = utility_estimator_spec[0]
        utility_estimator_class = utility_estimator_spec[1]
        utility_estimator_instance = None
        if utility_estimator_class is not None:
            if len(utility_estimator_spec) == 2:
                utility_estimator_instance = utility_estimator_class(game, False)
            elif len(utility_estimator_spec) > 2:
                utility_estimator_args = utility_estimator_spec[2]
                utility_estimator_instance = utility_estimator_class(
                    game, False, **utility_estimator_args)

        log_readings = [
            get_player_utilities_from_log_file(
                log_file_path,
                game_file_path=game_file_path,
                utility_estimator=utility_estimator_instance,
                player_strategies=agent_strategies)
            for log_file_path in log_file_paths
        ]

        data, player_names = get_logs_data(*log_readings)
        player_zero_index = player_names.index(agents[0][0])

        output_table[i][0] = utility_estimator_name
        means = np.mean(data, axis=0)
        stds = np.std(data, axis=0)
        output_table[i][1] = means[player_zero_index]
        output_table[i][2] = stds[player_zero_index]

    print()
    print(tabulate(output_table, headers=['mean', 'SD'], tablefmt='grid'))
    print()
    print('Total num hands: %s' % data.shape[0])
def test_leduc_cfr_works(self):
    game = acpc.read_game_file(LEDUC_POKER_GAME_FILE_PATH)
    cfr = Cfr(game, show_progress=False)
    cfr.train(5, weight_delay=2)
def test_kuhn_bigdeck_2round_cfr_works(self):
    game = acpc.read_game_file(KUHN_BIG_DECK_2ROUND_POKER_GAME_FILE_PATH)
    cfr = Cfr(game, show_progress=False)
    cfr.train(5, weight_delay=2)
def evaluate_agent(self, test_spec):
    portfolio_name = test_spec['portfolio_name']
    portfolio_directory = '%s/%s' % (PORTFOLIOS_DIRECTORY, portfolio_name)

    game_file_path = test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)
    if game.get_num_players() != 2:
        raise AttributeError('Only games with 2 players are supported')

    response_strategy_paths = []
    opponent_names = []
    opponent_script_paths = []
    for file in os.listdir(portfolio_directory):
        if file.endswith('-response.strategy'):
            response_strategy_paths += [file]
        elif file.endswith('.sh') and not file.startswith(portfolio_name):
            opponent_names += [file[:-len('.sh')]]
            opponent_script_paths += ['%s/%s' % (portfolio_directory, file)]
    portfolio_size = len(response_strategy_paths)

    logs_dir = '/'.join([GAME_LOGS_DIRECTORY, portfolio_name])
    if os.path.exists(logs_dir):
        shutil.rmtree(logs_dir)
    os.makedirs(logs_dir)

    big_blind_size = get_big_blind_size(game)

    env = os.environ.copy()
    env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

    print()
    for i in range(portfolio_size):
        opponent_name = opponent_names[i]
        logs_path = '%s/%s' % (logs_dir, opponent_name)
        proc = subprocess.Popen(
            [
                START_DEALER_AND_OPPONENT_SCRIPT_PATH,
                game_file_path,
                logs_path,
                opponent_name,
                opponent_script_paths[i],
                portfolio_name,
            ],
            env=env,
            stdout=subprocess.PIPE)
        port_number = proc.stdout.readline().decode('utf-8').strip()

        client = acpc.Client(game_file_path, '127.0.1.1', port_number)
        full_response_strategy_paths = [
            '%s/%s' % (portfolio_directory, s) for s in response_strategy_paths
        ]
        utility_estimator_args = (test_spec['utility_estimator_args']
                                  if 'utility_estimator_args' in test_spec else None)
        client.play(
            ImplicitModellingAgent(
                game_file_path,
                full_response_strategy_paths,
                utility_estimator_class=test_spec['utility_estimator_class'],
                utility_estimator_args=utility_estimator_args))

        scores_line = proc.stdout.readline().decode('utf-8').strip()
        agent_score = float(scores_line.split(':')[1].split('|')[1])
        agent_score_mbb_per_game = (agent_score / NUM_EVAL_HANDS) * big_blind_size
        print('%s vs %s: %s' % (portfolio_name, opponent_name, agent_score_mbb_per_game))
def optimize_portfolio(game_file_path,
                       opponent_strategies,
                       response_strategies,
                       portfolio_size=-1,
                       portfolio_cut_improvement_threshold=0.05,
                       log=False,
                       output_directory=None):
    num_opponents = len(opponent_strategies)
    if portfolio_size == num_opponents or portfolio_cut_improvement_threshold == 0:
        return response_strategies, range(num_opponents)

    game = acpc.read_game_file(game_file_path)
    exp = Exploitability(game)

    if log:
        print()

    utilities = np.zeros([num_opponents, num_opponents])
    for i in range(num_opponents):
        for j in range(num_opponents):
            utilities[i, j] = exp.evaluate(opponent_strategies[j], response_strategies[i])

    portfolio_utilities = np.zeros(num_opponents)
    response_added = np.ones(num_opponents, dtype=np.intp) * -1

    response_total_utility = np.mean(utilities, axis=1)
    best_response_index = np.argmax(response_total_utility)
    portfolio_utilities[0] = response_total_utility[best_response_index]
    response_added[0] = best_response_index

    max_utilities = np.zeros(num_opponents)
    np.copyto(max_utilities, utilities[best_response_index])

    response_available = [True] * num_opponents
    response_available[best_response_index] = False

    for i in range(1, num_opponents):
        best_portfolio_utility = None
        best_max_utilities = None
        best_response_to_add = None
        for j in range(num_opponents):
            if response_available[j]:
                new_max_utilities = np.maximum(max_utilities, utilities[j])
                new_portfolio_utility = np.mean(new_max_utilities)
                # Compare against None explicitly so that zero or negative utilities
                # do not discard the current best candidate
                if best_portfolio_utility is None or new_portfolio_utility > best_portfolio_utility:
                    best_portfolio_utility = new_portfolio_utility
                    best_max_utilities = new_max_utilities
                    best_response_to_add = j
        response_available[best_response_to_add] = False
        max_utilities = best_max_utilities
        portfolio_utilities[i] = best_portfolio_utility
        response_added[i] = best_response_to_add

    final_portfolio_size = None
    if portfolio_size > 0:
        final_portfolio_size = portfolio_size
    else:
        min_portfolio_utility = portfolio_utilities[0]
        max_portfolio_utility = portfolio_utilities[-1]
        total_utility_improvement = max_portfolio_utility - min_portfolio_utility
        minimal_improvement = total_utility_improvement * portfolio_cut_improvement_threshold

        final_portfolio_size = 1
        for i in range(1, num_opponents):
            if (portfolio_utilities[i] - portfolio_utilities[i - 1]) >= minimal_improvement:
                final_portfolio_size += 1
            else:
                break

    if log:
        print('Utilities table:')
        for i in range(num_opponents):
            print('\t'.join([str(u) for u in utilities[i]]))
        print('Response added: %s' % response_added)
        print('Final portfolio size: %s' % final_portfolio_size)

        plt.figure(dpi=160)
        plt.plot(np.arange(num_opponents, dtype=np.intp) + 1, portfolio_utilities)
        plt.plot(final_portfolio_size,
                 portfolio_utilities[final_portfolio_size - 1],
                 marker='o',
                 color='r')
        plt.title('Portfolio utility')
        plt.xlabel('Portfolio size')
        plt.ylabel('Portfolio value [mbb/g]')
        plt.grid()
        if output_directory:
            plt.savefig('%s/portfolio_size_utility.png' % output_directory)
        else:
            plt.show()

    portfolio_response_indices = response_added[:final_portfolio_size]
    return np.take(response_strategies, portfolio_response_indices), portfolio_response_indices
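# Illustration only (not part of the library API): the greedy loop above repeatedly
# adds the response that maximizes the mean of the per-opponent maxima. A minimal
# self-contained sketch of that selection order on a toy utilities matrix:
def _greedy_portfolio_order_sketch(utilities):
    utilities = np.asarray(utilities, dtype=float)
    num_responses = utilities.shape[0]
    available = [True] * num_responses
    max_utilities = np.full(utilities.shape[1], -np.inf)
    order = []
    for _ in range(num_responses):
        best_index, best_value, best_max = None, None, None
        for j in range(num_responses):
            if available[j]:
                # Portfolio value if response j were added: per-opponent best utility
                candidate = np.maximum(max_utilities, utilities[j])
                value = np.mean(candidate)
                if best_value is None or value > best_value:
                    best_index, best_value, best_max = j, value, candidate
        available[best_index] = False
        max_utilities = best_max
        order += [best_index]
    return order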
def train_and_show_results(self, test_spec):
    game = acpc.read_game_file(test_spec['game_file_path'])

    exploitability = Exploitability(game)

    iteration_counts = np.zeros(0)
    exploitability_values = np.zeros([1, 0])

    best_exploitability = float("inf")
    best_exploitability_strategy = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

    def checkpoint_callback(game_tree, checkpoint_index, iterations):
        nonlocal iteration_counts
        nonlocal exploitability_values
        nonlocal best_exploitability
        nonlocal best_exploitability_strategy

        iteration_counts = np.append(iteration_counts, iterations)
        if CHECK_STRATEGY_CORRECTNESS:
            self.assertTrue(is_correct_strategy(game_tree))
        exploitability_value = exploitability.evaluate(game_tree)
        exploitability_values = np.append(exploitability_values, exploitability_value)
        if COLLECT_MIN_EXPLOITABILITY and exploitability_value < best_exploitability:
            best_exploitability = exploitability_value
            copy_strategy(best_exploitability_strategy, game_tree)

    cfr = Cfr(game)
    cfr.train(test_spec['training_iterations'],
              weight_delay=test_spec['weight_delay'],
              checkpoint_iterations=test_spec['checkpoint_iterations'],
              checkpoint_callback=checkpoint_callback,
              minimal_action_probability=0.00006)

    best_response = BestResponse(game).solve(cfr.game_tree)
    player_utilities, _ = PlayerUtility(game).evaluate(cfr.game_tree, best_response)
    print(player_utilities.tolist())
    print('Exploitability: %s' % exploitability.evaluate(cfr.game_tree))

    if COLLECT_MIN_EXPLOITABILITY:
        min_exploitability = exploitability.evaluate(best_exploitability_strategy)
        min_exploitability_best_response = BestResponse(game).solve(best_exploitability_strategy)
        min_exploitability_player_utilities, _ = PlayerUtility(game).evaluate(
            best_exploitability_strategy, min_exploitability_best_response)
        self.assertEqual(min_exploitability, exploitability_values.min())
        print('Minimum exploitability: %s' % min_exploitability)
        print('Minimum exploitability player utilities: %s'
              % min_exploitability_player_utilities.tolist())
    else:
        print('Minimum exploitability: %s' % exploitability_values.min())

    plt.figure(dpi=160)
    plt.plot(iteration_counts, exploitability_values, linewidth=0.8)
    plt.title(test_spec['title'])
    plt.xlabel('Training iterations')
    plt.ylabel('Strategy exploitability [mbb/g]')
    plt.grid()

    game_name = test_spec['game_file_path'].split('/')[1][:-5]
    figure_output_path = '%s/%s(it:%s-st:%s).png' % (
        FIGURES_FOLDER,
        game_name,
        test_spec['training_iterations'],
        test_spec['checkpoint_iterations'])

    figures_directory = os.path.dirname(figure_output_path)
    if not os.path.exists(figures_directory):
        os.makedirs(figures_directory)

    plt.savefig(figure_output_path)

    write_strategy_to_file(
        cfr.game_tree,
        '%s/%s(it:%s).strategy' % (FIGURES_FOLDER, game_name, test_spec['training_iterations']),
        ['# Game utility against best response: %s' % player_utilities.tolist()])
def train_and_show_results(self, test_spec):
    game_file_path = test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)

    base_strategy, _ = read_strategy_from_file(game_file_path, test_spec['base_strategy_path'])

    agents = test_spec['opponent_tilt_types']
    num_agents = len(agents)

    game_name = game_file_path.split('/')[1][:-5]
    overwrite_figure = test_spec['overwrite_figure'] if 'overwrite_figure' in test_spec else False
    figure_path = get_new_path(
        '%s/%s(it:%s-st:%s)' % (FIGURES_FOLDER,
                                game_name,
                                test_spec['training_iterations'],
                                test_spec['checkpoint_iterations']),
        '.png',
        overwrite_figure)
    create_path_dirs(figure_path)

    exp = Exploitability(game)

    checkpoints_count = math.ceil(
        (test_spec['training_iterations'] - 700) / test_spec['checkpoint_iterations'])
    iteration_counts = np.zeros(checkpoints_count)
    exploitability_values = np.zeros([num_agents, checkpoints_count])
    vs_opponent_utility_values = np.zeros([num_agents, checkpoints_count])
    opponent_exploitability_values = np.zeros(num_agents)

    for i, agent in enumerate(agents):
        print('%s/%s' % (i + 1, num_agents))

        opponent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path, base_strategy, agent[0], agent[1], agent[2])
        self.assertTrue(is_correct_strategy(opponent_strategy))

        if 'print_opponent_strategies' in test_spec and test_spec['print_opponent_strategies']:
            write_strategy_to_file(
                opponent_strategy,
                '%s/%s.strategy' % (os.path.dirname(figure_path), get_agent_name(agent)))

        if 'print_best_responses' in test_spec and test_spec['print_best_responses']:
            opponent_best_response = BestResponse(game).solve(opponent_strategy)
            write_strategy_to_file(
                opponent_best_response,
                '%s/%s-best_response.strategy' % (
                    os.path.dirname(figure_path), get_agent_name(agent)))

        if PLOT_OPPONENT_EXPLOITABILITY:
            opponent_exploitability = exp.evaluate(opponent_strategy)
            opponent_exploitability_values[i] = opponent_exploitability
            print('%s exploitability: %s' % (get_agent_name(agent), opponent_exploitability))

        def checkpoint_callback(game_tree, checkpoint_index, iterations):
            if i == 0:
                iteration_counts[checkpoint_index] = iterations
            self.assertTrue(is_correct_strategy(game_tree))
            exploitability_values[i, checkpoint_index] = exp.evaluate(game_tree)
            vs_opponent_utility_values[i, checkpoint_index] = exp.evaluate(
                opponent_strategy, game_tree)

        rnr = RestrictedNashResponse(game, opponent_strategy, agent[3])
        rnr.train(test_spec['training_iterations'],
                  checkpoint_iterations=test_spec['checkpoint_iterations'],
                  checkpoint_callback=checkpoint_callback)

        if 'print_response_strategies' in test_spec and test_spec['print_response_strategies']:
            write_strategy_to_file(
                rnr.game_tree,
                '%s-%s-p=%s.strategy' % (
                    figure_path[:-len('.png')], get_agent_name(agent), agent[3]))

        print('Vs opponent value: %s' % exp.evaluate(opponent_strategy, rnr.game_tree))
        print('Exploitability: %s' % exp.evaluate(rnr.game_tree))

        plt.figure(dpi=300)
        ax = plt.subplot(111)
        for j in range(i + 1):
            p = plt.plot(iteration_counts,
                         exploitability_values[j],
                         label='%s-p=%s exploitability' % (get_agent_name(agents[j]), agents[j][3]),
                         linewidth=LINE_WIDTH)
            plt.plot(iteration_counts,
                     vs_opponent_utility_values[j],
                     '--',
                     label='Utility against opponent strategy',
                     color=p[0].get_color(),
                     linewidth=LINE_WIDTH)
            if PLOT_OPPONENT_EXPLOITABILITY:
                plt.plot(iteration_counts,
                         np.ones(checkpoints_count) * opponent_exploitability_values[j],
                         ':',
                         label='Opponent exploitability',
                         color=p[0].get_color(),
                         linewidth=LINE_WIDTH)

        plt.title(test_spec['title'])
        plt.xlabel('Training iterations')
        plt.ylabel('Strategy exploitability [mbb/g]')
        plt.grid()

        handles, labels = ax.get_legend_handles_labels()
        new_handles = []
        new_labels = []
        for i in range(PLOT_COUNT_PER_AGENT):
            for j in range(i, len(handles), PLOT_COUNT_PER_AGENT):
                new_handles += [handles[j]]
                new_labels += [labels[j]]
        lgd = plt.legend(new_handles,
                         new_labels,
                         loc='upper center',
                         bbox_to_anchor=(0.5, -0.1),
                         ncol=PLOT_COUNT_PER_AGENT)
        plt.savefig(figure_path, bbox_extra_artists=(lgd,), bbox_inches='tight')

    print('Figure written to %s' % figure_path)
def train_and_show_results(self, test_spec):
    game = acpc.read_game_file(test_spec['game_file_path'])

    weak_opponent_samples_tree = GameTreeBuilder(game, SamplesTreeNodeProvider()).build_tree()
    weak_opponent_strategy_tree = GameTreeBuilder(game, StrategyTreeNodeProvider()).build_tree()

    def on_node(samples_node, strategy_node):
        if isinstance(samples_node, ActionNode):
            child_count = len(samples_node.children)
            samples_count = random.randrange(15)
            for i, a in enumerate(samples_node.children):
                if i < (child_count - 1) and samples_count > 0:
                    action_samples_count = random.randrange(samples_count + 1)
                    samples_count -= action_samples_count
                    samples_node.action_decision_counts[a] = action_samples_count
                else:
                    samples_node.action_decision_counts[a] = samples_count
            samples_sum = np.sum(samples_node.action_decision_counts)
            if samples_sum > 0:
                strategy_node.strategy = samples_node.action_decision_counts / samples_sum
            else:
                for a in strategy_node.children:
                    strategy_node.strategy[a] = 1 / len(strategy_node.children)

    walk_trees(on_node, weak_opponent_samples_tree, weak_opponent_strategy_tree)
    self.assertTrue(is_correct_strategy(weak_opponent_strategy_tree))

    exploitability = Exploitability(game)
    num_test_counts = test_spec['test_counts']
    data = np.zeros([num_test_counts, 2, len(P_MAX_VALUES)])
    for i in range(num_test_counts):
        print('%s/%s' % (i + 1, num_test_counts))
        for j, p_max in enumerate(P_MAX_VALUES):
            print('Pmax: %s - %s/%s' % (p_max, j + 1, len(P_MAX_VALUES)))

            dbr = DataBiasedResponse(game, weak_opponent_samples_tree, p_max=p_max)
            dbr.train(test_spec['training_iterations'])

            data[i, 0, j] = exploitability.evaluate(dbr.game_tree)
            data[i, 1, j] = exploitability.evaluate(weak_opponent_strategy_tree, dbr.game_tree)

            plt.figure(dpi=160)
            for k in range(i + 1):
                run_index = math.floor(k / 2)
                xdata = data[k, 0, :] if k < i or j == (len(P_MAX_VALUES) - 1) \
                    else data[k, 0, 0:j + 1]
                ydata = data[k, 1, :] if k < i or j == (len(P_MAX_VALUES) - 1) \
                    else data[k, 1, 0:j + 1]
                plt.plot(xdata, ydata, label='Run %s' % (run_index + 1), marker='o', linewidth=0.8)

            if 'title' in test_spec:
                plt.title(test_spec['title'])
            plt.xlabel('DBR trained strategy exploitability [mbb/g]')
            plt.ylabel('Random opponent exploitation by DBR strategy [mbb/g]')
            plt.grid()
            if num_test_counts > 1:
                plt.legend()

            game_name = test_spec['game_file_path'].split('/')[1][:-5]
            figure_output_path = '%s/%s(it:%s).png' % (
                FIGURES_FOLDER, game_name, test_spec['training_iterations'])

            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)

            plt.savefig(figure_output_path)

    print('\033[91mThis test needs your assistance! '
          + 'Check the generated graph %s!\033[0m' % figure_output_path)
def create_agents_and_plot_exploitabilities(self, test_spec):
    base_strategy, _ = read_strategy_from_file(
        test_spec['game_file_path'], test_spec['base_strategy_path'])
    game = acpc.read_game_file(test_spec['game_file_path'])
    exploitability = Exploitability(game)

    plot_equilibrium = test_spec['plot_equilibrium'] if 'plot_equilibrium' in test_spec else True
    if plot_equilibrium:
        equilibrium_exploitability = exploitability.evaluate(base_strategy)

    tilt_probabilities = test_spec['tilt_probabilities']
    exploitability_values = np.zeros([len(TILT_TYPES), len(tilt_probabilities)])

    plot_exploitabilities = (test_spec['plot_exploitabilities']
                             if 'plot_exploitabilities' in test_spec else True)
    if plot_exploitabilities:
        for i, tilt_type in enumerate(TILT_TYPES):
            for j, tilt_probability in enumerate(tilt_probabilities):
                tilted_agent = create_agent_strategy_from_trained_strategy(
                    test_spec['game_file_path'],
                    base_strategy,
                    tilt_type[1],
                    tilt_type[2],
                    tilt_probability)
                exploitability_values[i, j] = exploitability.evaluate(tilted_agent)

            plt.figure(dpi=160)
            for j in range(i + 1):
                plt.plot(tilt_probabilities,
                         exploitability_values[j],
                         label=TILT_TYPES[j][0],
                         linewidth=0.8)
            if plot_equilibrium:
                plt.plot(tilt_probabilities,
                         [equilibrium_exploitability] * len(tilt_probabilities),
                         'r--',
                         label='Equilibrium',
                         linewidth=1.5)
            # plt.title(test_spec['title'])
            plt.xlabel('Tilt amount')
            plt.ylabel('Agent exploitability [mbb/g]')
            plt.grid()
            plt.legend()

            figure_output_path = '%s/%s.png' % (FIGURES_FOLDER, test_spec['figure_filename'])
            figures_directory = os.path.dirname(figure_output_path)
            if not os.path.exists(figures_directory):
                os.makedirs(figures_directory)
            plt.savefig(figure_output_path)

    plot_agent_comparison = (test_spec['plot_agent_comparison']
                             if 'plot_agent_comparison' in test_spec else False)
    if plot_agent_comparison:
        agents_strategies = []
        agent_names = []
        for i, tilt_type in enumerate(TILT_TYPES):
            for j, tilt_probability in enumerate(tilt_probabilities):
                agent_names += ['%s %s %s' % (str(tilt_type[1]).split('.')[1],
                                              str(tilt_type[2]).split('.')[1],
                                              tilt_probability)]
                agents_strategies += [create_agent_strategy_from_trained_strategy(
                    test_spec['game_file_path'],
                    base_strategy,
                    tilt_type[1],
                    tilt_type[2],
                    tilt_probability)]

        num_agents = len(agent_names)
        scores_table = np.zeros([num_agents, num_agents])
        num_comparisons = 0
        for i in range(num_agents):
            for j in range(i, num_agents):
                num_comparisons += 1
        with tqdm(total=num_comparisons) as pbar:
            for i in range(num_agents):
                for j in range(i, num_agents):
                    scores_table[i, j] = exploitability.evaluate(
                        agents_strategies[j], agents_strategies[i])
                    scores_table[j, i] = -scores_table[i, j]
                    pbar.update(1)

        max_score = scores_table.max()
        min_score = scores_table.min()

        # plt.figure(dpi=160)
        fig, ax = plt.subplots()
        cax = plt.imshow(scores_table, cmap=plt.cm.RdYlGn)
        plt.xticks(np.arange(num_agents), agent_names)
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right", rotation_mode="anchor")
        plt.yticks(np.arange(num_agents), agent_names)
        # plt.yticks(rotation=35)
        # plt.tick_params(
        #     axis='x',
        #     which='both',
        #     bottom=False,
        #     top=False,
        #     labelbottom=False)
        cbar = fig.colorbar(cax, ticks=[min_score, 0, max_score])
        cbar.ax.set_yticklabels([round(min_score), '0', round(max_score)])
        plt.tight_layout()
        plt.gcf().subplots_adjust(left=0.1)

        figure_output_path = '%s/%s-comparison.png' % (FIGURES_FOLDER, test_spec['figure_filename'])
        figures_directory = os.path.dirname(figure_output_path)
        if not os.path.exists(figures_directory):
            os.makedirs(figures_directory)
        plt.savefig(figure_output_path, dpi=160)
            strategy_file_lines_sorted = sorted(strategy_file_lines)
            progress.update(1)
    except NameError:
        strategy_file_lines_sorted = sorted(strategy_file_lines)

    strategy_file_lines_sorted = [
        '# Training iterations: %s\n' % iterations
    ] + strategy_file_lines_sorted

    try:
        with tqdm(total=1) as progress:
            progress.set_description('Writing strategy file')
            _write_to_output_file(output_path, strategy_file_lines_sorted)
            progress.update(1)
    except NameError:
        _write_to_output_file(output_path, strategy_file_lines_sorted)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        print("Usage {game_file_path} {iterations} {strategy_output_path}")
        sys.exit(1)

    iterations = int(sys.argv[2])
    output_path = sys.argv[3]

    game = acpc.read_game_file(sys.argv[1])

    cfr = Cfr(game)
    cfr.train(iterations)
    _write_strategy(cfr.game_tree, iterations, output_path)
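# Example invocation (script and file names illustrative):
#   python train_strategy.py games/kuhn.limit.2p.game 1000 kuhn.trained.strategy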
def run_evaluation(self, test_spec):
    print()
    workspace_dir = os.getcwd()

    game_file_path = workspace_dir + '/' + test_spec['game_file_path']
    game = acpc.read_game_file(game_file_path)
    if game.get_num_players() != 2:
        raise AttributeError('Only games with 2 players are supported')

    test_name = test_spec['test_name']
    base_agent = test_spec['base_agent']
    validation_agents = test_spec['validation_agents']
    num_matches = test_spec['num_matches']
    num_match_hands = test_spec['num_match_hands']

    game_name = game_file_path.split('/')[-1][:-len('.game')]
    validation_agent_names = [_get_agent_name(agent) for agent in validation_agents]
    test_directory = '%s/%s/%s' % (workspace_dir, FILES_PATH, test_name)

    agents_data_directories = []
    for validation_agent in validation_agents:
        agent_data_dir = '%s/%s-[%s;%s]-%sx%s' % (
            test_directory,
            game_name,
            base_agent[0],
            _get_agent_name(validation_agent),
            num_matches,
            num_match_hands)
        agents_data_directories += [agent_data_dir]

    force_recreate_data = (test_spec['force_recreate_data']
                           if 'force_recreate_data' in test_spec else False)

    base_validation_agent_strategy = None
    validation_agent_strategies = []
    for x in range(len(validation_agents)):
        agent_data_directory = agents_data_directories[x]
        validation_agent = validation_agents[x]

        data_created = True
        if not force_recreate_data:
            if os.path.exists(agent_data_directory):
                for i in range(num_matches):
                    match_dir = '%s/match_%s' % (agent_data_directory, i)
                    if not os.path.exists(match_dir) or len(os.listdir(match_dir)) == 0:
                        data_created = False
                        break
            else:
                data_created = False

        if base_validation_agent_strategy is None:
            base_validation_agent_strategy, _ = read_strategy_from_file(
                game_file_path, test_spec['base_validation_agents_strategy_path'])

        validation_agent_strategy = create_agent_strategy_from_trained_strategy(
            game_file_path,
            base_validation_agent_strategy,
            validation_agent[0],
            validation_agent[1],
            validation_agent[2])
        validation_agent_strategies += [validation_agent_strategy]

        if not data_created or force_recreate_data:
            if os.path.exists(agent_data_directory):
                shutil.rmtree(agent_data_directory)

            validation_agent_strategy_path = '%s/%s.strategy' % (
                test_directory, _get_agent_name(validation_agent))
            write_strategy_to_file(validation_agent_strategy, validation_agent_strategy_path)

            for i in range(num_matches):
                match_data_dir = '%s/match_%s' % (agent_data_directory, i)
                if not os.path.exists(match_data_dir):
                    os.makedirs(match_data_dir)

                seed = int(datetime.now().timestamp())
                env = os.environ.copy()
                env['PATH'] = os.path.dirname(sys.executable) + ':' + env['PATH']

                proc = subprocess.Popen(
                    [
                        MATCH_SCRIPT,
                        '%s/normal' % match_data_dir,
                        game_file_path,
                        str(num_match_hands),
                        str(seed),
                        base_agent[0],
                        _get_agent_name(validation_agent),
                    ],
                    cwd=ACPC_INFRASTRUCTURE_DIR,
                    env=env,
                    stdout=subprocess.PIPE)
                ports_string = proc.stdout.readline().decode('utf-8').strip()
                ports = ports_string.split(' ')

                args = [
                    (game_file_path, ports[0], base_agent[1]),
                    (game_file_path, ports[1], validation_agent_strategy_path),
                ]
                with multiprocessing.Pool(2) as p:
                    p.map(_run_agent, args)

                proc = subprocess.Popen(
                    [
                        MATCH_SCRIPT,
                        '%s/reversed' % match_data_dir,
                        game_file_path,
                        str(num_match_hands),
                        str(seed),
                        _get_agent_name(validation_agent),
                        base_agent[0],
                    ],
                    cwd=ACPC_INFRASTRUCTURE_DIR,
                    env=env,
                    stdout=subprocess.PIPE)
                ports_string = proc.stdout.readline().decode('utf-8').strip()
                ports = ports_string.split(' ')

                args = [
                    (game_file_path, ports[0], validation_agent_strategy_path),
                    (game_file_path, ports[1], base_agent[1]),
                ]
                with multiprocessing.Pool(2) as p:
                    p.map(_run_agent, args)

            print('Data created')

    output = []

    def prin(string=''):
        nonlocal output
        output += [string]
        print(string)

    utility_estimators = test_spec['utility_estimators']

    agents_log_files_paths = []
    for x in range(len(validation_agents)):
        agents_data_directory = agents_data_directories[x]
        log_file_paths = []
        for i in range(num_matches):
            log_file_paths += [
                '%s/match_%s/normal.log' % (agents_data_directory, i),
                '%s/match_%s/reversed.log' % (agents_data_directory, i),
            ]
        agents_log_files_paths += [log_file_paths]

    agent_strategies = {}
    for i in range(len(validation_agents)):
        agent_strategies[validation_agent_names[i]] = validation_agent_strategies[i]

    prin('Cell contains utility of row player based on observation of column player')
    for utility_estimator_spec in utility_estimators:
        utility_estimator_name = utility_estimator_spec[0]
        utility_estimator_class = utility_estimator_spec[1]
        utility_estimator_instance = None
        if utility_estimator_class is not None:
            if len(utility_estimator_spec) == 2:
                utility_estimator_instance = utility_estimator_class(game, False)
            elif len(utility_estimator_spec) > 2:
                utility_estimator_args = utility_estimator_spec[2]
                utility_estimator_instance = utility_estimator_class(
                    game, False, **utility_estimator_args)

        prin()
        prin('%s (mean | SD)' % utility_estimator_name)
        output_table = [[None for j in range(len(validation_agents) + 1)]
                        for i in range(len(validation_agents))]
        for i in range(len(validation_agents)):
            output_table[i][0] = validation_agent_names[i]

        for x in range(len(validation_agents)):
            log_readings = [
                get_player_utilities_from_log_file(
                    log_file_path,
                    game_file_path=game_file_path,
                    utility_estimator=utility_estimator_instance,
                    player_strategies=agent_strategies,
                    evaluated_strategies=validation_agent_strategies)
                for log_file_path in agents_log_files_paths[x]
            ]

            data, player_names = get_logs_data(*log_readings)
            means = np.mean(data, axis=0)
            stds = np.std(data, axis=0)

            player_index = player_names.index(validation_agent_names[x])
            for y in range(len(validation_agents)):
                output_table[y][x + 1] = '%s | %s' % (
                    means[player_index][y], stds[player_index][y])

        prin(tabulate(output_table, headers=validation_agent_names, tablefmt='grid'))

    prin()
    prin('Total num hands: %s' % data.shape[0])

    output_log_path = get_new_path(
        '%s/output-%sx%s' % (test_directory, num_matches, num_match_hands), '.log')
    with open(output_log_path, 'w') as file:
        for line in output:
            file.write(line + '\n')