def get_hidden_states_bucket(player_perspective, fails): global perspective_fails_to_bucket global bucket_to_hidden_states key = (player_perspective, tuple(fails)) if key in perspective_fails_to_bucket: return perspective_fails_to_bucket[key] print "Calculating big lookup table..." all_hidden_states = possible_hidden_states( ['merlin', 'minion', 'servant', 'assassin'], 5) possible_beliefs_to_key = defaultdict(lambda: []) for roles in all_hidden_states: beliefs = [ starting_hidden_states(player, roles, all_hidden_states) for player in range(5) ] perspectives = [ get_python_perspective(roles, player) for player in range(5) ] for perspective, b in zip(perspectives, beliefs): possible_beliefs_to_key[frozenset(b)].append((perspective, ())) for p1size in [2, 3]: for p1 in itertools.combinations(range(5), p1size): num_bad = len([p for p in p1 if roles[p] in EVIL_ROLES]) if num_bad == 0: continue for num_fail in range(1, num_bad + 1): new_beliefs = [ filter_hidden_states(b, p1, num_fail) for b in beliefs ] for perspective, b in zip(perspectives, new_beliefs): possible_beliefs_to_key[frozenset(b)].append( (perspective, ((p1, num_fail), ))) for p2size in [2, 3]: for p2 in itertools.combinations(range(5), p2size): num_bad2 = len( [p for p in p2 if roles[p] in EVIL_ROLES]) if num_bad2 == 0: continue for num_fail2 in range(1, num_bad2 + 1): for perspective, b in zip( perspectives, new_beliefs): b_prime = filter_hidden_states( b, p2, num_fail2) possible_beliefs_to_key[frozenset( b_prime)].append( (perspective, ((p1, num_fail), (p2, num_fail2)))) for i, (hidden_states, keys) in enumerate(possible_beliefs_to_key.iteritems()): assert len(bucket_to_hidden_states) == i bucket_to_hidden_states.append(list(hidden_states)) for key in keys: perspective_fails_to_bucket[key] = i return get_hidden_states_bucket(player_perspective, fails)
def process_game(root, game): try: hidden_state = reconstruct_hidden_state(game) if len(hidden_state) >= 7: return print game['id'] possible = possible_hidden_states(set(hidden_state), num_players=len(hidden_state)) perspectives = [ perspective_from_hidden_states( starting_hidden_states(player, hidden_state, possible)) for player, _ in enumerate(hidden_state) ] tree_roots = [ root.setdefault((player, perspective), { 'move_counts': {}, 'transitions': {} }) for player, perspective in enumerate(perspectives) ] state = AvalonState.start_state(len(hidden_state)) for round_ in game['log']: tree_roots, state = handle_round(tree_roots, state, hidden_state, round_) except AssertionError: print game['id'], 'is bad'
def get_starting_belief(hidden_state, player, hidden_state_to_index):
    """Return ``player``'s initial belief as a normalised vector.

    hidden_state -- the true role assignment.
    player -- seat index whose point of view is taken.
    hidden_state_to_index -- mapping from each hidden state to its position in
        the belief vector; its keys are the candidate hidden states.

    Every hidden state returned by ``starting_hidden_states`` adds one to its
    slot; the counts are then divided by their sum. If no hidden state is
    returned the division is 0/0.
    """
    candidates = starting_hidden_states(player, hidden_state,
                                        hidden_state_to_index.keys())
    belief = np.zeros(len(hidden_state_to_index))
    for candidate in candidates:
        belief[hidden_state_to_index[candidate]] += 1
    return belief / np.sum(belief)
def get_player_perspectives_for_hidden_state(all_hidden, hidden_state):
    """Return, memoised, the starting hidden states of every seat.

    The result is a list with one ``starting_hidden_states`` value per seat of
    ``hidden_state``. It is cached in the module-level
    ``__hidden_state_to_perspectives`` mapping and the cached list itself is
    returned on later calls.

    NOTE(review): the cache key is ``hidden_state`` alone, so a later call with
    a different ``all_hidden`` gets the first call's result -- confirm callers
    always pass the same ``all_hidden``.
    """
    global __hidden_state_to_perspectives

    if hidden_state in __hidden_state_to_perspectives:
        return __hidden_state_to_perspectives[hidden_state]

    per_seat = []
    for player in range(len(hidden_state)):
        per_seat.append(
            starting_hidden_states(player, hidden_state, all_hidden))
    __hidden_state_to_perspectives[hidden_state] = per_seat
    return __hidden_state_to_perspectives[hidden_state]
def run_large_tournament(bots_classes, roles, games_per_matching=50): print "Running {}".format(' '.join(map(lambda c: c.__name__, bots_classes))) start_state = AvalonState.start_state(len(roles)) result = [] all_hidden_states = possible_hidden_states(set(roles), num_players=len(roles)) seen_hidden_states = set([]) for hidden_state in itertools.permutations(roles): if hidden_state in seen_hidden_states: continue seen_hidden_states.add(hidden_state) beliefs = [ starting_hidden_states(player, hidden_state, all_hidden_states) for player in range(len(hidden_state)) ] seen_bot_orders = set([]) for bot_order in itertools.permutations(bots_classes): bot_order_str = tuple([bot_cls.__name__ for bot_cls in bot_order]) if bot_order_str in seen_bot_orders: continue seen_bot_orders.add(bot_order_str) for _ in range(games_per_matching): bots = [ bot_cls.create_and_reset(start_state, player, role, beliefs[player]) for player, ( bot_cls, role) in enumerate(zip(bot_order, hidden_state)) ] values, game_end = run_game(start_state, hidden_state, bots) game_stat = { 'winner': game_end[0], 'win_type': game_end[1], } for player, (bot_cls, role) in enumerate(zip(bot_order, hidden_state)): game_stat['bot_{}'.format(player)] = bot_cls.__name__ game_stat['bot_{}_role'.format(player)] = role game_stat['bot_{}_payoff'.format(player)] = values[player] result.append(game_stat) df = pd.DataFrame(result, columns=sorted(result[0].keys())) df['winner'] = df['winner'].astype('category') df['win_type'] = df['win_type'].astype('category') for player in range(len(roles)): df['bot_{}'.format(player)] = df['bot_{}'.format(player)].astype( 'category') df['bot_{}_role'.format(player)] = df['bot_{}_role'.format( player)].astype('category') return df
def run_learning_tournament(bot_classes, winrate_track=None, winrate_window=10000000): bots = [(bot_class(), bot_class.__name__, num == winrate_track) for num, bot_class in enumerate(bot_classes)] hidden_state = ['merlin', 'servant', 'servant', 'assassin', 'minion'] all_hidden_states = possible_hidden_states(set(hidden_state), num_players=len(hidden_state)) beliefs_for_hidden_state = {} start_state = AvalonState.start_state(len(hidden_state)) wins = [] game_num = 0 while True: game_num += 1 random.shuffle(hidden_state) random.shuffle(bots) bot_ids = [bot_name for _, bot_name, _ in bots] if tuple(hidden_state) not in beliefs_for_hidden_state: beliefs_for_hidden_state[tuple(hidden_state)] = [ starting_hidden_states(player, tuple(hidden_state), all_hidden_states) for player in range(len(hidden_state)) ] beliefs = beliefs_for_hidden_state[tuple(hidden_state)] track_num = None bot_objs = [] for i, (bot, bot_name, track) in enumerate(bots): if track: track_num = i bot.reset(start_state, i, hidden_state[i], beliefs[i]) bot.set_bot_ids(bot_ids) bot_objs.append(bot) results, _ = run_game(start_state, tuple(hidden_state), bot_objs) for i, (bot, bot_name, track) in enumerate(bots): bot.show_roles(hidden_state, bot_ids) if track_num is not None: wins.append(int(results[track_num] > 0)) if len(wins) > winrate_window: wins.pop(0) if game_num % 10 == 0 and winrate_track is not None: print "Winrate: {}%".format(100 * float(sum(wins)) / len(wins))
def run_simple_tournament(config, num_games=1000, granularity=100): tournament_statistics = { 'bots': [{ 'bot': bot['bot'].__name__, 'role': bot['role'], 'wins': 0, 'total': 0, 'win_percent': 0, 'payoff': 0.0 } for bot in config], 'end_types': {} } hidden_state = tuple([bot['role'] for bot in config]) all_hidden_states = possible_hidden_states(set(hidden_state), num_players=len(config)) beliefs = [ starting_hidden_states(player, hidden_state, all_hidden_states) for player in range(len(config)) ] pool = multiprocessing.Pool(4) results = [] for i in range(num_games): results.append( pool.apply_async(run_game_and_create_bots, (hidden_state, beliefs, config))) for i, result in enumerate(results): if i % granularity == 0: print "Waiting for game {}".format(i) payoffs, end_type = result.get() tournament_statistics['end_types'][ end_type] = 1 + tournament_statistics['end_types'].get( end_type, 0) for b, payoff in zip(tournament_statistics['bots'], payoffs): b['wins'] += 1 if payoff > 0.0 else 0 b['payoff'] += payoff b['total'] += 1 for b in tournament_statistics['bots']: b['win_percent'] = float(b['wins']) / float(b['total']) pool.close() pool.join() return tournament_statistics
def run_single_threaded_tournament(config, num_games=1000, granularity=100): tournament_statistics = { 'bots': [{ 'bot': bot['bot'].__name__, 'role': bot['role'], 'wins': 0, 'total': 0, 'win_percent': 0, 'payoff': 0.0 } for bot in config], 'end_types': {} } hidden_state = tuple([bot['role'] for bot in config]) all_hidden_states = possible_hidden_states(set(hidden_state), num_players=len(config)) beliefs = [ starting_hidden_states(player, hidden_state, all_hidden_states) for player in range(len(config)) ] start_state = AvalonState.start_state(len(hidden_state)) bots = [bot['bot']() for bot in config] # pool = multiprocessing.Pool() results = [] for i in range(num_games): if i % granularity == 0: print i for player, (bot, c) in enumerate(zip(bots, config)): bot.reset(start_state, player, c['role'], beliefs[player]) payoffs, end_type = run_game(start_state, hidden_state, bots) tournament_statistics['end_types'][ end_type] = 1 + tournament_statistics['end_types'].get( end_type, 0) for b, payoff in zip(tournament_statistics['bots'], payoffs): b['wins'] += 1 if payoff > 0.0 else 0 b['payoff'] += payoff b['total'] += 1 for b in tournament_statistics['bots']: b['win_percent'] = float(b['wins']) / float(b['total']) return tournament_statistics
def run_large_tournament_parallel(pool, bots_classes, roles, games_per_matching=50): print "Running {}".format(' '.join(map(lambda c: c.__name__, bots_classes))) start_state = AvalonState.start_state(len(roles)) async_results = [] all_hidden_states = possible_hidden_states(set(roles), num_players=len(roles)) seen_hidden_states = set([]) for hidden_state in itertools.permutations(roles): if hidden_state in seen_hidden_states: continue seen_hidden_states.add(hidden_state) beliefs = [ starting_hidden_states(player, hidden_state, all_hidden_states) for player in range(len(hidden_state)) ] seen_bot_orders = set([]) for bot_order in itertools.permutations(bots_classes): bot_order_str = tuple([bot_cls.__name__ for bot_cls in bot_order]) if bot_order_str in seen_bot_orders: continue seen_bot_orders.add(bot_order_str) for _ in range(games_per_matching): async_result = pool.apply_async( large_tournament_parallel_helper, (bot_order, hidden_state, beliefs, start_state)) async_results.append(async_result) result = [async_result.get() for async_result in async_results] df = pd.DataFrame(result, columns=sorted(result[0].keys())) df['winner'] = df['winner'].astype('category') df['win_type'] = df['win_type'].astype('category') for player in range(len(roles)): df['bot_{}'.format(player)] = df['bot_{}'.format(player)].astype( 'category') df['bot_{}_role'.format(player)] = df['bot_{}_role'.format( player)].astype('category') return df
def process_game(game, bot_class, stats, verbose=True, num_players=None, max_num_players=7, min_game_id=0, max_game_id=50000, roles=None): try: hidden_state = reconstruct_hidden_state(game) if num_players is not None: if len(hidden_state) != num_players: return else: if len(hidden_state) >= max_num_players: return if game['id'] >= max_game_id or game['id'] < min_game_id: return if roles is not None: if not all(role in roles for role in hidden_state): return if verbose: print game['id'] print hidden_state possible = possible_hidden_states(set(hidden_state), num_players=len(hidden_state)) perspectives = [ starting_hidden_states(player, hidden_state, possible) for player, _ in enumerate(hidden_state) ] state = AvalonState.start_state(len(hidden_state)) bots = [ bot_class.create_and_reset(state, player, role, perspectives[player]) for player, role in enumerate(hidden_state) ] for round_ in game['log']: state = handle_round(game, state, hidden_state, bots, round_, stats) except AssertionError: if verbose: print game['id'], 'is bad'
def get_bot_merlin_prediction(bot_class, game):
    """Compare a bot's Merlin guess with the human assassin's guess.

    A ``bot_class`` bot is placed in the assassin's seat, fed every round of
    ``game['log']`` through ``handle_round``, and then asked for its move
    probabilities in the resulting state. The last log round must contain a
    ``'findMerlin'`` entry whose ``'assassin'`` is the assassin's seat;
    otherwise an AssertionError is raised.

    Returns a dict with the human guess and the probability the bot gave it,
    the true Merlin seat and the probability the bot gave it, the bot's top
    pick and its probability, the game id, and the ``player_id`` of Merlin and
    of the assassin. Move probabilities are indexed directly by seat number.
    """
    hidden_state = reconstruct_hidden_state(game)
    num_players = len(hidden_state)
    state = AvalonState.start_state(num_players)
    possible = possible_hidden_states(set(hidden_state),
                                      num_players=num_players)
    perspectives = [
        starting_hidden_states(player, hidden_state, possible)
        for player in range(num_players)
    ]

    assassin_player = hidden_state.index('assassin')
    assassin_bot = bot_class.create_and_reset(state, assassin_player,
                                              'assassin',
                                              perspectives[assassin_player])

    for round_ in game['log']:
        state = handle_round(game, state, hidden_state, assassin_bot,
                             assassin_player, round_)

    final_round = game['log'][-1]
    assert 'findMerlin' in final_round
    find_merlin = final_round['findMerlin']
    assert find_merlin['assassin'] == assassin_player

    legal_moves = state.legal_actions(assassin_player, hidden_state)
    move_probs = assassin_bot.get_move_probabilities(state, legal_moves)

    human_guess = find_merlin['merlin_guess']
    bot_human_prob = move_probs[human_guess]
    merlin_player = hidden_state.index('merlin')
    players = game['players']

    return {
        'human_guess': human_guess,
        'bot_human_prob': bot_human_prob,
        'correct_guess': merlin_player,
        'bot_correct_prob': move_probs[merlin_player],
        'top_pick': np.argmax(move_probs),
        'top_pick_prob': np.max(move_probs),
        'game': game['id'],
        'merlin': players[merlin_player]['player_id'],
        'assassin': players[assassin_player]['player_id']
    }
def _observed_move_probability(bot, state, player, hidden_state, move, tremble):
    """Return the probability ``bot`` assigns to ``move``, mixed with tremble.

    The bot's distribution over the player's legal actions is blended with a
    uniform distribution: ``(1 - tremble) * bot + tremble * uniform``.
    """
    legal_actions = state.legal_actions(player, hidden_state)
    index = legal_actions.index(move)
    move_probs = bot.get_move_probabilities(state, legal_actions)
    move_probs = (1.0 - tremble) * move_probs + tremble * (np.ones(len(legal_actions))/len(legal_actions))
    return move_probs[index]


def calculate_observation_ll(hidden_state, bot_classes, observation_history, tremble=0.0):
    """Return the log-likelihood of an observation history under given bots.

    hidden_state -- the assumed role assignment, one role per seat.
    bot_classes -- one bot class per seat; each is instantiated and reset.
    observation_history -- sequence of ``(obs_type, obs)`` pairs. ``obs_type``
        must equal the current ``state.status`` (asserted). For ``'propose'``
        ``obs`` is the proposal, for ``'vote'`` one up/down flag per moving
        player, for ``'run'`` the number of fails played.
    tremble -- weight of a uniform distribution mixed into every bot policy.

    For a ``'run'`` observation the probability is summed over every way the
    evil players on the mission could have produced exactly ``obs`` fails. If
    more fails were observed than evil players are on the mission the result
    is ``np.log(0.0)``. After each observation the state is advanced and every
    bot is notified through ``handle_transition``.
    """
    num_players = len(hidden_state)
    all_hidden_states = possible_hidden_states(set(hidden_state), num_players=num_players)
    beliefs = [
        starting_hidden_states(player, hidden_state, all_hidden_states)
        for player in range(num_players)
    ]
    state = AvalonState.start_state(num_players)
    bots = [bot_class() for bot_class in bot_classes]
    for seat, bot in enumerate(bots):
        bot.reset(state, seat, hidden_state[seat], beliefs[seat])

    log_likelihood = 0.0
    for obs_type, obs in observation_history:
        assert obs_type == state.status, "Incorrect matchup {} != {}".format(obs_type, state.status)

        moving_players = state.moving_players()
        moves = []

        if obs_type == 'propose':
            proposer = moving_players[0]
            proposal_move = ProposeAction(proposal=obs)
            moves.append(proposal_move)
            log_likelihood += np.log(_observed_move_probability(
                bots[proposer], state, proposer, hidden_state, proposal_move,
                tremble))

        elif obs_type == 'vote':
            for voter, vote_up in zip(moving_players, obs):
                vote_move = VoteAction(up=vote_up)
                moves.append(vote_move)
                log_likelihood += np.log(_observed_move_probability(
                    bots[voter], state, voter, hidden_state, vote_move,
                    tremble))

        elif obs_type == 'run':
            bad_guys_on_mission = [
                p for p in state.proposal if hidden_state[p] in EVIL_ROLES
            ]
            if len(bad_guys_on_mission) < obs:
                # Impossible - fewer bad than failed
                return np.log(0.0)

            player_fail_probability = {}
            for bad in bad_guys_on_mission:
                player_fail_probability[bad] = _observed_move_probability(
                    bots[bad], state, bad, hidden_state,
                    MissionAction(fail=True), tremble)

            # ``moves`` is rewritten for each combination; the moves of the
            # last combination are the ones used for the transition below.
            failure_prob = 0.0
            moves = [MissionAction(fail=False)] * len(state.proposal)
            for bad_failers in itertools.combinations(bad_guys_on_mission, r=obs):
                specific_fail_prob = 1.0
                for bad in bad_guys_on_mission:
                    slot = state.proposal.index(bad)
                    if bad in bad_failers:
                        moves[slot] = MissionAction(fail=True)
                        specific_fail_prob *= player_fail_probability[bad]
                    else:
                        moves[slot] = MissionAction(fail=False)
                        specific_fail_prob *= (1.0 - player_fail_probability[bad])
                failure_prob += specific_fail_prob
            log_likelihood += np.log(failure_prob)

        new_state, _, observation = state.transition(moves, hidden_state)
        for seat, bot in enumerate(bots):
            own_move = None
            if seat in moving_players:
                own_move = moves[moving_players.index(seat)]
            bot.handle_transition(state, new_state, observation, move=own_move)
        state = new_state

    return log_likelihood
def process_game(game):
    """Turn one logged five-player game into propose/vote training samples.

    Samples are appended to the module-level lists PROPOSE_INPUTS,
    PROPOSE_OUTPUTS, VOTE_INPUTS and VOTE_OUTPUTS. Each input is a copy of a
    player's whole feature history so far (a list of concatenated vectors).

    Games whose reconstructed hidden state does not have exactly five players
    are skipped. An AssertionError raised during processing is reported by
    printing the game id and otherwise swallowed. Returns None.
    """
    try:
        hidden_state = reconstruct_hidden_state(game)
        if len(hidden_state) != 5:
            return
        print game['id']
        possible = possible_hidden_states(set(hidden_state), num_players=len(hidden_state))
        # Per-player set of hidden states still considered possible, and the
        # perception derived from it; both are narrowed after every mission.
        perspectives = [
            starting_hidden_states(player, hidden_state, possible)
            for player, _ in enumerate(hidden_state)
        ]
        perceptions = [
            create_perception(perspective) for perspective in perspectives
        ]
        # Per-player running feature histories.
        vote_inputs = [[] for _ in hidden_state]
        propose_inputs = [[] for _ in hidden_state]
        for round_ in game['log']:
            # "Previous proposal" features start from zeros in every round.
            last_proposal_dict = None
            last_proposal = np.zeros(5)
            last_votes = np.zeros(5)
            # Proposals within a round are stored under the keys '1'..'5'.
            for proposal_num in ['1', '2', '3', '4', '5']:
                if proposal_num not in round_:
                    break
                proposal_dict = last_proposal_dict = round_[proposal_num]
                proposer = proposal_dict['proposer']
                # Every player observes who proposes plus the previous
                # proposal and its votes.
                for player, perception in enumerate(perceptions):
                    propose_inputs[player].append(
                        np.concatenate([
                            perception.flat,
                            onehot(proposer), last_proposal, last_votes
                        ]))
                proposal = onehot(proposal_dict['team'])
                # Only the proposer's history becomes a proposal sample.
                PROPOSE_INPUTS.append(propose_inputs[proposer][:])
                PROPOSE_OUTPUTS.append(proposal)
                # Votes encoded as -1.0 for 'Reject' and 1.0 for anything else.
                votes = np.array([
                    -1.0 if vote == 'Reject' else 1.0
                    for vote in proposal_dict['votes']
                ])
                for player, vote in enumerate(proposal_dict['votes']):
                    # The sample is snapshotted with the votes slot zeroed,
                    # i.e. before the outcome of this vote is known...
                    vote_inputs[player].append(
                        np.concatenate([
                            perceptions[player].flat,
                            onehot(player), proposal,
                            np.zeros(5)
                        ]))
                    VOTE_INPUTS.append(vote_inputs[player][:])
                    VOTE_OUTPUTS.append(1.0 if vote == 'Approve' else 0.0)
                    # ...then the history entry is overwritten with the real
                    # votes so later samples see them.
                    vote_inputs[player][-1] = np.concatenate([
                        perceptions[player].flat,
                        onehot(player), proposal, votes
                    ])
                last_proposal = proposal
                last_votes = votes
            # NOTE(review): assumes every round has a 'mission' entry and at
            # least one proposal (last_proposal_dict is not None) -- confirm
            # against the log format.
            num_fails = len([
                mission_vote for mission_vote in round_['mission']
                if mission_vote == 'Fail'
            ])
            # Narrow each player's possible hidden states using the team of
            # the round's last proposal and the number of fails.
            for player, perspective in enumerate(perspectives):
                perspectives[player] = filter_hidden_states(
                    perspectives[player], set(last_proposal_dict['team']),
                    num_fails)
                perceptions[player] = create_perception(perspectives[player])
    except AssertionError:
        print game['id'], 'is bad'
def run_and_print_game(config): bot_counts = {} bot_names = [] for bot in config: base_name = bot['bot'].__name__ bot_counts[base_name] = bot_counts.get(base_name, 0) + 1 bot_names.append("{}_{}".format(base_name, bot_counts[base_name])) print " Role | Bot | Evil " print "----------------------------------------------" for name, bconf in zip(bot_names, config): print "{: >11} | {: >24} | {: >4}".format( bconf['role'], name, 'Yes' if bconf['role'] in EVIL_ROLES else '') hidden_state = tuple([bot['role'] for bot in config]) all_hidden_states = possible_hidden_states(set(hidden_state), num_players=len(config)) beliefs = [ starting_hidden_states(player, hidden_state, all_hidden_states) for player in range(len(config)) ] state = AvalonState.start_state(len(hidden_state)) bots = [bot['bot']() for bot in config] for i, bot in enumerate(bots): bot.reset(state, i, hidden_state[i], beliefs[i]) print "=============== Round 1 ================" while not state.is_terminal(): moving_players = state.moving_players() moves = [ bots[player].get_action(state, state.legal_actions(player, hidden_state)) for player in moving_players ] if state.status == 'propose': player = moving_players[0] legal_actions = state.legal_actions(player, hidden_state) move_probs = bots[player].get_move_probabilities( state, legal_actions) move_prob = move_probs[legal_actions.index(moves[0])] print "Proposal #{}. 
{} proposes ({:0.2f}):".format( state.propose_count + 1, bot_names[moving_players[0]], move_prob) for player in moves[0].proposal: print " - {}".format(bot_names[player]) elif state.status == 'vote': for player, move in zip(moving_players, moves): legal_actions = state.legal_actions(player, hidden_state) move_probs = bots[player].get_move_probabilities( state, legal_actions) move_prob = move_probs[legal_actions.index(move)] print "{: >24} votes {: <4} ({:0.2f})".format( bot_names[player], 'UP' if move.up else 'DOWN', move_prob) elif state.status == 'run': print "--- Mission results ---" for player, move in zip(moving_players, moves): legal_actions = state.legal_actions(player, hidden_state) move_probs = bots[player].get_move_probabilities( state, legal_actions) move_prob = move_probs[legal_actions.index(move)] print "{: >24}: {} ({:0.2f})".format( bot_names[player], 'FAIL' if move.fail else 'SUCCEED', move_prob) elif state.status == 'merlin': print "===== Final chance: pick merlin! =====" assassin = hidden_state.index('assassin') legal_actions = state.legal_actions(assassin, hidden_state) move_probs = bots[assassin].get_move_probabilities( state, legal_actions) move_prob = move_probs[legal_actions.index(moves[0])] assassin_pick = moves[assassin].merlin print '{} picked {} - {}! ({:0.2f})'.format( bot_names[assassin], bot_names[assassin_pick], 'CORRECT' if assassin_pick == hidden_state.index('merlin') else 'WRONG', move_prob) new_state, _, observation = state.transition(moves, hidden_state) for player, bot in enumerate(bots): if player in moving_players: move = moves[moving_players.index(player)] else: move = None bot.handle_transition(state, new_state, observation, move=move) if state.status == 'run' and new_state.status == 'propose': print "=============== Round {} ================".format( new_state.succeeds + new_state.fails + 1) state = new_state print state.game_end