示例#1
0
def test_after_game_players_have_no_cards():
    """After one played round, neither player holds any cards."""
    bob, sharon = Player("bob"), Player("sharon")
    match = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=1)
    match.play_round()
    for participant in (bob, sharon):
        assert len(participant.hand) == 0
示例#2
0
def test_after_turn_hands_exchange_two_player():
    """Check that one turn passes each player's remaining hand to the other.

    Both players are dealt 10 cards. After a single ``play_turn`` call each
    must hold 9 cards, and every one of them must come from the hand the
    *other* player held before the turn.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    game = Game(deck=StandardDeck(), agents=[p1, p2], cards_per_player=10)
    # Snapshot the hands by value. Binding only the reference would let the
    # "before" names alias whichever list objects the game mutates or hands
    # around during the turn, which can make the membership checks vacuous
    # (a list compared against itself).
    p1_hand_before, p2_hand_before = list(p1.hand), list(p2.hand)
    game.play_turn()
    assert len(p1.hand) == 9
    assert len(p2.hand) == 9
    assert all(card in p2_hand_before for card in p1.hand)
    assert all(card in p1_hand_before for card in p2.hand)
示例#3
0
def simple_egg_score_test():
    """Play two rounds with an egg-nigiri-only deck and inspect the game log.

    Expects 42 log rows and a reward of 0.0 at labels 2 and 3.
    """
    bob, sharon = Player("bob"), Player("sharon")
    egg_deck = Deck.create([NigiriCard('egg')], [1000])
    match = Game(deck=egg_deck, agents=[bob, sharon], cards_per_player=10)
    for _ in range(2):
        match.play_round()
    print(match.gamelog)
    assert match.gamelog.shape[0] == 42
    for label in (2, 3):
        assert match.gamelog['reward'][label] == 0.0
示例#4
0
def test_certain_cards_carry_rewards_at_end_of_round():
    """Play two rounds with a maki-only deck and inspect the game log.

    Expects 42 log rows and a reward of 0.0 at labels 0 and 1.
    """
    bob, sharon = Player("bob"), Player("sharon")
    # The deck holds only 3-maki cards; per the original author these are not
    # worth points while a round is still in progress.
    maki_deck = Deck.create([MakiCard(3)], [100])
    match = Game(deck=maki_deck, agents=[bob, sharon], cards_per_player=10)
    for _ in range(2):
        match.play_round()
    print(match.gamelog)
    assert match.gamelog.shape[0] == 42
    for label in (0, 1):
        assert match.gamelog['reward'][label] == 0.0
示例#5
0
def test_after_turn_hands_exchange_three_player():
    """Check that one turn rotates the remaining hands among three players.

    Each player is dealt 8 cards. After a single ``play_turn`` call each must
    hold 7 cards, where bob's cards come from alice's previous hand, alice's
    from sharon's, and sharon's from bob's.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    p3 = Player("alice")
    game = Game(deck=StandardDeck(), agents=[p1, p2, p3], cards_per_player=8)
    # Snapshot the hands by value. Binding only the reference would let the
    # "before" names alias whichever list objects the game mutates or hands
    # around during the turn, which can make the membership checks vacuous.
    p1_hand_before = list(p1.hand)
    p2_hand_before = list(p2.hand)
    p3_hand_before = list(p3.hand)
    game.play_turn()
    assert len(p1.hand) == 7
    assert len(p2.hand) == 7
    assert len(p3.hand) == 7
    assert all(card in p3_hand_before for card in p1.hand)
    assert all(card in p2_hand_before for card in p3.hand)
    assert all(card in p1_hand_before for card in p2.hand)
示例#6
0
class Environment():
    """Wrapper that drives a ``Game`` through reset / action-space / step calls.

    NOTE(review): the constructor registers the user-side agent as the string
    ``"foo"``, while ``reset`` and ``action_space`` query the game for
    ``"env-player"`` -- confirm which identifier the game actually expects.
    """

    def __init__(self, name, opponents):
        """Store the environment name and build the underlying game.

        Args:
            name: Label kept on the instance as ``self.name``.
            opponents: Opposing agents; appended after the user agent with
                ``[self.user_agent] + opponents``, so a list is required.
        """
        self.name = name
        # Placeholder identifier for the user-controlled agent.
        self.user_agent = "foo"
        self.game = Game(agents=[self.user_agent] + opponents)

    def reset(self):
        """Reset the game and return the observation for ``"env-player"``."""
        self.game.reset_game()
        return self.game.get_observation("env-player")

    def action_space(self):
        """Return the game's action space for ``"env-player"``."""
        return self.game.get_action_space("env-player")

    # NOTE(review): the body of ``step`` is not visible in this excerpt.
    def step(self, action):
示例#7
0
def test_interal_game_reset_functionality():
    """Resetting a game returns the turn counter to 0 with 10 cards per hand.

    The turn counter is expected to read 11 and 21 after the first and second
    round respectively, before the reset.
    """
    bob = Player("bob")
    sharon = Player("sharon")
    match = Game(
        deck=StandardDeck(),
        agents=[bob, sharon],
        n_rounds=2,
        cards_per_player=10,
    )
    for expected_turn in (11, 21):
        match.play_round()
        assert match.turn == expected_turn
    match.reset_game()
    assert match.turn == 0
    for participant in (bob, sharon):
        assert len(participant.hand) == 10


# def test_simulated_games_should_be_distinct():
#     p1 = Player("bob")
#     p2 = Player("sharon")
#     game = Game(deck_constructor=StandardDeck, agents=[p1, p2], n_rounds=2, cards_per_player=10)
#     result1 = game.simulate_game()
#     result2 = game.simulate_game()
#     result3 = game.simulate_game()
#     result4 = game.simulate_game()
#     result5 = game.simulate_game()
#     print(result5[['action', 'player', 'round', 'reward']])
#     assert result5['bob'] < 100
#     assert result5['sharon'] < 100
示例#8
0
def test_simple_one_winner_one_round():
    """Play one round with 19 egg and 1 salmon nigiri and check final rewards.

    Expects 22 log rows and a last logged reward of 10 or 11 for each player.
    """
    bob, sharon = Player("bob"), Player("sharon")
    nigiri_deck = Deck.create([NigiriCard('egg'), NigiriCard('salmon')], [19, 1])
    match = Game(deck=nigiri_deck, agents=[bob, sharon],
                 cards_per_player=10, n_rounds=1)
    match.play_round()

    def last_reward(player_name):
        # Last logged reward among the rows belonging to one player.
        rows = match.gamelog[match.gamelog['player'] == player_name]
        return rows['reward'].iloc[-1]

    bob_final_reward = last_reward('bob')
    sharon_final_reward = last_reward('sharon')
    print(match.gamelog)
    assert match.gamelog.shape[0] == 22
    assert bob_final_reward in (10., 11.)
    assert sharon_final_reward in (10., 11.)
示例#9
0
def test_reward_in_log_needs_to_accumulate():
    """Logged rewards per player must never decrease between entries.

    The log is sorted by player and turn; each reward is compared with the
    preceding one for the same player, and the first one with 0.
    """
    bob, sharon = Player("bob"), Player("sharon")
    deck = StandardDeck()
    # d = Deck(egg=15, salmon=15, squid=15, tempura=15,
    #          sashimi=15, dumpling=15, pudding=0,
    #          wasabi=15, maki1=10, maki2=10, maki3=10)
    match = Game(deck=deck, agents=[bob, sharon], cards_per_player=10, n_rounds=2)
    match.simulate_game()
    ordered = match.gamelog.sort_values(["player", "turn"])
    # "alice" is not an agent in this game; her (filtered) frame is printed too.
    for name in ("bob", "sharon", "alice"):
        print(ordered[ordered['player'] == name])
    reward_series = [
        ordered[ordered['player'] == name]['reward']
        for name in ('bob', 'sharon')
    ]
    print(match.scores)
    for rewards in reward_series:
        previous = rewards.shift().fillna(0)
        increments = rewards - previous
        assert all(step >= 0 for step in increments)
示例#10
0
def test_game_assigns_correct_number_of_cards_3player():
    """A new three-player game has a ``Deck`` and 8 cards in every hand."""
    names = ("bob", "sharon", "alice")
    seats = [Player(name) for name in names]
    game = Game(deck=StandardDeck(), agents=seats, cards_per_player=8)
    assert len(game.players.keys()) == 3
    assert isinstance(game.deck, Deck)
    for name in names:
        assert len(game.players[name].hand) == 8
示例#11
0
def test_game_contains_players_cards_and_deck_2player():
    """A new two-player game has a ``Deck`` and 10 cards in every hand."""
    names = ("bob", "sharon")
    seats = [Player(name) for name in names]
    game = Game(deck=StandardDeck(), agents=seats, cards_per_player=10)
    assert len(game.players.keys()) == 2
    assert isinstance(game.deck, Deck)
    # Debug output: the player mapping, then bob's entry.
    print(game.players)
    print(game.players["bob"])
    for name in names:
        assert len(game.players[name].hand) == 10
示例#12
0
def test_after_turn_hands_exchange_two_player():
    """Check end results before play and after each of two rounds.

    Despite the function name, this test looks at ``end_results()`` rather
    than at hands: bob starts at 0.0 and is positive after rounds one and two;
    sharon is positive after round two.
    """
    bob, sharon = Player("bob"), Player("sharon")
    game = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)
    print(game.scores)
    snapshots = [game.end_results()]
    for _ in range(2):
        game.play_round()
        snapshots.append(game.end_results())
    scores0, scores1, scores2 = snapshots
    print(scores1)
    print(scores2)
    assert scores0['bob'] == 0.0
    assert scores1['bob'] > 0.0
    assert scores2['bob'] > 0.0
    assert scores2['sharon'] > 0.0
示例#13
0
def test_certain_cards_carry_no_rewards_within_rounds():
    """Rewards stay at 0.0 over four turns with a pudding/wasabi/maki deck.

    After the first turn the log is expected to hold 4 rows with reward 0.0
    at labels 0 and 1; after three more turns the last reward is still 0.0.
    """
    bob, sharon = Player("bob"), Player("sharon")
    # Deck restricted to card types that, per the original author, are not
    # worth points while a round is in progress.
    deck = Deck.create([PuddingCard(), WasabiCard(), MakiCard(3)], [8, 4, 4 * 7])
    match = Game(deck=deck, agents=[bob, sharon], cards_per_player=5)
    match.play_turn()
    assert match.gamelog.shape[0] == 4
    for label in (0, 1):
        assert match.gamelog['reward'][label] == 0.0
    for _ in range(3):
        match.play_turn()
    assert match.gamelog['reward'].iloc[-1] == 0.0
示例#14
0
def test_turns_update():
    """The turn counter reads 11 after round one and 21 after round two."""
    bob, sharon = Player("bob"), Player("sharon")
    match = Game(deck=StandardDeck(), agents=[bob, sharon], n_rounds=2)
    for expected_turn in (11, 21):
        match.play_round()
        assert match.turn == expected_turn
示例#15
0
def test_after_turn_hands_exchange_three_player():
    """Check that end results grow over a three-round, three-player game.

    Despite the function name, this test compares ``end_results()`` snapshots
    rather than hands: after round three, bob and sharon must score higher
    than after round one, and alice higher than before any round was played.
    """
    p1 = Player("bob")
    p2 = Player("sharon")
    p3 = Player("alice")
    game = Game(deck=StandardInfiniDeck(), agents=[p1, p2, p3], n_rounds=3)
    scores0 = game.end_results()
    game.play_round()
    scores1 = game.end_results()
    game.play_round()
    # The round-two snapshot is not asserted on; the call is kept in case
    # end_results() has side effects -- TODO confirm and drop if it is pure.
    game.end_results()
    game.play_round()
    scores3 = game.end_results()
    # The original asserted the bob comparison twice; once is enough.
    assert scores3['bob'] > scores1['bob']
    assert scores3['alice'] > scores0['alice']
    assert scores3['sharon'] > scores1['sharon']
示例#16
0
 def __init__(self, name, opponents):
     """Store the name and build a game with the user agent listed first.

     ``opponents`` is concatenated after the user agent, so it must be a list.
     """
     self.name = name
     self.user_agent = "foo"
     roster = [self.user_agent] + opponents
     self.game = Game(agents=roster)
示例#17
0
# Seed first: seeding after the policy is constructed would leave its
# (presumably random) initial weights outside the seeded random stream.
torch.manual_seed(123)

# Set up policy
policy = Policy('LSTM', 22, 20, 1, 11)

# Parameters
gamma = 0.99  # forwarded to finish_game() after every game

# Set up optimizer
lr = 1e-2
optimizer = optim.Adam(policy.parameters(), lr=lr)
log_interval = 10

# Play games
deck = StandardDeck()
# Number of distinct card types, judged by each card's string form.
N_cards = len({str(card) for card in deck})
p1 = Pg_player(policy=policy, name="PG_player01")
# Opponent gets the same weight for every card type.
p2 = Simple_player(weights=[1 / N_cards] * N_cards, name="SIMPLE_player01")

# Exponentially weighted moving average of p1's win indicator.
ewma = 0.5
alpha = 0.95
for n in range(100):
    game = Game([p1, p2], verbose=False)
    game.simulate_game()
    win = game.did_player_win(p1.name)
    ewma = alpha * ewma + (1 - alpha) * int(win)
    print('At %3i ewma win ratio %5.3f' % (n, ewma))

    # Update the policy from the finished game, then clear per-game state.
    finish_game(policy, gamma=gamma, optimizer=optimizer)
    p1.prev_reward = None
    optimizer = adjust_learning_rate(optimizer, n, lr, 30)
示例#18
0
def test_reward_in_log_needs_to_accumulate():
    p1 = Player("bob")
    p2 = Player("sharon")
    d = Deck.create([DumplingCard()], [1000])
    game = Game(deck=d, agents=[p1, p2], n_rounds=2)