def main(in1, in2):
    in1 = str(in1)
    in2 = str(in2)

    p1_dict = {"human": players.HumanPlayer("Team Jimmy", 1),
               "random": players.RandomPlayer(1),
               "mini_easy": players.MinimaxPlayer(1, 3),
               "mini_medium": players.MinimaxPlayer(1, 5),
               "mini_hard": players.MinimaxPlayer(1, 6),
               "net_random": players.NetPlayer(1, "Random"),
               "net_easy": players.NetPlayer(1, "Easy"),
               "net_medium": players.NetPlayer(1, "Medium"),
               "net_hard": players.NetPlayer(1, "Hard")
               }
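    # Build a struck-through "second place" by appending the Unicode combining
    # long stroke overlay (U+0336) to each character; used in player 2's name below.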
    result = ""
    h2_name = "second place"
    for c in h2_name:
        result = result + c + '\u0336'
    p2_dict = {"human": players.HumanPlayer("Team " + result + " Ben", 2),
               "random": players.RandomPlayer(2),
               "mini_easy": players.MinimaxPlayer(2, 3),
               "mini_medium": players.MinimaxPlayer(2, 5),
               "mini_hard": players.MinimaxPlayer(2, 6),
               "net_random": players.NetPlayer(2, "Random"),
               "net_easy": players.NetPlayer(2, "Easy"),
               "net_medium": players.NetPlayer(2, "Medium"),
               "net_hard": players.NetPlayer(2, "Hard")
               }

    player1 = p1_dict[in1]
    player2 = p2_dict[in2]

    game_board = GameBoard([player1, player2])

    game_board.game_loop()  # there has to be a better way to do this
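
# Hedged, illustrative entry point for the main(in1, in2) above; it is not part
# of the original snippet, and the script name and CLI shape are assumptions.
# The two arguments must match the p1_dict / p2_dict keys, e.g.
#     python play.py human mini_hard
if __name__ == "__main__":
    import sys
    if len(sys.argv) != 3:
        print("usage: play.py <player1> <player2>")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])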
Example #2
def main():
    sock = None
    try:
        args, constants = init_variables()
        sock = utils.connect_to_server(args.host, args.port, args.name,
                                       constants)
        if args.smart:
            ply = players.SmartPlayer(args.name, sock)
        elif args.random:
            ply = players.RandomPlayer(args.name, sock)
        else:
            ply = players.SmartPlayer(args.name, sock)
        ply.play()
    except KeyboardInterrupt:
        # Only clean up the socket if the connection was actually established.
        if sock is not None and constants.connected:
            sock.shutdown(socket.SHUT_RDWR)
            sock.close()
Example #3
def main():
    '''Main Method'''
    RANDOM_PLAYER = players.RandomPlayer()
    SEQUENTIAL_PLAYER = players.SequentialPlayer()
    MOSTCOMMON_PLAYER = players.MostCommonPlayer()
    HISTORIC_PLAYER = players.HistoricPlayer(3)

    print("Welcome to the Rock, Paper & Scissor game!")
    print(
        "Please type in valid players: 'random', 'sequential', 'mostcommon' or 'historic'."
    )

    player1 = input("Who is player 1? ")
    player2 = input("Who is player 2? ")

    def get_player(player):
        '''Return the chosen player object, or None for an invalid name.'''
        my_player = None
        if player == "random":
            my_player = RANDOM_PLAYER
        elif player == "sequential":
            my_player = SEQUENTIAL_PLAYER
        elif player == "mostcommon":
            my_player = MOSTCOMMON_PLAYER
        elif player == "historic":
            my_player = HISTORIC_PLAYER
        else:
            print("You did not type a valid player class")
        return my_player

    first_player = get_player(player1)
    second_player = get_player(player2)

    MULTIPLE_GAMES = MultipleGames(first_player, second_player, 100)
    MULTIPLE_GAMES.arrange_tournament()
Example #4
def train_defence():
    # Set up the game board
    kp = kakerlakenpoker.Kakerlakenpoker()
    p1_rndact = players.RandomPlayer(kp, PLAYER1)
    p2_rndact = players.RandomPlayer(kp, PLAYER2)
    # Dimensions of the observation and action spaces
    obs_size = 40
    n_actions = 2
    # Number of training games (episodes)
    n_episodes = 3000
    # Counters
    win = 0
    miss = 0
    # Set up the Q-function and optimizer
    q_func = qf.QFunction(obs_size, n_actions)
    if USE_GPU: q_func.to_gpu(0)
    optimizer = chainer.optimizers.Adam(eps=1e-2)
    optimizer.setup(q_func)
    # Reward discount factor
    gamma = 0.95
    # Epsilon-greedy exploration: act randomly sometimes; epsilon decays to end_epsilon over 50,000 steps
    p1_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0,
        end_epsilon=0.3,
        decay_steps=50000,
        random_action_func=p1_rndact.random_defence_action_func)
    # Replay buffer for Experience Replay, the training technique used by DQN
    replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)
    agent_p1 = chainerrl.agents.DoubleDQN(q_func,
                                          optimizer,
                                          replay_buffer,
                                          gamma,
                                          p1_explorer,
                                          replay_start_size=500,
                                          target_update_interval=100)
    t1 = time.time()
    reward_avg = 0  # accumulated reward, reported every N_INFO episodes
    for i in range(1, n_episodes + 1):
        kp.reset()
        reward = 0
        turn = 0
        while not kp.done:
            off_act = p2_rndact.random_offence_action_func()
            off_act_vec = np.zeros(8, dtype=np.float32)
            off_act_vec[off_act % 8] = 1
            env = np.append(kp.get_env().copy(), off_act_vec)
            def_act = agent_p1.act_and_train(env.copy(), reward)
            reward += kp.step_and_reward(off_act, def_act, PLAYER2)
            kp.check_winner()
            if kp.done is True:
                if kp.winner == 1:
                    reward += 100
                    win += 1
                elif kp.winner == -1:
                    reward += -100
                else:
                    reward += -100
                if kp.miss is True:
                    miss += 1
                agent_p1.stop_episode_and_train(env.copy(), reward, True)
            else:
                # print("***Turn",turn,"***")
                # print(kp.show())
                last_state = kp.get_env().copy()
                turn += 1
        reward_avg += reward
        if i % N_INFO == 0:
            print("***Episodes", i, "***")
            print("win:", win)
            print("miss", miss)
            print("reward avg:", reward_avg / N_INFO)
            print("rnd:", p1_rndact.random_count)
            win = 0
            reward_avg = 0
            miss = 0
            p1_rndact.random_count = 0
            t2 = time.time()
            print("time:" + str(t2 - t1))
            t1 = time.time()
    agent_p1.save("defence_model3000")
Example #5
def main():
    kp = Kakerlakenpoker()
    kp.reset()
    human_player = players.HumanPlayer()
    p1_rndact = players.RandomPlayer(kp, PLAYER1)
    # Set up the Q-function and optimizer
    off_q_func = qf.QFunction(32, 64)
    # q_func.to_gpu(0)
    off_optimizer = chainer.optimizers.Adam(eps=1e-2)
    off_optimizer.setup(off_q_func)
    gamma = 0.95
    # Epsilon-greedy exploration: act randomly sometimes; epsilon decays to end_epsilon over 50,000 steps
    off_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0,
        end_epsilon=0.3,
        decay_steps=50000,
        random_action_func=p1_rndact.random_offence_action_func)
    # Replay buffer for Experience Replay, the training technique used by DQN
    off_replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)
    urayama_offence = chainerrl.agents.DoubleDQN(off_q_func,
                                                 off_optimizer,
                                                 off_replay_buffer,
                                                 gamma,
                                                 off_explorer,
                                                 replay_start_size=500,
                                                 target_update_interval=100)

    # Set up the Q-function and optimizer
    def_q_func = qf.QFunction(40, 2)
    # q_func.to_gpu(0)
    def_optimizer = chainer.optimizers.Adam(eps=1e-2)
    def_optimizer.setup(def_q_func)
    def_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0,
        end_epsilon=0.3,
        decay_steps=50000,
        random_action_func=p1_rndact.random_defence_action_func)
    # Replay buffer for Experience Replay, the training technique used by DQN
    def_replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)

    urayama_defence = chainerrl.agents.DoubleDQN(def_q_func,
                                                 def_optimizer,
                                                 def_replay_buffer,
                                                 gamma,
                                                 def_explorer,
                                                 replay_start_size=500,
                                                 target_update_interval=100)
    # chainerrl.agent.load_npz_no_strict("offence_model3000",urayama_offence)
    # chainerrl.agent.load_npz_no_strict("defence_model3000",urayama_defence)
    urayama_offence.load("offence_model3000")
    urayama_defence.load("defence_model3000")
    offence_act = [urayama_offence.act, human_player.offence_act]
    defence_act = [urayama_defence.act, human_player.defence_act]
    turn = PLAYER1  # PLAYER1 is URAYAMA (the trained agent), PLAYER2 is the human
    turn_count = 1
    while not kp.done:
        print("***Turn", str(turn_count), "***")
        kp.show_vs_URAYAMA()
        off_act = offence_act[turn](kp.get_env().copy())
        off_act_vec = np.zeros(8, dtype=np.float32)
        off_act_vec[off_act % 8] = 1
        if turn == PLAYER1:
            print("URAYAMA declare:" + str(off_act % 8))
        else:
            print("Player declare:" + str(off_act % 8))
        def_act = defence_act[PLAYER2 - turn](np.append(
            kp.get_env().copy(), off_act_vec))
        ans = "True" if def_act == 1 else "Lie"
        if turn == PLAYER1:
            print("Player answer:" + ans)
        else:
            print("URAYAMA answer:" + ans)
        is_turn_change = kp.step(off_act, def_act, turn)
        kp.check_winner()
        if kp.done is True:
            if kp.winner == 1:
                print("URAYAMA win")
            elif kp.winner == -1:
                print("YOU win")
            else:
                print("Error")
            if kp.miss is True:
                print("MISS")
        if is_turn_change:
            turn = PLAYER1 if turn == PLAYER2 else PLAYER2  # swap turns
        turn_count += 1
Example #6
rho = 0.2
initialEpsilon = 1.0
epsilonDecay = 0.99
seed1 = None
seed2 = None

printturns = False

trainIterations = 2000
randTestIterations = 200
aiTrainIterations = 2000
aiTestIterations = 200

p1 = players.AIPlayer(rho=rho, epsilon=initialEpsilon, seed=seed1)
p2 = players.RandomPlayer(seed=None)

print("Dots & Boxes AI Demo")
print("--------------------------------------------")

aiWins = 0
for i in range(trainIterations):
    g = game.Game()
    if i % 2 == 0:
        g.play(p1, p2, printturns=printturns)
    else:
        g.play(p2, p1, printturns=printturns)
    if g.score.index(max(g.score)) == p1.playernum:
        aiWins += 1
    p1.epsilon *= epsilonDecay
print("Train vs. Random:\t{} wins out of {}".format(aiWins, trainIterations))
Example #7
                util.save_to_file(data_out, data)

        print("=== Statistics ===")
        print("{} ({}%) wins by Player 1 ({})".format(p1_wins,
                                                      100.0 * p1_wins / iters,
                                                      self.p1.name))
        print("{} ({}%) wins by Player 2 ({})".format(p2_wins,
                                                      100.0 * p2_wins / iters,
                                                      self.p2.name))
        print("{} ({}%) ties".format(ties, 100.0 * ties / iters))


if __name__ == "__main__":
    net = nets.Connect4Network()
    data = util.read_from_file("test.csv")
    X, y = util.split_features_labels(data)
    X, y = util.shuffle_data(X, y)
    X = np.expand_dims(X, axis=1)
    X = torch.from_numpy(X).float()
    y = torch.from_numpy(y).float()
    net.fit(X, y, batch_size=32)

    #player_1 = players.DeepMinimaxPlayer("Susan", net, 4)
    player_1 = players.RandomPlayer("Bimbo")
    #player_1 = players.MinimaxPlayer("Max", 4)
    #player_2 = players.MinimaxPlayer("Min", 6)
    player_2 = players.DeepMinimaxPlayer("Susan", net, 4)

    gs = ConnectFourSimulator(player_1, player_2)
    gs.run(100, verbose=False, data_out="test.csv")