Example #1
0
if __name__ == "__main__":
    # Demo entry point: load a trained DQN model and animate it playing
    # CatchBall, either on screen or saved as a GIF.
    # NOTE(review): relies on module-level `animate` and `init` functions
    # (defined elsewhere in this file) that read the globals bound below —
    # do not rename env/agent/state_t_1/img.

    # args: -m/--model_path selects the saved model;
    # -s/--save writes a GIF instead of opening a window
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path")
    parser.add_argument("-s", "--save", dest="save", action="store_true")
    parser.set_defaults(save=False)
    args = parser.parse_args()

    # environment, agent
    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)
    agent.load_model(args.model_path)

    # variables consumed by animate/init as globals
    win, lose = 0, 0
    state_t_1, reward_t, terminal = env.observe()

    # animate: figure sized (in inches) from the screen grid dimensions
    # NOTE(review): fig.canvas.set_window_title is deprecated in modern
    # matplotlib (use fig.canvas.manager.set_window_title) — confirm version
    fig = plt.figure(figsize=(env.screen_n_rows / 2, env.screen_n_cols / 2))
    fig.canvas.set_window_title("{}-{}".format(env.name, agent.name))
    img = plt.imshow(state_t_1, interpolation="none", cmap="gray")
    ani = animation.FuncAnimation(fig, animate, init_func=init, interval=(1000 / env.frame_rate), blit=True)

    if args.save:
        # save animation (requires ImageMagick)
        ani_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "tmp", "demo-{}.gif".format(env.name))
        ani.save(ani_path, writer="imagemagick", fps=env.frame_rate)
    else:
        # show animation interactively
        plt.show()
Example #2
0
    # training configuration: number of episodes to run
    n_epochs = 1000

    # environment, agent
    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)

    # variables (running win counter across episodes)
    win = 0

    for e in range(n_epochs):
        # reset per-episode bookkeeping and the environment itself
        frame = 0
        loss = 0.0
        Q_max = 0.0
        env.reset()
        state_t_1, reward_t, terminal = env.observe()

        # run one episode until the environment signals terminal
        while not terminal:
            state_t = state_t_1

            # execute action in environment; agent.exploration is presumably
            # the epsilon/exploration rate — TODO confirm against DQNAgent
            action_t = agent.select_action(state_t, agent.exploration)
            env.execute_action(action_t)

            # observe environment after the action
            state_t_1, reward_t, terminal = env.observe()

            # store the (s, a, r, s', done) transition for experience replay
            agent.store_experience(state_t, action_t, reward_t, state_t_1,
                                   terminal)
Example #3
0
    # CLI: path of the trained model to load
    parser.add_argument("-m", "--model_path")
    args = parser.parse_args()

    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)
    agent.load_model(args.model_path)

    env.reset_board_status()
    env.set_test_game()

    # Play one game: the test player moves, then the AI moves.
    # (Idiom fix: `x is True` comparison replaced with a plain truth test.)
    while env.is_playable():
        env.print_board()
        env.test_player_play()

        state = env.observe()

        # Retry until the AI produces a legal move ("ok") or must pass.
        while True:
            # exploration rate 0.0: always play greedily at test time
            action = agent.select_action(state, 0.0)
            hand_result = env.test_ai_play(action)
            if hand_result == "ok":
                break
            elif hand_result == "ng":
                # Illegal square: log board coordinates (8x8 board assumed
                # from the modulus — TODO confirm) and let the agent retry.
                # NOTE(review): a purely greedy agent may repeat the same
                # illegal action forever here — confirm select_action varies.
                x = int(action % 8)
                y = int(action / 8)
                DebugLog.error(str(action) + ":" + str(x) + ":" + str(y))
            elif hand_result == "pass":
                break
            else:
                # Unexpected result code. Parenthesized form prints the same
                # single string on both Python 2 and Python 3.
                print("Hung up")
Example #4
0
    # environment, agent
    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)

    # variables (running win counter; n_epochs is defined above this fragment)
    win = 0

    for e in range(n_epochs):
        # reset per-episode counters and the environment
        frame = 0
        loss = 0.0
        Q_max = 0.0
        env.reset()

        # NOTE(review): observe() is unpacked into two values here, while
        # other variants of this loop unpack three — confirm the env API.
        state_t, reward_t_none = env.observe()

        # execute one action per epoch; agent.exploration is presumably the
        # epsilon/exploration rate — TODO confirm against DQNAgent
        action_t = agent.select_action(state_t, agent.exploration)
        env.execute_action(action_t)

        # observe environment after the action
        state_t, reward_t = env.observe()

        # store experience (no next-state/terminal in this variant)
        agent.store_experience(state_t, action_t, reward_t)

        # experience replay: train on a sampled minibatch of stored transitions
        agent.experience_replay()

        # for log
Example #5
0
    # Evaluation configuration: play n_game games with the current agent.
    # (Idiom fix: `... is True` comparisons replaced with plain truth tests;
    # dead commented-out debug lines removed; comments translated to English.)
    n_epochs = 0
    n_game = 100

    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)
    total_result_log = ""

    for e in range(n_game):
        # per-game reset of counters and board state
        frame = 0
        win = 0
        loss = 0.0
        Q_max = 0.0
        env.reset_board_status()
        env.set_new_game()
        state_after = env.observe()

        # play one game until the board reports it is no longer playable
        while env.is_playable():
            # snapshot the board before the move so the agent decides on the
            # pre-move state
            state_before = copy.deepcopy(state_after)

            # retry until the chosen move is legal (the agent may pick a
            # square where no piece can be placed)
            while True:
                env.is_available()

                # ask the agent for a move given the board state and its
                # exploration (random-move) rate
                action_t = agent.select_action(state_before, agent.exploration)
                hand_result = env.learning_play(action_t)