if __name__ == "__main__":
    # Command-line options: the saved model to load, and whether to write the
    # animation to disk instead of showing it in a window.
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path")
    parser.add_argument("-s", "--save", dest="save", action="store_true")
    parser.set_defaults(save=False)
    args = parser.parse_args()

    # environment, agent
    env = CatchBall()
    agent = DQNAgent(env.enable_actions, env.name)
    agent.load_model(args.model_path)

    # variables
    # NOTE(review): these module-level names are presumably read and updated
    # by the `animate` / `init` callbacks defined elsewhere -- keep the names.
    win, lose = 0, 0
    state_t_1, reward_t, terminal = env.observe()

    # animate
    fig = plt.figure(figsize=(env.screen_n_rows / 2, env.screen_n_cols / 2))
    # FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and
    # removed in 3.6; the manager-level method works on old and new versions.
    fig.canvas.manager.set_window_title("{}-{}".format(env.name, agent.name))
    img = plt.imshow(state_t_1, interpolation="none", cmap="gray")
    # Keep a reference to the animation object so it is not garbage-collected
    # while the figure is shown or saved.
    ani = animation.FuncAnimation(
        fig,
        animate,
        init_func=init,
        interval=(1000 / env.frame_rate),
        blit=True,
    )

    if args.save:
        # save animation (requires ImageMagick)
        ani_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "tmp")
        # Create the output directory up front; ani.save() does not create
        # missing parent directories.
        if not os.path.isdir(ani_dir):
            os.makedirs(ani_dir)
        ani_path = os.path.join(ani_dir, "demo-{}.gif".format(env.name))
        ani.save(ani_path, writer="imagemagick", fps=env.frame_rate)
    else:
        # show animation
        plt.show()
# Number of training episodes to run.
n_epochs = 1000

# Build the game environment and the agent that acts in it.
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)

# Running counter initialised before training starts.
win = 0

for e in range(n_epochs):
    # Per-episode bookkeeping, reset at the start of every episode.
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    state_t_1, reward_t, terminal = env.observe()

    while not terminal:
        # The previous observation becomes the current state.
        state_t = state_t_1

        # Choose an action with the agent's exploration setting and apply it.
        action_t = agent.select_action(state_t, agent.exploration)
        env.execute_action(action_t)

        # Read back the resulting state, reward and terminal flag.
        state_t_1, reward_t, terminal = env.observe()

        # Record the transition with the agent.
        agent.store_experience(
            state_t, action_t, reward_t, state_t_1, terminal)
# `parser` is created before this fragment; only the model path is added here.
parser.add_argument("-m", "--model_path")
args = parser.parse_args()

# Build the environment and agent, then load the trained model.
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)
agent.load_model(args.model_path)

env.reset_board_status()
env.set_test_game()

# Start of the processing for a single game.
while env.is_playable() is True:
    env.print_board()
    env.test_player_play()
    state = env.observe()

    # Keep asking the agent for a move until the environment reports "ok"
    # or "pass".
    # NOTE(review): the "ng" and fallback branches do not leave this loop and
    # `state` is not refreshed; if select_action(state, 0.0) returns the same
    # action each time this may never terminate -- confirm.
    while True:
        action = agent.select_action(state, 0.0)
        hand_result = env.test_ai_play(action)
        if hand_result == "ok":
            break
        elif hand_result == "ng":
            # Decode the flat action index into x/y for the error log
            # (presumably an 8-wide board -- confirm).
            x = int(action % 8)
            y = int(action / 8)
            DebugLog.error(str(action) + ":" + str(x) + ":" + str(y))
        elif hand_result == "pass":
            break
        else:
            # Parenthesised single-argument print behaves identically on
            # Python 2 and Python 3; the bare statement form is Python 2 only.
            print("Hung up")
# Build the game environment and the agent that acts in it.
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)

# Running counter initialised before training starts.
win = 0

for e in range(n_epochs):
    # Per-episode bookkeeping, reset at the start of every episode.
    frame = 0
    loss = 0.0
    Q_max = 0.0
    env.reset()
    # The reward returned by this first observation is not used below.
    state_t, reward_t_none = env.observe()

    # Choose an action with the agent's exploration setting and apply it.
    action_t = agent.select_action(state_t, agent.exploration)
    env.execute_action(action_t)

    # Read back the resulting state and reward.
    state_t, reward_t = env.observe()

    # Record the experience with the agent.
    agent.store_experience(state_t, action_t, reward_t)

    # Let the agent learn from its stored experiences.
    agent.experience_replay()

    # for log
n_epochs = 0
# Number of games to play.
n_game = 100

env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)
total_result_log = ""

for e in range(n_game):
    # Start of the per-game loop: reset the per-game bookkeeping.
    frame = 0
    win = 0
    loss = 0.0
    Q_max = 0.0
    env.reset_board_status()
    env.set_new_game()
    state_after = env.observe()

    # Start of the processing for a single game.
    while env.is_playable() is True:
        # print "*********************************************************************************"
        # Snapshot the latest observation as the state the agent acts on.
        state_before = copy.deepcopy(state_after)

        # Loop until our own move is accepted (the agent may choose a square
        # where a piece cannot be placed).
        while True:
            env.is_available()
            # Let the agent choose a move, giving it the board state and the
            # exploration (randomness) rate.
            # hand_result = env.random_play()
            action_t = agent.select_action(state_before, agent.exploration)
            hand_result = env.learning_play(action_t)