def perform_learning_step(epoch, step, network, replay_memory, reward_gen):
    def exploration_rate(epoch):
        """Define how the exploration rate changes over time."""
        start_eps = 1.0
        end_eps = 0.1
        const_eps_epochs = 0.1 * n_epoch  # 10% of learning time
        eps_decay_epochs = 0.6 * n_epoch  # 60% of learning time

        if epoch < const_eps_epochs:
            return start_eps
        elif epoch < eps_decay_epochs:
            # Linear decay
            return start_eps - (epoch - const_eps_epochs) / \
                   (eps_decay_epochs - const_eps_epochs) * (start_eps - end_eps)
        else:
            return end_eps

    s1 = preprocess(game.get_state().screen_buffer)

    # Epsilon-greedy: explore with probability eps, otherwise exploit the network.
    eps = exploration_rate(epoch)
    if random() <= eps:
        a = randint(0, len(actions) - 1)
    else:
        # Choose the best action according to the network.
        a = network.get_best_action(np.array([s1]))

    reward = game.make_action(actions[a], frame_repeat)
    reward_gen.update_reward()
    reward = reward_gen.get_reward()

    isterminal = game.is_episode_finished()
    s2 = preprocess(game.get_state().screen_buffer) if not isterminal else None

    # Remember the transition that was just experienced.
    replay_memory.add_transition(s1, a, s2, isterminal, reward)

    if replay_memory.size > batch_size:
        s1, a, s2, isterminal, r = replay_memory.get_sample(batch_size)

        q2 = network.get_q_target_values(s2)
        target_q = network.get_q_values(s1)
        target_q[np.arange(target_q.shape[0]), a] = \
            r + discount_factor * (1 - isterminal) * q2

        network.learn(s1, target_q, reward, epoch * steps_per_epoch + step)
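# A minimal, self-contained sketch (an illustration, not the project's code) of the
# Bellman target built above: for each sampled transition, only the entry of the
# chosen action is overwritten with r + gamma * max_a' Q_target(s2, a'), and terminal
# transitions keep the plain reward. Shapes and values below are made up; it assumes
# get_q_target_values already returns one scalar per sample.
import numpy as np

batch = 4
n_actions = 3
discount_factor = 0.99

r = np.array([1.0, -0.5, 0.0, 2.0])       # sampled rewards
a = np.array([0, 2, 1, 0])                # actions taken at s1
isterminal = np.array([0, 0, 1, 0])       # 1 where the episode ended
q2 = np.array([0.8, 0.3, 0.6, 1.1])       # stand-in for get_q_target_values(s2)
target_q = np.zeros((batch, n_actions))   # stand-in for get_q_values(s1)

# Vectorised row/column indexing: exactly one entry per row is replaced.
target_q[np.arange(batch), a] = r + discount_factor * (1 - isterminal) * q2
print(target_q)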
frag_count = game.get_game_variable(GameVariable.FRAGCOUNT)
death_count = game.get_game_variable(GameVariable.DEATHCOUNT)
print("FRAG: %d, DEATH: %d" % (frag_count, death_count))

# Add bots
for i in range(bots_num):
    game.send_game_command("addbot")

reward_gen = reward_generater.reward_generater(game)

while not game.is_episode_finished():
    if game.is_player_dead():
        game.respawn_player()
        reward_gen.reset_position()

    state = preprocess(game.get_state().screen_buffer)
    best_action_index = network.get_best_action(state)

    game.make_action(actions[best_action_index], frame_repeat)
    reward_gen.update_reward()
    reward = reward_gen.get_reward()
    test_scores.append(reward)

test_scores = np.array(test_scores)
print("Results: mean: %.1f(+-)%.1f," % (test_scores.mean(), test_scores.std()),
      "min: %.1f," % test_scores.min(), "max: %.1f," % test_scores.max())
frag_count = game.get_game_variable(GameVariable.FRAGCOUNT)
death_count = game.get_game_variable(GameVariable.DEATHCOUNT)
print("FRAG: %d, DEATH: %d" % (frag_count, death_count))
print("total: %d" % (reward_gen.get_total_reward()))
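# reward_generater is this project's own reward-shaping module and its source is not
# shown here. The sketch below is a hypothetical implementation of only the interface
# the loops above rely on (update_reward / get_reward / reset_position /
# get_total_reward); the shaping terms (frag delta plus a tiny movement bonus) are
# illustrative assumptions, not the real ones.
from vizdoom import GameVariable

class reward_generater:
    def __init__(self, game):
        self.game = game
        self.reward = 0.0
        self.total_reward = 0.0
        self.last_frags = game.get_game_variable(GameVariable.FRAGCOUNT)
        self.last_pos = (game.get_game_variable(GameVariable.POSITION_X),
                         game.get_game_variable(GameVariable.POSITION_Y))

    def reset_position(self):
        # After a respawn, forget the old position so the teleport does not
        # count as movement.
        self.last_pos = (self.game.get_game_variable(GameVariable.POSITION_X),
                         self.game.get_game_variable(GameVariable.POSITION_Y))

    def update_reward(self):
        frags = self.game.get_game_variable(GameVariable.FRAGCOUNT)
        x = self.game.get_game_variable(GameVariable.POSITION_X)
        y = self.game.get_game_variable(GameVariable.POSITION_Y)
        moved = ((x - self.last_pos[0]) ** 2 + (y - self.last_pos[1]) ** 2) ** 0.5
        # Assumed shaping: +1 per new frag plus a small bonus for moving around.
        self.reward = float(frags - self.last_frags) + 0.001 * moved
        self.last_frags = frags
        self.last_pos = (x, y)
        self.total_reward += self.reward

    def get_reward(self):
        return self.reward

    def get_total_reward(self):
        return self.total_reward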
print("%d actions are available" % (n_actions))
print(actions)

replay_memory = replay_memory.ReplayMemory(resolution, capacity)
session = tf.Session()
network = network.network(session, resolution, n_actions, learning_rate)
network.restore_model(ckpt_path + "model.ckpt")

game.set_window_visible(True)
game.set_mode(Mode.ASYNC_PLAYER)
game.init()

for _ in range(episodes_to_watch):
    game.new_episode()

    while not game.is_episode_finished():
        state = preprocess(game.get_state().screen_buffer)
        best_action_index = network.get_best_action(np.array([state]))
        best_action_index = best_action_index[0]

        # Set the action and advance frame by frame instead of calling
        # make_action(a, frame_repeat), so the animation stays smooth.
        game.set_action(actions[best_action_index])
        for _ in range(frame_repeat):
            game.advance_action()

    # Sleep between episodes
    sleep(1.0)
    score = game.get_total_reward()
    print("Total score: ", score)
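# The fragments above also lean on two helpers defined elsewhere in the script:
# `actions` (the list indexed by the network's output) and `preprocess` (the frame
# fed to the network). A plausible sketch follows, assuming actions enumerate all
# button combinations and the screen format is grayscale (GRAY8); the `resolution`
# value and both helper names are assumptions taken from how the code calls them.
import itertools
import numpy as np
import skimage.transform

resolution = (48, 64)  # assumed (height, width) the network was built with

def build_actions(n_buttons):
    # One multi-hot button list per possible combination of the available buttons.
    return [list(combo) for combo in itertools.product([0, 1], repeat=n_buttons)]

def preprocess(img):
    # Resize the raw screen buffer to the network's input resolution and cast it.
    img = skimage.transform.resize(img, resolution)
    return img.astype(np.float32)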