def perform_learning_step(epoch, step, network, replay_memory, reward_gen):
    def exploration_rate(epoch):
        """# Define exploration rate change over time"""
        start_eps = 1.0
        end_eps = 0.1
        const_eps_epochs = 0.1 * n_epoch  # 10% of learning time
        eps_decay_epochs = 0.6 * n_epoch  # 60% of learning time

        if epoch < const_eps_epochs:
            return start_eps
        elif epoch < eps_decay_epochs:
            # Linear decay
            return start_eps - (epoch - const_eps_epochs) / \
                               (eps_decay_epochs - const_eps_epochs) * (start_eps - end_eps)
        else:
            return end_eps

    s1 = preprocess(game.get_state().screen_buffer)

    eps = exploration_rate(epoch)
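    # Epsilon-greedy action selection: with probability eps take a random action,
    # otherwise act greedily (assumes `random` and `randint` come from the random module).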
    if random() <= eps:
        a = randint(0, len(actions) - 1)
    else:
        # Choose the best action according to the network.
        a = network.get_best_action(np.array([s1]))
    game.make_action(actions[a], frame_repeat)
    reward_gen.update_reward()
    reward = reward_gen.get_reward()

    isterminal = game.is_episode_finished()
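    # The successor state is None when the action ended the episode.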
    s2 = preprocess(game.get_state().screen_buffer) if not isterminal else None

    # Remember the transition that was just experienced.
    replay_memory.add_transition(s1, a, s2, isterminal, reward)

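    # Once enough transitions are stored, learn from a random minibatch.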
    if replay_memory.size > batch_size:
        s1, a, s2, isterminal, r = replay_memory.get_sample(batch_size)

        q2 = network.get_q_target_values(s2)
        target_q = network.get_q_values(s1)

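        # Q-learning target: r + discount_factor * max_a' Q_target(s2, a') per sampled
        # transition, with (1 - isterminal) zeroing the bootstrap term on terminal states.
        # q2 is assumed to already hold that per-sample maximum from the target network.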
        target_q[np.arange(target_q.shape[0]), a] = \
            r + discount_factor * (1 - isterminal) * q2
        network.learn(s1, target_q, reward, epoch * steps_per_epoch + step)
    frag_count = game.get_game_variable(GameVariable.FRAGCOUNT)
    death_count = game.get_game_variable(GameVariable.DEATHCOUNT)
    print("FRAG: %d, DEATH: %d" % (frag_count, death_count))
    # Add bots
    for i in range(bots_num):
        game.send_game_command("addbot")
    reward_gen = reward_generater.reward_generater(game)
    test_scores = []  # per-step shaped rewards collected during this evaluation run

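    # Play one evaluation episode greedily, respawning on death and logging the
    # shaped reward from reward_gen at every step.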
    while not game.is_episode_finished():

        if game.is_player_dead():
            game.respawn_player()
            reward_gen.reset_position()

        state = preprocess(game.get_state().screen_buffer)
        best_action_index = network.get_best_action(state)
        game.make_action(actions[best_action_index], frame_repeat)

        reward_gen.update_reward()
        reward = reward_gen.get_reward()
        test_scores.append(reward)

    test_scores = np.array(test_scores)
    print("Results: mean: %.1f(+-)%.1f," % (test_scores.mean(), test_scores.std()), \
                  "min: %.1f," % test_scores.min(), "max: %.1f," % test_scores.max())

    frag_count = game.get_game_variable(GameVariable.FRAGCOUNT)
    death_count = game.get_game_variable(GameVariable.DEATHCOUNT)
    print("FRAG: %d, DEATH: %d" % (frag_count, death_count))
    print("total: %d" % (reward_gen.get_total_reward()))
Example #3
    print("%d actions is activate" % (n_actions))
    print(actions)

    replay_memory = replay_memory.ReplayMemory(resolution, capacity)

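    # Build the TF1 session and the Q-network, then load the pretrained weights.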
    session = tf.Session()
    network = network.network(session, resolution, n_actions, learning_rate)

    network.restore_model(ckpt_path + "model.ckpt")

    game.set_window_visible(True)
    game.set_mode(Mode.ASYNC_PLAYER)
    game.init()

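    # Watch the trained agent: act greedily and advance the game frame by frame
    # so the episode renders smoothly in the visible window.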
    for _ in range(episodes_to_watch):
        game.new_episode()
        while not game.is_episode_finished():
            state = preprocess(game.get_state().screen_buffer)
            best_action_index = network.get_best_action(np.array([state]))
            best_action_index = best_action_index[0]

            # Use set_action + advance_action instead of make_action(a, frame_repeat)
            # so the animation stays smooth.
            game.set_action(actions[best_action_index])
            for _ in range(frame_repeat):
                game.advance_action()

        # Sleep between episodes
        sleep(1.0)
        score = game.get_total_reward()
        print("Total score: ", score)