# Example 1
def main(model_path, rom_args, learner_args, network_args, num_threads, epochs, logdir, save_interval):
    """Load a trained DQN model and evaluate it for up to 100 episodes.

    Args:
        model_path: Path to the saved network weights to load.
        rom_args: Keyword arguments forwarded to ``ALEEnvironment``.
        learner_args: Keyword arguments for ``QThreadLearner``; must contain
            ``'phi_length'``. May contain ``'epsilon_annealing_start'``, which
            is overridden here for evaluation. Not mutated.
        network_args: Keyword arguments forwarded to ``TargetDQN``.
        num_threads, epochs, logdir, save_interval: Unused here; kept for a
            signature shared with the training entry point.

    Returns:
        Tuple ``(max_reward, mean_reward, min_reward)`` over the completed
        episodes, or ``(0.0, 0.0, 0.0)`` if no episode finished.
    """
    # create env
    environment = ALEEnvironment(**rom_args)

    # create network then load the trained weights
    num_actions = environment.get_num_actions()
    input_shape = [learner_args['phi_length']] + environment.get_state_shape()
    network = TargetDQN(input_shape, num_actions, 'dqn', **network_args)
    network.load(model_path)

    # Build evaluation args WITHOUT mutating the caller's dict (the original
    # `del learner_args[...]` was a side effect visible to the caller).
    # epsilon is pinned low and testing=True so the policy is near-greedy.
    eval_args = {k: v for k, v in learner_args.items()
                 if k != 'epsilon_annealing_start'}
    learner = QThreadLearner(environment, network, {}, **eval_args,
                             epsilon_annealing_start=0.01, testing=True)

    # run up to 100 episodes; Ctrl-C stops early and reports what we have
    reward_list = []
    try:
        for episode in range(100):
            reward = learner.run_episode(environment)
            print('Episode: {}. Steps: {}. Reward: {}'.format(
                episode, environment.curr_step_count, reward))
            reward_list.append(reward)
    except KeyboardInterrupt:
        pass

    # Guard: an interrupt before the first episode finishes would make
    # max()/min() raise ValueError on the empty list.
    if not reward_list:
        print('No completed episodes; nothing to report.')
        return 0.0, 0.0, 0.0

    import matplotlib.pyplot as plt
    plt.title('Max: {0}, Mean: {1}, Min: {2}'.format(
        max(reward_list), np.mean(reward_list), min(reward_list)))
    plt.plot(reward_list)
    plt.show()
    return max(reward_list), np.mean(reward_list), min(reward_list)
# Example 2
def main(rom_args, learner_args, network_args, num_threads, epochs, logdir,
         save_interval):
    """Train an N-step A3C-LSTM agent with asynchronous learner threads.

    Args:
        rom_args: Keyword arguments forwarded to ``ALEEnvironment`` (one
            environment is created per thread).
        learner_args: Keyword arguments for ``RecurrentThreadLearner``; must
            contain ``'phi_length'``.
        network_args: Keyword arguments forwarded to ``NStepA3CLSTM``.
        num_threads: Number of parallel learner threads/environments.
        epochs: Number of training epochs to run.
        logdir: Directory for logs and the saved reward plot.
        save_interval: Checkpoint interval forwarded to ``run_epochs``.

    Returns:
        The maximum episode reward observed, or ``0.0`` if no rewards were
        recorded.
    """
    import os

    # create one env per thread
    environments = [ALEEnvironment(**rom_args) for _ in range(num_threads)]

    # create the shared network (all envs expose the same action/state space)
    num_actions = environments[0].get_num_actions()
    input_shape = [learner_args['phi_length']
                   ] + environments[0].get_state_shape()
    network = NStepA3CLSTM(input_shape, num_actions, **network_args)

    # host owns the shared parameters and the logging directory
    thread_host = AsyncThreadHost(network, log_dir=logdir)

    # one learner thread per environment, all sharing the host's dict
    threads = [
        RecurrentThreadLearner(environments[t], network,
                               thread_host.shared_dict, **learner_args)
        for t in range(num_threads)
    ]

    # reward_list entries are (reward, step) pairs
    reward_list = thread_host.run_epochs(epochs,
                                         threads,
                                         save_interval=save_interval)

    import matplotlib.pyplot as plt
    plt.plot([x[1] for x in reward_list], [x[0] for x in reward_list], '.')
    # os.path.join handles a logdir without a trailing slash; the original
    # `logdir + 'rewards.png'` silently mangled the filename in that case.
    plt.savefig(os.path.join(logdir, 'rewards.png'))
    plt.show()
    # default=0.0 avoids ValueError if training produced no rewards
    return max((x[0] for x in reward_list), default=0.0)