示例#1
0
def evaluate_experiment(env_id, experiment_name, num_envs=96):
    """Evaluate a saved AgentTMAX experiment and report its locomotion stats.

    The ``random``, NumPy and TensorFlow RNGs, the loaded params and every
    created environment are all seeded with 0. Any exception raised while
    building the environments or running the evaluation (``KeyboardInterrupt``
    and ``SystemExit`` included) is logged and swallowed; the function then
    returns whatever values were computed up to that point.

    Args:
        env_id: identifier forwarded to ``create_env``.
        experiment_name: name used to construct ``AgentTMAX.Params``; the
            result of its ``load()`` configures the agent.
        num_envs: number of environments to evaluate on. The worker count is
            set to the same value, capped at 32.

    Returns:
        A ``(rate, speed)`` tuple. ``rate`` is the mean of the ``success``
        values returned by ``evaluate_locomotion_agent`` (0 if it was never
        computed). ``speed`` is the mean of the strictly positive entries of
        ``avg_speed``, or -1 when there are none or evaluation failed.
    """
    seed = 0

    # Fixed seeds for every RNG involved.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_random_seed(seed)

    params = AgentTMAX.Params(experiment_name).load()
    params.seed = seed

    # Override the stored settings for faster evaluation.
    params.num_envs = num_envs
    if num_envs >= 32:
        params.num_workers = 32
    else:
        params.num_workers = num_envs

    def make_env_func():
        env = create_env(env_id, skip_frames=True)
        env.seed(seed)
        return env

    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    # Defaults returned when evaluation does not get far enough to set them.
    rate = 0
    speed = -1

    multi_env = None
    try:
        multi_env = MultiEnv(
            params.num_envs,
            params.num_workers,
            make_env_func=make_env_func,
            stats_episodes=params.stats_episodes,
        )

        success, avg_speed = evaluate_locomotion_agent(agent, multi_env)
        log.info('Finished evaluating experiment %s', experiment_name)

        rate = np.mean(success)

        # Only strictly positive speeds contribute to the average.
        positive_speeds = list(filter(lambda s: s > 0, avg_speed))
        speed = np.mean(positive_speeds) if positive_speeds else -1

        log.info('Success rate %.1f%%, avg. speed %.2f edges/frame', rate * 100, speed)
    except (Exception, KeyboardInterrupt, SystemExit):
        # Best effort: log the failure and fall through to cleanup.
        log.exception('Interrupt...')
    finally:
        log.info('Closing env...')
        if multi_env is not None:
            multi_env.close()

    agent.finalize()
    return rate, speed
示例#2
0
    def test_dist_training(self):
        """Run ``agent.distance.train`` on a buffer of synthetic frame pairs.

        The buffer holds ``params.distance_target_buffer_size`` pairs built
        from two constant 84x84x3 uint8 frames that differ in which channel
        is set to 255. Even-indexed pairs contain the same frame twice
        (label 0); odd-indexed pairs contain both frames in random order
        (label 1). Training is invoked twice, under the 'init' and 'train'
        timers, and the experiment directory is removed at the end.
        """
        timing = Timing()

        def make_env():
            return make_doom_env(doom_env_by_name(TEST_ENV_NAME))

        params = AgentTMAX.Params('__test_dist_train__')
        params.distance_target_buffer_size = 1000

        with timing.timeit('generate_data'):
            # Fake data: two easily distinguishable constant observations.
            buffer = Buffer()

            frame_shape = (84, 84, 3)
            obs1 = np.zeros(frame_shape, dtype=np.uint8)
            obs1[..., 1] = 255
            obs2 = np.zeros(frame_shape, dtype=np.uint8)
            obs2[..., 2] = 255

            for idx in range(params.distance_target_buffer_size):
                is_same = idx % 2 == 0
                # Exactly one random draw per sample, whichever branch is taken.
                coin = random.random() < 0.5

                if is_same:
                    obs_first = obs_second = obs1 if coin else obs2
                elif coin:
                    obs_first, obs_second = obs2, obs1
                else:
                    obs_first, obs_second = obs1, obs2

                buffer.add(
                    obs_first=obs_first,
                    obs_second=obs_second,
                    labels=int(not is_same),
                )

        with timing.timeit('init'):
            agent = AgentTMAX(make_env, params)
            agent.initialize()

            # NOTE(review): presumably a warm-up pass so one-off setup cost is
            # not attributed to the 'train' timer below -- confirm.
            params.distance_train_epochs = 1
            params.distance_batch_size = 256
            agent.distance.train(buffer, 1, agent)

        with timing.timeit('train'):
            params.distance_train_epochs = 2
            params.distance_batch_size = 64
            agent.distance.train(buffer, 1, agent, timing)

        agent.finalize()

        log.info('Timing: %s', timing)
        shutil.rmtree(params.experiment_dir())