def evaluate_experiment(env_id, experiment_name, num_envs=96):
    # fixed seeds
    random.seed(0)
    np.random.seed(0)
    tf.random.set_random_seed(0)

    params = AgentTMAX.Params(experiment_name)
    params = params.load()
    params.seed = 0

    # for faster evaluation
    params.num_envs = num_envs
    params.num_workers = 32 if num_envs >= 32 else num_envs

    def make_env_func():
        e = create_env(env_id, skip_frames=True)
        e.seed(0)
        return e

    agent = AgentTMAX(make_env_func, params)
    agent.initialize()

    rate, speed = 0, -1
    multi_env = None
    try:
        multi_env = MultiEnv(
            params.num_envs,
            params.num_workers,
            make_env_func=make_env_func,
            stats_episodes=params.stats_episodes,
        )

        success, avg_speed = evaluate_locomotion_agent(agent, multi_env)
        log.info('Finished evaluating experiment %s', experiment_name)

        rate = np.mean(success)

        # ignore episodes with non-positive speed when averaging
        speed = -1
        avg_speed = [s for s in avg_speed if s > 0]
        if len(avg_speed) > 0:
            speed = np.mean(avg_speed)

        log.info('Success rate %.1f%%, avg. speed %.2f edges/frame', rate * 100, speed)
    except (Exception, KeyboardInterrupt, SystemExit):
        log.exception('Interrupt...')
    finally:
        log.info('Closing env...')
        if multi_env is not None:
            multi_env.close()

    agent.finalize()
    return rate, speed
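# Example invocation (a sketch, not part of the original script; the env id and
# experiment name below are assumptions -- substitute the environment and the
# trained experiment checkpoint you actually want to evaluate):
#
#   success_rate, avg_speed = evaluate_experiment('doom_maze_very_sparse', 'tmax_locomotion_v1')
#   log.info('success: %.3f, speed: %.3f edges/frame', success_rate, avg_speed)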
def test_dist_training(self):
    t = Timing()

    def make_env():
        return make_doom_env(doom_env_by_name(TEST_ENV_NAME))

    params = AgentTMAX.Params('__test_dist_train__')
    params.distance_target_buffer_size = 1000

    with t.timeit('generate_data'):
        # first: generate fake random data
        buffer = Buffer()

        # two easily distinguishable observations: one all-green, one all-blue
        obs1 = np.full([84, 84, 3], 0, dtype=np.uint8)
        obs1[:, :, 1] = 255
        obs2 = np.full([84, 84, 3], 0, dtype=np.uint8)
        obs2[:, :, 2] = 255

        data_size = params.distance_target_buffer_size
        for i in range(data_size):
            same = i % 2 == 0

            if same:
                # label 0: a pair of identical observations
                if random.random() < 0.5:
                    obs_first = obs_second = obs1
                else:
                    obs_first = obs_second = obs2
            else:
                # label 1: two different observations, in random order
                obs_first, obs_second = obs1, obs2
                if random.random() < 0.5:
                    obs_second, obs_first = obs_first, obs_second

            buffer.add(obs_first=obs_first, obs_second=obs_second, labels=0 if same else 1)

    with t.timeit('init'):
        agent = AgentTMAX(make_env, params)
        agent.initialize()

        params.distance_train_epochs = 1
        params.distance_batch_size = 256
        agent.distance.train(buffer, 1, agent)

    with t.timeit('train'):
        params.distance_train_epochs = 2
        params.distance_batch_size = 64
        agent.distance.train(buffer, 1, agent, t)

    agent.finalize()

    log.info('Timing: %s', t)
    shutil.rmtree(params.experiment_dir())
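# The method above is a unittest-style test; a minimal sketch of running it on its own
# (the module path and TestCase class name are assumptions -- adjust to wherever the
# test actually lives in the repo):
#
#   python -m unittest tests.test_tmax.TestTMAX.test_dist_training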