def experiment(variant):
    # Load the expert demonstrations listed under the requested expert name / index.
    with open('expert_demos_listing.yaml', 'r') as f:
        listings = yaml.safe_load(f)
    demos_path = listings[variant['expert_name']]['file_paths'][
        variant['expert_idx']]
    print(demos_path)
    buffer_save_dict = joblib.load(demos_path)
    target_state_buffer = buffer_save_dict['data']
    # target_state_buffer /= variant['rescale']
    state_indices = torch.LongTensor(variant['state_indices'])

    # Build and seed the evaluation environment.
    env_specs = variant['env_specs']
    env = get_env(env_specs)
    env.seed(env_specs['eval_env_seed'])

    print('\n\nEnv: {}'.format(env_specs['env_name']))
    print('kwargs: {}'.format(env_specs['env_kwargs']))
    print('Obs Space: {}'.format(env.observation_space))
    print('Act Space: {}\n\n'.format(env.action_space))

    # Load the policy checkpoint and optionally make it deterministic for evaluation.
    policy = joblib.load(variant['policy_checkpoint'])['exploration_policy']
    if variant['eval_deterministic']:
        policy = MakeDeterministic(policy)
    policy.to(ptu.device)

    # Roll out the policy and collect evaluation paths.
    eval_sampler = PathSampler(
        env,
        policy,
        variant['num_eval_steps'],
        variant['max_path_length'],
        no_terminal=variant['no_terminal'],
        render=variant['render'],
        render_kwargs=variant['render_kwargs'])
    test_paths = eval_sampler.obtain_samples()

    # Scatter-plot the first two observation dimensions of every visited state.
    obs = []
    for path in test_paths:
        obs += path['observations']
    x = [o[0] for o in obs]
    y = [o[1] for o in obs]

    fig, ax = plt.subplots(figsize=(6, 6))
    plt.scatter(x, y)
    plt.xlim(-1.25, 20)
    plt.ylim(-1.25, 10)
    ax.set_yticks([0, 5, 10])
    ax.set_xticks([0, 5, 10, 15, 20])
    plt.savefig('./figs/' + variant['env_specs']['task_name'] + '.pdf',
                bbox_inches='tight')

    return 1
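# Example usage: a hypothetical sketch of the variant dict this script expects.
# Only keys actually read by experiment() above are listed; every concrete value
# (listing name, paths, task name, limits) is an illustrative placeholder, not a
# setting taken from the repository's configs.
example_variant = dict(
    expert_name='some_expert_listing_name',   # key into expert_demos_listing.yaml (placeholder)
    expert_idx=0,
    state_indices=[0, 1],                     # observation dims of interest
    env_specs=dict(
        env_name='some_env',                  # placeholder
        env_kwargs={},
        eval_env_seed=0,
        task_name='some_task',                # used for the saved figure's filename
    ),
    policy_checkpoint='path/to/params.pkl',   # placeholder checkpoint path
    eval_deterministic=True,
    num_eval_steps=10000,
    max_path_length=1000,
    no_terminal=False,
    render=False,
    render_kwargs={},
)
# experiment(example_variant)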
def experiment(variant):
    # Build and seed the evaluation environment.
    env_specs = variant['env_specs']
    env = get_env(env_specs)
    env.seed(env_specs['eval_env_seed'])

    print('\n\nEnv: {}'.format(env_specs['env_name']))
    print('kwargs: {}'.format(env_specs['env_kwargs']))
    print('Obs Space: {}'.format(env.observation_space))
    print('Act Space: {}\n\n'.format(env.action_space))

    # Optionally normalize observations and actions with the expert demos' statistics.
    if variant['scale_env_with_demo_stats']:
        with open('expert_demos_listing.yaml', 'r') as f:
            listings = yaml.safe_load(f)
        expert_demos_path = listings[variant['expert_name']]['file_paths'][
            variant['expert_idx']]
        buffer_save_dict = joblib.load(expert_demos_path)
        env = ScaledEnv(
            env,
            obs_mean=buffer_save_dict['obs_mean'],
            obs_std=buffer_save_dict['obs_std'],
            acts_mean=buffer_save_dict['acts_mean'],
            acts_std=buffer_save_dict['acts_std'],
        )

    # Load the policy checkpoint and optionally make it deterministic for evaluation.
    policy = joblib.load(variant['policy_checkpoint'])['exploration_policy']
    if variant['eval_deterministic']:
        policy = MakeDeterministic(policy)
    policy.to(ptu.device)

    # Roll out the policy and report the average return over the evaluation paths.
    eval_sampler = PathSampler(
        env,
        policy,
        variant['num_eval_steps'],
        variant['max_path_length'],
        no_terminal=variant['no_terminal'],
        render=variant['render'],
        render_kwargs=variant['render_kwargs'])
    test_paths = eval_sampler.obtain_samples()
    average_returns = eval_util.get_average_returns(test_paths)
    print(average_returns)

    return 1
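# A minimal sketch of what a normalization wrapper like ScaledEnv is assumed to
# do: whiten observations with the expert statistics and un-whiten the policy's
# actions before stepping the wrapped env. This illustrates the idea only; it is
# not the repository's actual ScaledEnv implementation.
class NormalizedEnvSketch(object):
    def __init__(self, env, obs_mean, obs_std, acts_mean, acts_std, eps=1e-8):
        self._env = env
        self._obs_mean, self._obs_std = obs_mean, obs_std
        self._acts_mean, self._acts_std = acts_mean, acts_std
        self._eps = eps

    def reset(self):
        return (self._env.reset() - self._obs_mean) / (self._obs_std + self._eps)

    def step(self, action):
        # The policy acts in normalized action space; map back to raw actions.
        raw_action = action * (self._acts_std + self._eps) + self._acts_mean
        obs, rew, done, info = self._env.step(raw_action)
        obs = (obs - self._obs_mean) / (self._obs_std + self._eps)
        return obs, rew, done, info

    def __getattr__(self, name):
        # Delegate everything else (seed, observation_space, ...) to the wrapped env.
        return getattr(self._env, name)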
env.seed(env_specs['eval_env_seed'])

# Scale the environment with the expert demos' normalization statistics.
with open('expert_demos_listing.yaml', 'r') as f:
    listings = yaml.safe_load(f)
expert_demos_path = listings['norm_halfcheetah_32_demos_sub_20']['file_paths'][0]
buffer_save_dict = joblib.load(expert_demos_path)
env = ScaledEnv(
    env,
    obs_mean=buffer_save_dict['obs_mean'],
    obs_std=buffer_save_dict['obs_std'],
    acts_mean=buffer_save_dict['acts_mean'],
    acts_std=buffer_save_dict['acts_std'],
)

# Load the BC, DAgger, and IRL policy checkpoints to compare.
bc_policy = joblib.load('/scratch/hdd001/home/kamyar/output/paper-version-hc-bc/paper_version_hc_bc_2019_05_19_00_32_05_0000--s-0/params.pkl')['exploration_policy']
bc_policy = MakeDeterministic(bc_policy)
bc_policy.to(ptu.device)

dagger_policy = joblib.load('/scratch/hdd001/home/kamyar/output/dagger-halfcheetah/dagger_halfcheetah_2019_08_20_16_30_36_0000--s-0/params.pkl')['exploration_policy']
dagger_policy = MakeDeterministic(dagger_policy)
dagger_policy.to(ptu.device)

irl_policy = joblib.load('/scratch/hdd001/home/kamyar/output/hc_airl_ckpt/params.pkl')['exploration_policy']
irl_policy = MakeDeterministic(irl_policy)
irl_policy.to(ptu.device)

fig, ax = plt.subplots(1)

eval_sampler = PathSampler(
    env,
    bc_policy,
    20000,
def experiment(specs):
    # Load the expert: either a scripted policy or a saved policy checkpoint.
    if not specs['use_scripted_policy']:
        policy_is_scripted = False
        policy = joblib.load(specs['expert_path'])['policy']
    else:
        policy_is_scripted = True
        policy = get_scripted_policy(specs['scripted_policy_name'])

    if specs['use_deterministic_expert']:
        policy = MakeDeterministic(policy)
    if ptu.gpu_enabled():
        policy.to(ptu.device)

    env = get_env(specs['env_specs'])
    env.seed(specs['env_specs']['env_seed'])

    # make the replay buffers
    max_path_length = specs['max_path_length']
    if 'wrap_absorbing' in specs and specs['wrap_absorbing']:
        """
        There was an initial implementation for this in v1.0
        in gen_irl_expert_trajs.py
        """
        raise NotImplementedError()
        _max_buffer_size = (max_path_length + 2) * specs['num_rollouts']
    else:
        _max_buffer_size = max_path_length * specs['num_rollouts']
    _max_buffer_size = int(
        np.ceil(_max_buffer_size / float(specs['subsample_factor'])))
    buffer_constructor = lambda: EnvReplayBuffer(
        _max_buffer_size,
        env,
    )
    train_buffer = buffer_constructor()
    test_buffer = buffer_constructor()

    render = specs['render']
    render_kwargs = specs['render_kwargs']
    check_for_success = specs['check_for_success']

    print('\n')
    # fill the train buffer
    fill_buffer(train_buffer,
                env,
                policy,
                specs['num_rollouts'],
                max_path_length,
                no_terminal=specs['no_terminal'],
                policy_is_scripted=policy_is_scripted,
                render=render,
                render_kwargs=render_kwargs,
                check_for_success=check_for_success,
                wrap_absorbing=False,
                subsample_factor=specs['subsample_factor'])

    # fill the test buffer
    fill_buffer(test_buffer,
                env,
                policy,
                specs['num_rollouts'],
                max_path_length,
                no_terminal=specs['no_terminal'],
                policy_is_scripted=policy_is_scripted,
                render=render,
                render_kwargs=render_kwargs,
                check_for_success=check_for_success,
                wrap_absorbing=False,
                subsample_factor=specs['subsample_factor'])

    # save the replay buffers
    logger.save_extra_data({
        'train': train_buffer,
        'test': test_buffer
    }, name='expert_demos.pkl')

    return 1
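# Example usage: a hypothetical sketch of the specs dict this script expects.
# Every value below is an illustrative placeholder, not a setting from the
# repository's configs; 'wrap_absorbing' is omitted since it is optional, and
# 'scripted_policy_name' is only read when use_scripted_policy is True.
example_specs = dict(
    use_scripted_policy=False,
    expert_path='path/to/expert_params.pkl',   # placeholder checkpoint path
    use_deterministic_expert=True,
    env_specs=dict(env_name='some_env', env_kwargs={}, env_seed=0),  # placeholder
    max_path_length=1000,
    num_rollouts=32,
    subsample_factor=20,   # keep every 20th transition: buffer size = ceil(1000 * 32 / 20)
    no_terminal=False,
    render=False,
    render_kwargs={},
    check_for_success=False,
)
# experiment(example_specs)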