# --- Task-variant dispatch -------------------------------------------------
# Selects the Ant environment, the experiment name prefix, and the policy for
# the chosen task variant. `v`, `task_var`, `max_path_length`, and the env /
# policy classes are defined elsewhere in this file (this is a script chunk).
#   task_var == 0 -> goal direction ('direc')
#   task_var == 1 -> goal velocity  ('vel')
#   task_var == 2 -> goal position  ('pos')
# `oracle` chooses the oracle variant of each env (task revealed to the agent)
# over the randomized variant.
# NOTE(review): removed a leftover debug `print("HERE")` from the
# task_var == 2 branch; it affected only stdout.
oracle = v['oracle']
if task_var == 0:
    task_var = 'direc'
    exp_prefix = 'bugfix_trpo_maml_antdirec' + str(max_path_length)
    if oracle:
        env = TfEnv(normalize(AntEnvDirecOracle()))
    else:
        env = TfEnv(normalize(AntEnvRandDirec()))
elif task_var == 1:
    task_var = 'vel'
    exp_prefix = 'posticml_trpo_maml_ant' + str(max_path_length)
    if oracle:
        env = TfEnv(normalize(AntEnvOracle()))
    else:
        env = TfEnv(normalize(AntEnvRand()))
elif task_var == 2:
    task_var = 'pos'
    exp_prefix = 'posticml_trpo_maml_antpos_' + str(max_path_length)
    if oracle:
        env = TfEnv(normalize(AntEnvRandGoalOracle()))
    else:
        env = TfEnv(normalize(AntEnvRandGoal()))
# Two-layer ReLU Gaussian MLP policy over the selected environment.
policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_nonlinearity=tf.nn.relu,
    hidden_sizes=(100, 100),
)
# --- Per-goal VPG fine-tuning / evaluation loop ----------------------------
# NOTE(review): this chunk is TRUNCATED — the `algo = VPG(env=env,` call at
# the end is never closed and the `for goal in goals:` body continues past the
# visible source, so the code below is kept byte-identical (its whitespace was
# also collapsed by whatever mangled this file; restore indentation against
# the original before running).
# For each (step_size, initial_params_file) pair and each goal:
#   - 'oracle' checkpoints get the oracle env and a single iteration
#     (n_itr = 1); non-oracle checkpoints get the randomized env and n_itr = 4
#     — presumably gradient-adaptation steps; confirm against the VPG config.
#   - a fresh random GaussianMLPPolicy is built, then discarded
#     (policy = None) whenever a checkpoint file is supplied, so VPG
#     presumably loads the policy from `initial_params_file` — TODO confirm.
# `file1`/`file2`/`file3`, `goals`, and the env/algo classes are defined
# elsewhere in this file.
step_sizes = [0.1, 0.2, 1.0, 0.0] initial_params_files = [file1, file2, None, file3] all_avg_returns = [] for step_i, initial_params_file in zip(range(len(step_sizes)), initial_params_files): avg_returns = [] for goal in goals: if initial_params_file is not None and 'oracle' in initial_params_file: env = normalize(AntEnvOracle()) n_itr = 1 else: env = normalize(AntEnvRand()) n_itr = 4 env = TfEnv(env) policy = GaussianMLPPolicy( # random policy name='policy', env_spec=env.spec, hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) if initial_params_file is not None: policy = None baseline = LinearFeatureBaseline(env_spec=env.spec) algo = VPG( env=env,