def test_finite_difference_hvp_2x2_non_diagonal(self, a_val, b_val, x_val,
                                                y_val, vector):
    """Test Hessian-vector product for a function with two variables whose Hessian is non-diagonal."""
    a_val = [a_val]
    b_val = [b_val]
    vector = np.array([vector], dtype=np.float32)

    policy = HelperPolicy(n_vars=2)
    params = policy.get_params()
    x, y = params[0], params[1]
    a = tf.constant(a_val)
    b = tf.constant(b_val)
    f = a * (x**3) + b * (y**3) + (x**2) * y + (y**2) * x

    expected_hessian = compute_hessian(f, [x, y])
    expected_hvp = tf.matmul(vector, expected_hessian)
    reg_coeff = 1e-5
    hvp = FiniteDifferenceHvp(base_eps=1)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    self.sess.run(x.assign([x_val]))
    self.sess.run(y.assign([y_val]))
    hvp.update_hvp(f, policy, (a, b), reg_coeff)
    hx = hvp.build_eval((np.array(a_val), np.array(b_val)))
    computed_hvp = hx(vector[0])
    expected_hvp = expected_hvp.eval()
    assert np.allclose(computed_hvp, expected_hvp)
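# A minimal sketch of what the `compute_hessian` helper referenced above
# presumably does: build the dense Hessian of a scalar-valued `f` with nested
# tf.gradients over shape-(1,) variables. The name `_compute_hessian_sketch`
# is hypothetical; the helper that ships with this test module may differ.
def _compute_hessian_sketch(f, params):
    rows = []
    for i in params:
        row = []
        for j in params:
            # d^2 f / (d i d j) via nested symbolic gradients
            h_ij = tf.gradients(tf.gradients(f, j)[0], i)[0]
            row.append(h_ij[0])
        rows.append(tf.stack(row))
    return tf.stack(rows)  # shape (len(params), len(params))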
def test_finite_difference_hvp(self):
    """Test Hessian-vector product for a function with one variable."""
    policy = HelperPolicy(n_vars=1)
    x = policy.get_params()[0]
    a_val = np.array([5.0])
    a = tf.compat.v1.placeholder(tf.float32, shape=(1, ))
    f = a * (x**2)

    expected_hessian = 2 * a_val
    vector = np.array([10.0])
    expected_hvp = expected_hessian * vector
    reg_coeff = 1e-5
    hvp = FiniteDifferenceHvp()

    self.sess.run(tf.compat.v1.global_variables_initializer())
    hvp.update_hvp(f, policy, (a, ), reg_coeff)
    hx = hvp.build_eval(np.array([a_val]))
    computed_hvp = hx(vector)
    assert np.allclose(computed_hvp, expected_hvp)
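# For reference, FiniteDifferenceHvp approximates the Hessian-vector product as
#   H v ~= (grad f(theta + eps * v) - grad f(theta)) / eps   (+ reg_coeff * v for damping).
# A minimal NumPy-free illustration for the f(x) = a * x**2 case above, written
# independently of the garage API (function and argument names are illustrative):
def _finite_difference_hvp_sketch(a=5.0, x=3.0, v=10.0, eps=1e-5, reg_coeff=1e-5):
    def grad(x_):  # df/dx for f(x) = a * x**2
        return 2.0 * a * x_
    # Finite-difference HVP; ~= 2 * a * v = 100.0 for the defaults
    return (grad(x + eps * v) - grad(x)) / eps + reg_coeff * v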
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(
                    snapshot_config=snapshot_config, max_cpus=4,
                    sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(normalize(ASTEnv(simulator=sim,
                                             reward_function=reward_function,
                                             spaces=spaces,
                                             **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler

                local_runner.setup(
                    algo=algo,
                    env=env,
                    sampler_cls=sampler_cls,
                    sampler_args={'open_loop': False,
                                  'sim': sim,
                                  'reward_function': reward_function,
                                  'n_envs': n_parallel})

                # Run the experiment
                local_runner.train(**runner_args)
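# Note on the optimizer setup above: PPO/TRPO here receive the optimizer *class*
# plus optimizer_args and construct the instance internally, roughly as sketched
# below. This is an illustrative sketch of that convention (the helper name is
# hypothetical), not the algorithms' actual constructor code:
def _make_optimizer_sketch(optimizer=ConjugateGradientOptimizer,
                           optimizer_args=None):
    optimizer_args = optimizer_args or {
        'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
    }
    # e.g. ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
    return optimizer(**optimizer_args)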
def test_pickleable(self):
    """Test that FiniteDifferenceHvp gives the same result after pickling."""
    policy = HelperPolicy(n_vars=1)
    x = policy.get_params()[0]
    a_val = np.array([5.0])
    a = tf.compat.v1.placeholder(tf.float32, shape=(1, ))
    f = a * (x**2)
    vector = np.array([10.0])
    reg_coeff = 1e-5
    hvp = FiniteDifferenceHvp()

    self.sess.run(tf.compat.v1.global_variables_initializer())
    hvp.update_hvp(f, policy, (a, ), reg_coeff)
    hx = hvp.build_eval(np.array([a_val]))
    before_pickle = hx(vector)

    hvp = pickle.loads(pickle.dumps(hvp))
    hvp.update_hvp(f, policy, (a, ), reg_coeff)
    hx = hvp.build_eval(np.array([a_val]))
    after_pickle = hx(vector)
    assert np.allclose(before_pickle, after_pickle)
if trial > args.trial_start:
    old_log_dir = args.log_dir + '/' + str(trial - 1)
    logger.pop_prefix()
    logger.remove_text_output(osp.join(old_log_dir, 'text.txt'))
    logger.remove_tabular_output(osp.join(old_log_dir, 'process.csv'))
logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
logger.push_prefix("[" + args.exp_name + '_trial ' + str(trial) + "]")

np.random.seed(trial)

params = policy.get_params()
sess.run(tf.variables_initializer(params))

baseline = LinearFeatureBaseline(env_spec=env.spec)

optimizer = ConjugateGradientOptimizer
optimizer_args = {'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)}

top_paths = BPQ.BoundedPriorityQueue(top_k)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=args.batch_size,
    step_size=args.step_size,
    n_itr=args.n_itr,
    store_paths=True,
    optimizer=optimizer,
    optimizer_args=optimizer_args,
    max_path_length=max_path_length,
    top_paths=top_paths,
    plot=False,
def run_task(snapshot_config, *_):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            with LocalTFRunner(snapshot_config=snapshot_config,
                               max_cpus=4,
                               sess=sess) as local_runner:
                # Instantiate the example classes
                sim = ExampleAVSimulator(**sim_args)
                reward_function = ExampleAVReward(**reward_args)
                spaces = ExampleAVSpaces(**spaces_args)

                # Create the environment
                if 'id' in env_args:
                    env_args.pop('id')
                env = TfEnv(
                    normalize(
                        ASTEnv(simulator=sim,
                               reward_function=reward_function,
                               spaces=spaces,
                               **env_args)))

                # Instantiate the garage objects
                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }

                algo = PPO(env_spec=env.spec,
                           policy=policy,
                           baseline=baseline,
                           optimizer=optimizer,
                           optimizer_args=optimizer_args,
                           **algo_args)

                sampler_cls = ASTVectorizedSampler
                sampler_args['sim'] = sim
                sampler_args['reward_function'] = reward_function

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                # Run the experiment
                local_runner.train(**runner_args)

                if save_expert_trajectory:
                    load_convert_and_save_drl_expert_trajectory(
                        last_iter_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'itr_' + str(runner_args['n_epochs'] - 1) + '.pkl'),
                        expert_trajectory_filename=os.path.join(
                            run_experiment_args['log_dir'],
                            'expert_trajectory.pkl'))

                print('done!')
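# run_task above is typically handed to garage's run_experiment wrapper, which
# configures logging/snapshotting and then invokes run_task(snapshot_config, ...).
# A minimal sketch, assuming the garage 2019.10-era API used by this toolbox; the
# keyword values below are illustrative, not the example's actual run_experiment_args.
from garage.experiment import run_experiment

run_experiment(
    run_task,
    snapshot_mode='last',
    seed=1,
)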
def run_task(snapshot_config, *_):
    config = tf.ConfigProto(device_count={'GPU': 0})
    # config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        with tf.variable_scope('AST', reuse=tf.AUTO_REUSE):
            # Instantiate the example classes
            sim = ExampleAVSimulator(**sim_args)
            reward_function = ExampleAVReward(**reward_args)
            spaces = ExampleAVSpaces(**spaces_args)

            # Create the environment
            # env1 = GoExploreASTEnv(open_loop=False,
            #                        blackbox_sim_state=True,
            #                        fixed_init_state=True,
            #                        s_0=[-0.5, -4.0, 1.0, 11.17, -35.0],
            #                        simulator=sim,
            #                        reward_function=reward_function,
            #                        spaces=spaces
            #                        )
            env1 = gym.make(id=env_args.pop('id'),
                            simulator=sim,
                            reward_function=reward_function,
                            spaces=spaces,
                            **env_args)
            env2 = normalize(env1)
            env = TfEnv(env2)

            sampler_cls = BatchSampler
            # sampler_args = {'n_envs': n_parallel}
            sampler_args = {}

            # expert_trajectory_file = log_dir + '/expert_trajectory.p'
            # with open(expert_trajectory_file, 'rb') as f:
            #     expert_trajectory = pickle.load(f)

            # Run backwards algorithm to robustify
            with LocalTFRunner(snapshot_config=snapshot_config,
                               sess=sess) as local_runner:

                policy = GaussianLSTMPolicy(env_spec=env.spec, **policy_args)
                # name='lstm_policy',
                # env_spec=env.spec,
                # hidden_dim=64,
                # use_peepholes=True)

                baseline = LinearFeatureBaseline(env_spec=env.spec,
                                                 **baseline_args)

                optimizer = ConjugateGradientOptimizer
                optimizer_args = {
                    'hvp_approach': FiniteDifferenceHvp(base_eps=1e-5)
                }

                algo = BackwardAlgorithm(env=env,
                                         env_spec=env.spec,
                                         policy=policy,
                                         baseline=baseline,
                                         optimizer=optimizer,
                                         optimizer_args=optimizer_args,
                                         **algo_args)
                # expert_trajectory=expert_trajectory[-1],
                # epochs_per_step=10,
                # scope=None,
                # max_path_length=max_path_length,
                # discount=discount,
                # gae_lambda=1,
                # center_adv=True,
                # positive_adv=False,
                # fixed_horizon=False,
                # pg_loss='surrogate_clip',
                # lr_clip_range=1.0,
                # max_kl_step=1.0,
                # policy_ent_coeff=0.0,
                # use_softplus_entropy=False,
                # use_neg_logli_entropy=False,
                # stop_entropy_gradient=False,
                # entropy_method='no_entropy',
                # name='PPO',
                # )

                local_runner.setup(algo=algo,
                                   env=env,
                                   sampler_cls=sampler_cls,
                                   sampler_args=sampler_args)

                results = local_runner.train(**runner_args)
                # pdb.set_trace()
                print('done')

                log_dir = run_experiment_args['log_dir']
                with open(log_dir + '/paths.gz', 'wb') as f:
                    try:
                        compress_pickle.dump(results, f,
                                             compression="gzip",
                                             set_default_extension=False)
                    except MemoryError:
                        print('1')
                        # pdb.set_trace()
                        for idx, result in enumerate(results):
                            with open(log_dir + '/path_' + str(idx) + '.gz',
                                      'wb') as ff:
                                try:
                                    compress_pickle.dump(
                                        result, ff,
                                        compression="gzip",
                                        set_default_extension=False)
                                except MemoryError:
                                    print('2')
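# The gzip-compressed pickles written above can be read back with compress_pickle's
# load function. A minimal sketch, assuming the same log_dir layout as the writer
# (the path shown is illustrative):
import compress_pickle

with open(log_dir + '/paths.gz', 'rb') as f:
    paths = compress_pickle.load(f, compression='gzip',
                                 set_default_extension=False)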