def experiment(variant):
    """Meta-train MAML-TRPO on the 2-D point-mass random-goal task.

    Expected keys in ``variant``: ``seed``, ``n_parallel``, ``log_dir``,
    ``flr`` (inner/fast learning rate), ``fbs`` (trajs per inner update),
    ``mlr`` (meta/outer step size), ``hidden_sizes``.
    """
    setup(variant['seed'], variant['n_parallel'], variant['log_dir'])

    inner_lr = variant['flr']
    # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    inner_batch = variant['fbs']
    outer_lr = variant['mlr']
    # 10 also works, but much less stable; 20 is fairly stable, 40 is more stable
    tasks_per_meta_batch = 20
    horizon = 100

    env = TfEnv(normalize(PointEnvRandGoal()))
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=inner_lr,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    trainer = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=inner_batch,  # number of trajs for grad update
        max_path_length=horizon,
        meta_batch_size=tasks_per_meta_batch,
        num_grad_updates=1,  # single inner-loop (alpha) gradient step
        n_itr=1000,
        use_maml=True,
        step_size=outer_lr,
        plot=False,
    )
    trainer.train()
def experiment(variant):
    """Meta-train MAML-TRPO on the ball environment.

    All hyperparameters are read from ``variant`` under their full names:
    ``fast_learning_rate``, ``fast_batch_size``, ``meta_batch_size``,
    ``max_path_length``, ``num_grad_updates``, ``meta_step_size``.
    """
    hp = {
        key: variant[key]
        for key in (
            'fast_learning_rate',
            'fast_batch_size',
            'meta_batch_size',
            'max_path_length',
            'num_grad_updates',
            'meta_step_size',
        )
    }

    env = TfEnv(BallEnv())
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=hp['fast_learning_rate'],
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    trainer = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=hp['fast_batch_size'],  # number of trajs for grad update
        max_path_length=hp['max_path_length'],
        meta_batch_size=hp['meta_batch_size'],
        num_grad_updates=hp['num_grad_updates'],
        n_itr=1000,
        use_maml=True,
        step_size=hp['meta_step_size'],
        plot=False,
    )
    trainer.train()
def main():
    """Launch MAML-TRPO with a categorical MLP policy via run_experiment_lite.

    NOTE(review): this function reads several names that are not defined in
    this view (fast_learning_rate, fast_batch_size, meta_batch_size,
    max_path_length, num_grad_updates, use_maml, meta_step_size, make_env,
    nature_cnn) -- presumably module-level globals; confirm before running.
    """
    config = tf.ConfigProto()
    # Grab GPU memory on demand rather than reserving it all up front.
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config):
        # env = TfEnv(normalize(GridWorldEnvRand('four-state')))
        env = DummyVecEnv([make_env])
        policy = MAMLCategoricalMLPPolicy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=fast_learning_rate,
            # BUG FIX: the original was missing the comma after this keyword
            # argument ("prob_network = nature_cnn hidden_nonlinearity=..."),
            # which made the whole call a syntax error.
            prob_network=nature_cnn,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=fast_batch_size,  # number of trajs for grad update
            max_path_length=max_path_length,
            meta_batch_size=meta_batch_size,
            num_grad_updates=num_grad_updates,
            n_itr=800,
            use_maml=use_maml,
            step_size=meta_step_size,
            plot=False,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,
            snapshot_mode="last",
            seed=1,
            exp_prefix='trpo_maml_4state',
            exp_name='trpo_maml' + str(int(use_maml)) + '_fbs' + str(fast_batch_size) + '_mbs' + str(meta_batch_size) + '_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) + '_step1' + str(num_grad_updates),
            plot=False,
        )
def experiment(variant):
    """Meta-train MAML-TRPO on a Sawyer push / pick-place task family,
    using pre-collected expert data for the meta-update.

    Expected keys in ``variant``: ``seed``, ``n_parallel``, ``log_dir``,
    ``expertDataLoc``, ``expertDataItr``, ``flr``, ``fbs``, ``mlr``,
    ``regionSize`` ('20X20' or '60X30'), ``envType`` ('Push' or
    'PickPlace'), ``hidden_sizes``.
    """
    setup(variant['seed'], variant['n_parallel'], variant['log_dir'])

    expertDataLoc = variant['expertDataLoc']
    expertDataItr = variant['expertDataItr']
    fast_learning_rate = variant['flr']
    # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    fast_batch_size = variant['fbs']
    # 10 also works, but much less stable; 20 is fairly stable, 40 is more stable
    meta_batch_size = 20
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']
    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_v1.pkl'
    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/PickPlace_60X30.pkl'
    # BUG FIX: the original called pickle.load(open(...)) and leaked the file
    # handle; the context manager closes it deterministically.
    with open(tasksFile, 'rb') as f:
        tasks = pickle.load(f)

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
        numExpertPolicies=20,
        expertDataInfo={
            'expert_loc': expertDataLoc,
            'expert_itr': expertDataItr
        })
    algo.train()
# NOTE(review): whitespace-mangled fragment of a MAML-TRPO launcher -- the tail
# of a policy constructor, baseline selection on 'bas', the MAMLTRPO call, and
# an unterminated run_experiment_lite(...) call ('vpg_maml_point100'). The
# enclosing definition's header and the call's closing paren are outside this
# view, so the code is left byte-identical rather than reconstructed.
hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100,100), ) if bas == 'zero': baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, # number of trajs for grad update max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=100, use_maml=use_maml, step_size=meta_step_size, plot=False, ) run_experiment_lite( algo.train(), n_parallel=1, snapshot_mode="last", python_command='python3', seed=1, exp_prefix='vpg_maml_point100', exp_name='trpomaml'+str(int(use_maml))+'_fbs'+str(fast_batch_size)+'_mbs'+str(meta_batch_size)+'_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) +'_step1'+str(num_grad_updates), plot=False,
# NOTE(review): whitespace-mangled fragment -- baseline selection on 'bas', a
# MAMLTRPO call with std modifiers and pooled goals (R7DOF goals pool lines
# commented out), and the head of a run_experiment_lite(...) call. Both the
# enclosing definition's start and the call's end are outside this view, so
# the code is left byte-identical rather than reconstructed.
baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=n_itr, use_maml=use_maml, use_pooled_goals=True, step_size=meta_step_size, plot=False, pre_std_modifier=pre_std_modifier, post_std_modifier_train=post_std_modifier_train, post_std_modifier_test=post_std_modifier_test, # goals_pool_to_load=R7DOF_GOALS_LOCATION, # goals_pickle_to=R7DOF_GOALS_LOCATION, # goals_pool_size=200, ) run_experiment_lite( algo.train(), n_parallel=1, #10, If you use more than 1, your std modifiers may not work snapshot_mode="last", python_command='python3',
# NOTE(review): whitespace-mangled fragment -- a MAMLCategoricalMLPPolicy
# constructor, a MAMLTRPO call driven by argparse values (args.n, args.iters),
# and the head of a run_experiment_lite(...) call with gap snapshots. The
# enclosing definition and the call's closing paren are outside this view, so
# the code is left byte-identical rather than reconstructed.
policy = MAMLCategoricalMLPPolicy( name="policy", env_spec=env.spec, grad_step_size=0.1, hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=20, # number of trajs for grad update max_path_length=int(args.n), meta_batch_size=40, num_grad_updates=1, n_itr=args.iters, use_maml=True, step_size=0.01) run_experiment_lite( algo.train(), exp_prefix=args.expt_name, exp_name='run_{}'.format(args.seed), n_parallel=1, snapshot_mode="gap", snapshot_gap=20, python_command='python3', seed=args.seed,
# NOTE(review): whitespace-mangled fragment -- baseline selection on 'bas', a
# MAMLTRPO call with alpha/beta update comments and std modifiers, and the
# head of a run_experiment_lite(...) call ('maml_trpo_push100'). Both edges of
# the enclosing definition are outside this view, so the code is left
# byte-identical rather than reconstructed.
baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline( env_spec=env.spec) else: baseline = GaussianMLPBaseline( env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size= fast_batch_size, # number of trajs for alpha grad update max_path_length=max_path_length, meta_batch_size= meta_batch_size, # number of tasks sampled for beta grad update num_grad_updates= num_grad_updates, # number of alpha grad updates n_itr=100, use_maml=use_maml, step_size=meta_step_size, plot=False, pre_std_modifier=pre_std_modifier, post_std_modifier_train=post_std_modifier_train, post_std_modifier_test=post_std_modifier_test) run_experiment_lite( algo.train(), n_parallel=1, snapshot_mode="last", python_command='python3', seed=seed, exp_prefix='maml_trpo_push100',
def experiment(variant):
    """Meta-train MAML-TRPO on the Sawyer door-opening task family.

    Expected keys in ``variant``: ``seed``, ``n_parallel``, ``log_dir``,
    ``flr`` (inner learning rate), ``fbs`` (trajs per inner update),
    ``mlr`` (meta step size), ``hidden_sizes``.
    """
    setup(variant['seed'], variant['n_parallel'], variant['log_dir'])

    fast_learning_rate = variant['flr']
    # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    fast_batch_size = variant['fbs']
    # 10 also works, but much less stable; 20 is fairly stable, 40 is more stable
    meta_batch_size = 20
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    tasksFile = '/root/code/multiworld/multiworld/envs/goals/Door_60X20X20.pkl'
    # BUG FIX: the original called pickle.load(open(...)) and leaked the file
    # handle; the context manager closes it deterministically.
    with open(tasksFile, 'rb') as f:
        tasks = pickle.load(f)

    baseEnv = SawyerDoorOpenEnv(tasks=tasks)
    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )
    # Optional custom snapshot directory (disabled in the original):
    # import os
    # saveDir = variant['saveDir']
    # if os.path.isdir(saveDir)==False:
    #     os.mkdir(saveDir)
    # logger.set_snapshot_dir(saveDir)
    # #logger.set_snapshot_gap(20)
    # logger.add_tabular_output(saveDir+'progress.csv')
    algo.train()
# NOTE(review): whitespace-mangled fragment -- baseline selection on 'bas', a
# MAMLTRPO call loading a point goals pool (POINT_GOALS_LOCATION['.local']),
# and the head of a run_experiment_lite(...) call. Both edges of the enclosing
# definition are outside this view, so the code is left byte-identical rather
# than reconstructed.
if bas == 'zero': baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, # number of trajs for alpha grad update max_path_length=max_path_length, meta_batch_size=meta_batch_size, # number of tasks sampled for beta grad update num_grad_updates=num_grad_updates, # number of alpha grad updates n_itr=100, #100 use_maml=use_maml, step_size=meta_step_size, plot=False, pre_std_modifier=pre_std_modifier, post_std_modifier_train=post_std_modifier_train, post_std_modifier_test=post_std_modifier_test, meta_train_on_expert_traj=False, goals_pool_to_load=POINT_GOALS_LOCATION[".local"], # goals_pickle_to=POINT_GOALS_LOCATION[".local"], # goals_pool_size=1000, ) run_experiment_lite( algo.train(), n_parallel=1, snapshot_mode="last", python_command='python3', seed=1,
# NOTE(review): whitespace-mangled fragment -- wraps an env in TfEnv, builds a
# randomly initialized MAMLGaussianMLPPolicy, a single-task MAMLTRPO (step
# size indexed by step_i), and the head of a run_experiment_lite call around
# algo.meta_online_train(). Both edges of the enclosing definition are outside
# this view, so the code is left byte-identical rather than reconstructed.
env = TfEnv(env) policy = MAMLGaussianMLPPolicy( # random policy name='policy', env_spec=env.spec, hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=20, # number of trajs for grad update max_path_length=20, meta_batch_size=1, num_grad_updates=1, n_itr=n_itr, use_maml=True, step_size=step_sizes[step_i], plot=False, ) run_experiment_lite( algo.meta_online_train(), # Number of parallel workers for sampling n_parallel=2, # Only keep the snapshot parameters for the last iteration snapshot_mode="all", # Specifies the seed for the experiment. If this is not provided, a random seed # will be used
# NOTE(review): whitespace-mangled fragment -- a MAMLGaussianMLPPolicy
# constructor, a MAMLTRPO call (n_itr=800, use_maml from args), and the head
# of a run_experiment_lite(...) call with gap-25 snapshots. Both edges of the
# enclosing definition are outside this view, so the code is left
# byte-identical rather than reconstructed.
policy = MAMLGaussianMLPPolicy( name="policy", env_spec=env.spec, grad_step_size=0.1, hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=20, # number of trajs for grad update max_path_length=200, meta_batch_size=40, num_grad_updates=1, n_itr=800, use_maml=bool(args.use_maml), step_size=0.01, plot=False, ) run_experiment_lite( algo.train(), exp_prefix=args.expt_name, exp_name='run_{}'.format(args.seed), n_parallel=8, # Only keep the snapshot parameters for the last iteration snapshot_mode="gap", snapshot_gap=25,
# NOTE(review): whitespace-mangled fragment -- a MAMLGaussianMLPPolicy built
# from the v[...] hyperparameter dict, a MAMLTRPO call, a 'direc22'/'slope'
# experiment-name switch, and the head of a run_experiment_lite(...) call for
# a cheetah experiment. Both edges of the enclosing definition are outside
# this view, so the code is left byte-identical rather than reconstructed.
policy = MAMLGaussianMLPPolicy( name="policy", env_spec=env.spec, grad_step_size=v['fast_lr'], # learning rate of policy hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=v['fast_batch_size'], # Number of samples per iteration. max_path_length=max_path_length, # Maximum length of a single rollout. meta_batch_size=v[ 'meta_batch_size'], # Number of tasks sampled per meta-update num_grad_updates=num_grad_updates, # Number of fast gradient updates n_itr=n_itr, # Number of iterations. use_maml=use_maml, step_size=v['meta_step_size'], # learning rate of meta-update plot=False, ) direc = 'direc22' if direc else 'slope' run_experiment_lite( algo.train(), exp_prefix='trpo_maml_cheetah' + direc + str(max_path_length), exp_name='maml' + str(int(use_maml)) + '_fbs' + str(v['fast_batch_size']) + '_mbs' + str(v['meta_batch_size']) + '_flr_' + str(v['fast_lr']) + '_mlr' + str(v['meta_step_size']), # Number of parallel workers for sampling
# NOTE(review): whitespace-mangled fragment -- baseline selection on 'bas', a
# MAMLTRPO call that resumes from a pickled policy snapshot (policy=None +
# load_policy path) with a pusher goals pool, and the head of a
# run_experiment_lite(...) call. Both edges of the enclosing definition are
# outside this view, so the code is left byte-identical rather than
# reconstructed.
baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=None, load_policy= "/home/rosen/maml_rl/data/local/PU-TR/PU_TRrelu.f0.0_081018_15_42/itr_799.pkl", baseline=baseline, batch_size=fast_batch_size, max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=n_itr, use_maml=use_maml, use_pooled_goals=True, step_size=meta_step_size, plot=False, pre_std_modifier=pre_std_modifier, post_std_modifier_train=post_std_modifier_train, post_std_modifier_test=post_std_modifier_test, goals_pool_to_load=PUSHER_GOALS_LOCATION, # goals_pickle_to=PUSHER_GOALS_LOCATION, goals_pool_size=200, ) run_experiment_lite( algo.train(), n_parallel= 1, #10, If you use more than 1, your std modifiers may not work snapshot_mode="all",
# NOTE(review): whitespace-mangled fragment -- the tail of a policy
# constructor, a MAMLTRPO call with expert-data / imitation-step options, and
# the start of a triple-quoted (disabled) run_experiment_lite call. Both edges
# of the enclosing definition (and the closing quotes of the string) are
# outside this view, so the code is left byte-identical rather than
# reconstructed.
hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, # number of trajs for grad update max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=500, use_maml=use_maml, step_size=meta_step_size, numExpertPolicies=20, num_imSteps=num_imSteps, expertDataInfo={ 'expert_loc': expertDataLoc, 'expert_itr': expertDataItr }, plot=False, ) """run_experiment_lite( algo.train(), n_parallel=4, snapshot_mode="all", # python_command='python3', seed=1,
# NOTE(review): whitespace-mangled fragment -- a MAMLGaussianMLPPolicy built
# from the v[...] hyperparameter dict, a MAMLTRPO call (n_itr=800), a
# 'direc'/'' experiment-name switch, and the head of a run_experiment_lite
# call for a cheetah experiment. Both edges of the enclosing definition are
# outside this view, so the code is left byte-identical rather than
# reconstructed.
policy = MAMLGaussianMLPPolicy( name="policy", env_spec=env.spec, grad_step_size=v['fast_lr'], hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = MAMLTRPO( env=env, policy=policy, baseline=baseline, batch_size=v['fast_batch_size'], # number of trajs for grad update max_path_length=max_path_length, meta_batch_size=v['meta_batch_size'], num_grad_updates=num_grad_updates, n_itr=800, use_maml=use_maml, step_size=v['meta_step_size'], plot=False, ) direc = 'direc' if direc else '' run_experiment_lite( algo.train(), exp_prefix='trpo_maml_cheetah' + direc + str(max_path_length), exp_name='maml' + str(int(use_maml)) + '_fbs' + str(v['fast_batch_size']) + '_mbs' + str(v['meta_batch_size']) + '_flr_' + str(v['fast_lr']) + '_mlr' + str(v['meta_step_size']), # Number of parallel workers for sampling
def experiment(variant):
    """Meta-train MAML-TRPO on a Sawyer push / pick-place family, resuming
    from a pre-trained parameter snapshot and logging to variant['saveDir'].

    Expected keys in ``variant``: ``seed``, ``flr``, ``fbs``, ``mlr``,
    ``regionSize`` ('20X20' or '60X30'), ``envType`` ('Push' or
    'PickPlace'), ``init_param_file``, ``saveDir``.
    """
    import os

    seed = variant['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    fast_learning_rate = variant['flr']
    # 10 works for [0.1, 0.2], 20 doesn't improve much for [0, 0.2]
    fast_batch_size = variant['fbs']
    # 10 also works, but much less stable; 20 is fairly stable, 40 is more stable
    meta_batch_size = 20
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']
    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_6_8.pkl'
    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_60X30.pkl'
    # BUG FIX: the original called pickle.load(open(...)) and leaked the file
    # handle; the context manager closes it deterministically.
    with open(tasksFile, 'rb') as f:
        tasks = pickle.load(f)

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(
        FlatGoalEnv(
            baseEnv, obs_keys=['state_observation', 'state_desired_goal']))
    env = TfEnv(NormalizedBoxEnv(env))

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=None,  # parameters are loaded from the snapshot instead
        load_policy=variant['init_param_file'],
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    saveDir = variant['saveDir']
    # Idiom fix: 'not os.path.isdir(...)' instead of comparing '== False'.
    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)
    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')
    algo.train()