def get_variants(domain, task, policy):
    """Build a VariantGenerator spanning all hyperparameter combinations
    for the given domain/task/policy triple.

    List-valued (or callable) entries become axes of the variant grid;
    every other entry is wrapped as a single-choice axis.
    """
    params = {
        'prefix': '{}/{}'.format(domain, task),
        'domain': domain,
        'task': task,
        'git_sha': get_git_rev(),
        'env_params': ENV_PARAMS[domain].get(task, {}),
        'policy_params': POLICY_PARAMS[policy][domain],
        'value_fn_params': VALUE_FUNCTION_PARAMS,
        'algorithm_params': deep_update(ALGORITHM_PARAMS_BASE,
                                        ALGORITHM_PARAMS[domain]),
        'replay_buffer_params': REPLAY_BUFFER_PARAMS,
        'sampler_params': SAMPLER_PARAMS,
        'run_params': deep_update(RUN_PARAMS_BASE, RUN_PARAMS[domain]),
    }

    # TODO: Remove flatten. Our variant generator should support nested params
    flat_params = flatten(params, separator='.')

    variant_generator = VariantGenerator()
    for name, value in flat_params.items():
        # Lists and callables are handed over as-is (multiple / lazy
        # choices); anything else is a fixed single-value axis.
        is_axis = isinstance(value, list) or callable(value)
        variant_generator.add(name, value if is_axis else [value])
    return variant_generator
def build_nested_variant_generator(exp_spec):
    """Return a zero-argument generator function yielding one fully merged
    experiment dict per variant described by ``exp_spec``.

    ``exp_spec`` must contain 'variables' (grid axes, possibly None),
    'constants' (fixed settings) and 'meta_data' (merged into every
    yielded dict).
    """
    assert check_exp_spec_format(exp_spec)
    from rllab.misc.instrument import VariantGenerator

    variables = exp_spec['variables']
    constants = exp_spec['constants']

    # check if we're effectively just running a single experiment
    if variables is None:

        def vg_fn():
            # Copy first: the original code aliased `constants` and then
            # update()d it, mutating exp_spec['constants'] as a side effect
            # and corrupting any later use of the same spec.
            dict_to_yield = dict(constants)
            dict_to_yield.update(exp_spec['meta_data'])
            yield dict_to_yield

        return vg_fn

    variables = flatten_dict(variables)
    vg = VariantGenerator()
    for k, v in variables.items():
        vg.add(k, v)

    def vg_fn():
        for flat_variables in vg.variants():
            dict_to_yield = add_variable_to_constant_specs(
                constants, flat_variables)
            dict_to_yield.update(exp_spec['meta_data'])
            # '_hidden_keys' is VariantGenerator bookkeeping, not a param.
            del dict_to_yield['_hidden_keys']
            yield dict_to_yield

    return vg_fn
def experiment(variant):
    """Build networks from ``variant`` and train MetaSoftActorCritic over
    the cross-product of the environment specs in ``variant['env_specs']``.

    Always returns 1 (success marker for the experiment runner).
    """
    # we have to generate the combinations for the env_specs
    env_specs = variant['env_specs']
    env_specs_vg = VariantGenerator()
    env_spec_constants = {}
    # List-valued entries become grid axes; scalars stay fixed.
    for k, v in env_specs.items():
        if isinstance(v, list):
            env_specs_vg.add(k, v)
        else:
            env_spec_constants[k] = v

    env_specs_list = []
    for es in env_specs_vg.variants():
        # '_hidden_keys' is VariantGenerator bookkeeping, not a spec field.
        del es['_hidden_keys']
        es.update(env_spec_constants)
        env_specs_list.append(es)
    print(env_specs_list)
    print(env_specs_list[0])

    env_sampler = EnvSampler(env_specs_list)

    # set up similar to non-meta version
    sample_env, _ = env_sampler()
    if variant['algo_params']['concat_env_params_to_obs']:
        # Observations get the env's meta parameters appended.
        meta_params_dim = sample_env.env_meta_params.shape[0]
    else:
        meta_params_dim = 0
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    net_size = variant['net_size']
    # Q-function: (obs, action[, meta params]) -> scalar.
    qf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + action_dim + meta_params_dim,
        output_size=1,
    )
    # State-value function: (obs[, meta params]) -> scalar.
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size],
        input_size=obs_dim + meta_params_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size],
        obs_dim=obs_dim + meta_params_dim,
        action_dim=action_dim,
    )
    algorithm = MetaSoftActorCritic(env_sampler=env_sampler,
                                    policy=policy,
                                    qf=qf,
                                    vf=vf,
                                    **variant['algo_params'])
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()

    return 1
def get_variants():
    """Turn SHARED_PARAMS into a VariantGenerator.

    List values are taken as multi-choice axes; scalars become
    single-choice axes.
    """
    generator = VariantGenerator()
    for name, value in SHARED_PARAMS.items():
        choices = value if isinstance(value, list) else [value]
        generator.add(name, choices)
    return generator
def get_variants(args):
    """Build a VariantGenerator from SHARED_PARAMS overlaid with the
    env-specific parameters for ``args.env``.

    List values become multi-choice axes; scalars become single-choice
    axes.
    """
    env_params = ENV_PARAMS[args.env]
    # Copy so repeated calls don't mutate the module-level SHARED_PARAMS:
    # the original updated it in place, leaking one env's overrides into
    # every later call.
    params = dict(SHARED_PARAMS)
    params.update(env_params)

    vg = VariantGenerator()
    for key, val in params.items():
        if isinstance(val, list):
            vg.add(key, val)
        else:
            vg.add(key, [val])

    return vg
def get_variants(args):
    """Build a VariantGenerator from SHARED_PARAMS overlaid with the
    env-specific parameters for ``args.env``.

    List values become multi-choice axes; scalars become single-choice
    axes.
    """
    env_params = ENV_PARAMS[args.env]
    # Copy so repeated calls don't mutate the module-level SHARED_PARAMS:
    # the original updated it in place, leaking one env's overrides into
    # every later call.
    params = dict(SHARED_PARAMS)
    params.update(env_params)

    vg = VariantGenerator()
    for key, val in params.items():
        if isinstance(val, list):
            vg.add(key, val)
        else:
            vg.add(key, [val])

    return vg
def main():
    """Enumerate experiment configurations and launch one run per variant."""
    # Set up multiple experiments at once.
    generator = VariantGenerator()
    generator.add('target_velocity', [0.7])
    generator.add('seed', [100])
    print('Number of Configurations: ', len(generator.variants()))

    # Run each experiment variant.
    for variant in generator.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=variant,
                            n_parallel=1,
                            snapshot_mode='last',
                            seed=variant['seed'])
def get_variants(args):
    """Build a VariantGenerator from SHARED_PARAMS, the env-specific
    parameters for ``args.env``, and CLI overrides (seed, num_skills,
    eval_freq).

    List values become multi-choice axes; scalars become single-choice
    axes.
    """
    env_params = ENV_PARAMS[args.env]
    # Copy so repeated calls don't mutate the module-level SHARED_PARAMS:
    # the original updated it in place, leaking one call's env params and
    # CLI overrides into every later call.
    params = dict(SHARED_PARAMS)
    params.update(env_params)
    params['seed'] = args.seed
    # Optional CLI overrides only apply when explicitly given.
    if args.num_skills is not None:
        params['num_skills'] = args.num_skills
    if args.eval_freq is not None:
        params['eval_freq'] = args.eval_freq

    vg = VariantGenerator()
    for key, val in params.items():
        if isinstance(val, list):
            vg.add(key, val)
        else:
            vg.add(key, [val])

    return vg
def get_variants(args):
    """Build a VariantGenerator from COMMON_PARAMS plus env-specific
    parameters, resolving 'low_level_policy_path' entries against the
    mode-appropriate base directory.

    Raises:
        ValueError: if ``args.mode`` is neither 'local' nor 'ec2'.
    """
    env_params = ENV_PARAMS[args.env]
    # Copy so we don't mutate the module-level COMMON_PARAMS: the original
    # updated it in place AND rewrote its 'low_level_policy_path' entry,
    # corrupting state for any later call.
    params = dict(COMMON_PARAMS)
    params.update(env_params)

    if args.mode == 'local':
        trained_policies_base = os.path.join(os.getcwd(),
                                             'sac/policies/trained_policies')
    elif args.mode == 'ec2':
        trained_policies_base = '/root/code/rllab/sac/policies/trained_policies'
    else:
        # The original fell through here and crashed later with a confusing
        # NameError on trained_policies_base; fail fast and explicitly.
        raise ValueError('Unknown mode: {}'.format(args.mode))

    params['low_level_policy_path'] = [
        os.path.join(trained_policies_base, p)
        for p in params['low_level_policy_path']
    ]

    vg = VariantGenerator()
    for key, val in params.items():
        if isinstance(val, list):
            vg.add(key, val)
        else:
            vg.add(key, [val])

    return vg
def run_experiment(argv):
    """Grid-launch model-ensemble TRPO training runs: locally, on EC2 spot
    instances, or sharded across local GPUs ('mgpu' mode)."""
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['HalfCheetahEnvRandParams'
                   ])  # HalfCheetahEnvRandParams #TODO ReacherEnvRandParams
    vg.add('n_itr', [40])
    vg.add('log_scale_limit', [0.0])
    vg.add('step_size', [0.01])
    vg.add('seed', [22, 33, 55])  #TODO set back to [1, 11, 21, 31, 41]
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('batch_size_env_samples', [4000])
    vg.add('batch_size_dynamics_samples', [100000])
    vg.add('initial_random_samples', [None])
    vg.add('dynamic_model_epochs', [(1000, 1000)])  #TODO
    vg.add('num_gradient_steps_per_iter', [30])  #TODO
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('weight_normalization_model', [False])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('num_models', [5])
    vg.add('output_bias_range', [(0, 0.1), (0, 0.5), (0, 1)])
    vg.add('output_noise_std', [0.0, 0.1])
    vg.add('resample_output_bias', [True, False])
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # Launch settings shared by every run (consumed by run_multi_gpu in
    # 'mgpu' mode).
    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=10,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        # Shard the variant list across local GPUs via a helper script.
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path,
                                   'mgpu_model_ensemble_trpo_train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # Auto-detect GPU count from the NVIDIA driver's proc entries.
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)
    else:
        # ----------------------- AWS configuration ---------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"] / 2)
        else:
            # make the default 4 if not using ec2
            n_parallel = 6
        if args.mode == 'ec2':
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)
            # NOTE(review): indentation reconstructed — these prints reference
            # `subnets`, which only exists in ec2 mode, so they are kept
            # inside this branch; confirm against the original layout.
            print(
                "\n" + "**********" * 10 +
                "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
            print(
                'Running on type {}, with price {}, on the subnets: '.format(
                    config.AWS_INSTANCE_TYPE,
                    config.AWS_SPOT_PRICE,
                ), str(subnets))

        # ----------------------- TRAINING ------------------------------------
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_trpo_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length'], v['num_gradient_steps_per_iter'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                # Pin each run to a randomly chosen cheap subnet/region and
                # update the per-region AWS credentials/AMI accordingly.
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command='python3',
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'", "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
def run_experiment(argv):
    """Grid-launch MAML policy-gradient training runs, locally or on EC2
    spot instances."""
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('env', ['WalkerEnvRandomParams', 'HopperEnvRandParams'])
    vg.add('n_itr', [301])
    vg.add('fast_lr', [0.001, 0.01, 0.1])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('meta_step_size', [0.01])
    vg.add('fast_batch_size', [20])
    vg.add('seed', [1, 11, 21])
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])
    vg.add('trainable_step_size', [False])
    vg.add('bias_transform', [False])
    vg.add('policy', ['MAMLGaussianMLPPolicy'])
    vg.add('parallel_sampler', [True])

    variants = vg.variants()

    # ----------------------- AWS configuration -------------------------------
    if args.mode == 'ec2':
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        # NOTE(review): indentation reconstructed — both prints reference
        # ec2-only state (`subnets`), so they are kept inside this branch;
        # confirm against the original layout.
        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    if args.mode == 'ec2':
        n_parallel = 1  # for MAML use smaller number of parallel worker since parallelization is also done over the meta batch size
    else:
        n_parallel = 1

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "%s_%s_%i_%.3f_%i_id_%i" % (
            EXP_PREFIX, v['env'], v['hidden_sizes'][0], v['meta_step_size'],
            v['seed'], exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # configure instance: pin the run to a random cheap subnet and
            # update per-region AWS credentials/AMI accordingly.
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last_gap",
            snapshot_gap=50,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v["seed"],
            python_command="python3",
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def main():
    """Set up a variant grid for car-control experiments and launch a run
    per configuration, optionally warm-starting from a saved network."""
    global policy
    global baseline

    args = parse_arguments()

    # Load pre-trained network if available.
    use_pretrained = args.network is not None
    if use_pretrained:
        data = joblib.load(args.network)
        policy = data['policy']
        baseline = data['baseline']

    # Set up multiple experiments at once.
    generator = VariantGenerator()

    # Non-configurable parameters (do not change).
    generator.add('trajectory', ['Circle'])
    generator.add('objective', ['TargetVelocity'])
    generator.add('algo', ['TRPO'] if args.algo == 'trpo' else ['CPO'])

    # Configurable parameters.
    # Options for model_type: 'BrushTireModel', 'LinearTireModel'
    # Options for robot_type: 'MRZR', 'RCCar'
    robot_type = 'RCCar'
    use_ros = False
    seeds = [100, 200]
    generator.add('seed', seeds)
    generator.add('target_velocity', [1.0])
    generator.add('radius', [1.0])
    generator.add('dt', [0.1])
    generator.add('eps', [0.05])
    generator.add('model_type', ['BrushTireModel'])
    generator.add('robot_type', [robot_type])
    generator.add('use_ros', [use_ros])
    generator.add('pretrained', [use_pretrained])

    print('Number of Configurations: ', len(generator.variants()))

    # Run each experiment variant.
    for variant in generator.variants():
        run_experiment_lite(stub_method_call=run_task,
                            variant=variant,
                            n_parallel=4,
                            snapshot_mode='last',
                            seed=variant['seed'])
report.add_text( 'Outer iteration: {}, disc loss: {}, gen loss: {}'.format( outer_iter, dloss, gloss)) generated_samples, _ = gan.sample_generator(50) report.add_image(plot_samples(generated_samples)) report.add_image(plot_dicriminator(gan)) report.save() if __name__ == '__main__': vg = VariantGenerator() # vg.add('generator_init', ['xavier', 0.02, 0.1, 0.005]) # vg.add('generator_iters', [40, 20, 5, 2]) # vg.add('discriminator_iters', [20, 5, 1]) # vg.add('generator_learning_rate', [0.0003, 0.001, 0.003, 0.01, 0.1]) # vg.add('discriminator_learning_rate', [0.0003, 0.001, 0.003, 0.01, 0.1]) vg.add('outer_iters', [500]) for variant in vg.variants(randomized=False): run_experiment_lite( stub_method_call=run_task, mode='local', n_parallel=1, # Only keep the snapshot parameters for the last iteration snapshot_mode="last", seed=int(time.time()), exp_prefix='debug_simple_circle_gan', variant=variant, # exp_name=exp_name, )
n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'maze-ant-goal-selfplay2' vg = VariantGenerator() vg.add('goal_size', [ 2 ]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [0.5]) # vg.add('only_feasible', [True]) vg.add('goal_range', [5]) # this will be used also as bound of the state_space vg.add('goal_center', [(0, 0)]) # goal-algo params vg.add('min_reward', [0.1]) vg.add('max_reward', [0.9]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) # !!!! vg.add('persistence', [1]) vg.add( 'n_traj', [3] ) # only for labeling and plotting (for now, later it will have to be equal to persistence!)
env, seed_starts, report, distance_threshold=0.1, brownian_variance=1, size=8000, animate=True, limit=v['goal_range'], check_feasible=True, check_feasible_path_length=500, center=v['goal_center']) return vg = VariantGenerator() vg.add('seed', [2]) vg.add('maze_id', [0]) # default is 0 vg.add('terminal_eps', [0.3]) vg.add('start_size', [ 15 ]) # this is the ultimate start we care about: getting the pendulum upright vg.add('start_goal', [[ 0, 4, 0.55, 1, 0, 0, 0, 0, 1,
n_parallel = int(info["vCPU"] / 2) if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'new-oracle-maze-ant' vg = VariantGenerator() vg.add('goal_size', [ 2 ]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [0.5, 1]) vg.add('only_feasible', [True]) vg.add('maze_id', [0]) # default is 0 vg.add('goal_range', lambda maze_id: [5] if maze_id == 0 else [7] ) # this will be used also as bound of the state_space vg.add('goal_center', lambda maze_id: [(2, 2)] if maze_id == 0 else [(0, 0)]) # goal-algo params vg.add('min_reward', [0]) vg.add('max_reward', [1]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) vg.add('persistence', [1]) vg.add(
if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() #exp_prefix = 'new-goalGAN-maze1' exp_prefix = 'goal-gan-maze11-run6' vg = VariantGenerator() vg.add('goal_size', [ 2 ]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [0.3]) vg.add('only_feasible', [True]) vg.add('maze_id', [11]) vg.add('goal_range', lambda maze_id: [5] if maze_id == 0 else [7] ) # this will be used also as bound of the state_space vg.add('goal_center', lambda maze_id: [(2, 2)] if maze_id == 0 else [(0, 0)]) # goal-algo params vg.add('min_reward', [0]) vg.add('max_reward', [1]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) # !!!! vg.add('persistence', [1]) vg.add(
config.AWS_INSTANCE_TYPE = ec2_instance # config.AWS_SPOT_PRICE = str(info["price"]) config.AWS_SPOT_PRICE = '1.0' n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() vg = VariantGenerator() vg.add('maze_id', [0]) # default is 0 vg.add( 'start_size', [15] ) # this is the ultimate start we care about: getting the pendulum upright vg.add('start_goal', [[ 0, 4, 0.55, 1, 0, 0, 0, 0, 1, 0, -1,
n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'start-selfplay-arm3d-key8' vg = VariantGenerator() vg.add( 'start_size', [7] ) # this is the ultimate start we care about: getting the pendulum upright vg.add('start_bounds', [[(-2.2854, -.05236, -3.9, -2.3213, -3.15, -2.094, -3.15), (1.714602, 1.3963, 0.0, 0.0, 3.15, 0.0, 3.15)]]) # vg.add('start_goal', [(1.55, 0.4, -3.75, -1.15, -10.75, -2.09, 0.05)]) vg.add('start_goal', [(1.55, 0.4, -3.75, -1.15, 1.81, -2.09, 0.05)]) vg.add( 'ultimate_goal', [( 0.0, 0.3, -0.7, # first point 0.0, 0.3, -0.4, # second point
def main(train_bool, manual_edit_params, load_saved_params, saved_config_path):
    """Train or evaluate a roach dynamics model over a variant grid.

    Args:
        train_bool: True to train; False to run rollout evaluation.
        manual_edit_params: if True, variant values below override the
            matching entries of the loaded config (via replace_in_dict).
        load_saved_params: if True, merge a previously saved config file
            into the default config before building variants.
        saved_config_path: path of the saved config to merge.
    """
    #################################
    ######## Set parameters #########
    #################################

    # read in default config
    # NOTE(review): yaml.load without an explicit Loader can execute
    # arbitrary tags from the file — prefer yaml.safe_load unless full
    # YAML is required.
    config = yaml.load(open("../config.yaml"))

    if load_saved_params:
        saved_config = yaml.load(open(saved_config_path, "r"))
        #IPython.embed()
        if train_bool:
            # replace training config only
            config["training"] = recursive_dict_merge(config["training"],
                                                      saved_config["training"])
        else:
            # replace testing config only
            config["testing"] = recursive_dict_merge(config["testing"],
                                                     saved_config["testing"])
        IPython.embed()

    vg = VariantGenerator()
    vg.add('config', [config])

    if manual_edit_params:
        # For testing, you must fill out:
        vg.add('previous_dynamics_model', [
            "/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_11_optimization/_ubs_23_ulr_0.0num_updates1_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/model_aggIter0_epoch45"
        ])
        #vg.add('previous_dynamics_model', ["/home/anagabandi/rllab-private/data/local/experiment/MAML_roach/9_7_optimization/_ubs_23_ulr_2.0num_updates2_layers_2_x500_task_list_turf_styrofoam_carpet_mlr_0.001_mbs_64_num-sgd-steps_1_reg_weight_0.001_dim_bias_5_metatrain_lr_False/model_aggIter0_epoch45"])
        vg.add('restore_previous_dynamics_model', [True])
        # For testing, please customize these:
        vg.add('num_steps_per_rollout', [110])
        vg.add('desired_shape_for_rollout', ["straight"])
        vg.add('save_rollout_run_num', [1])
        vg.add('dynamic_evaluation', [True])  #####~!!!!!!!!!!!!!!!!!!

    # NOTE(review): indentation reconstructed — the training/model settings
    # below are assumed to sit at function level (outside the
    # manual_edit_params branch); confirm against the original layout.
    vg.add('meta_batch_size', [64])
    vg.add('meta_lr', [0.001])
    vg.add('update_batch_size', [23])
    vg.add('max_runs_per_surface', [5])  #396
    vg.add('num_updates', [1])
    vg.add('update_lr', [0.1])
    vg.add("task_list", [["all"]])  #"all"
    vg.add('max_epochs', [50])
    vg.add('num_sgd_steps', [1])

    # Aggregation
    vg.add('ratio_new', [0.9])
    vg.add('curr_agg_iter', [0])  #0, 1, 2, etc

    # Misc
    vg.add('horizon', [5])
    # vg.add('use_reg', [True])
    vg.add('seed', [0])
    vg.add('nonlinearity', ['relu'])
    if config['training']['use_reg']:
        vg.add('regularization_weight', [0.001])
    vg.add('use_clip', [True])
    vg.add("weight_initializer", ["xavier"])
    vg.add("dim_hidden", [[500, 500]])
    vg.add('optimizer', ["adam"])
    vg.add('dim_bias', [5])
    vg.add('use_momentum', [False])
    vg.add('learn_inner_loss', [False])

    for v in vg.variants():
        time.sleep(1.)
        if manual_edit_params:
            # Fold the variant's flat overrides back into the nested config.
            _v = v.copy()
            del _v['config'], _v['_hidden_keys']
            v['config'] = replace_in_dict(v['config'], _v)
        if train_bool:
            # Want the testing parameters to match the training parameters,
            # so you can easily load this saved config for testing.
            v['config']['testing'] = recursive_dict_merge(
                v['config']['testing'], v['config']['training'])

        # Example exp_name patterns for training (auto-built from the config
        # values) and testing (rollouts placed next to the model) were kept
        # here as commented templates; the active value below writes results
        # into the videos directory.
        v['exp_name'] = "/home/anagabandi/roach_workspace/src/gbac_roach/videos/de/shell_shift"
        v['train_bool'] = train_bool

        run_experiment_lite(
            run,
            sync_s3_pkl=True,
            periodic_sync=True,
            variant=v,
            snapshot_mode="all",
            mode="local",
            use_cloudpickle=True,
            exp_name=v['exp_name'],
            use_gpu=False,
            pre_commands=[
                #"yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.4.1'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v['config']['seed'])
def run_experiment(argv):
    """Grid-launch TRPO training runs for the Cassie MuJoCo model, locally
    or on EC2 spot instances."""
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()
    vg.add('n_itr', [5000])
    vg.add('fixed_gains', [True, False])
    vg.add('stability_cost_coef', [0.0, 0.01])
    vg.add('ctrl_cost_coef', [0, 0.0005, 0.001, 0.005])
    vg.add('alive_bonus', [0, 1])
    vg.add('step_size', [0.02])
    vg.add('seed', [1, 11])
    vg.add('discount', [0.99])
    vg.add('path_length', [200])
    vg.add('batch_size', [50000])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])

    variants = vg.variants()

    # ----------------------- AWS configuration -------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = info['vCPU']
    else:
        n_parallel = 12
    if args.mode == 'ec2':
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

    print("\n" + "**********" * 10 +
          "\nexp_prefix: {}\nvariants: {}".format('TRPO', len(variants)))
    # NOTE(review): `subnets` is never assigned anywhere in this function
    # (sibling launchers call cheapest_subnets(...) first) — this print and
    # the random.choice(subnets) in the loop below will raise NameError
    # unless `subnets` is a module-level global; verify and fix upstream.
    print(
        'Running on type {}, with price {}, on the subnets: '.format(
            config.AWS_INSTANCE_TYPE,
            config.AWS_SPOT_PRICE,
        ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "trpo_train_cassie_mujoco_%.3f_%i_%i_id_%i" % (
            v['step_size'], v['batch_size'], v['seed'], exp_id)
        v = instantiate_class_stings(v)

        # Pin each run to a randomly chosen subnet/region and update the
        # per-region AWS credentials/AMI accordingly.
        # NOTE(review): unlike sibling launchers this is NOT guarded by
        # `args.mode == 'ec2'`, so it also runs (and fails) in local mode.
        subnet = random.choice(subnets)
        config.AWS_REGION_NAME = subnet[:-1]
        config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
            config.AWS_REGION_NAME]
        config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
            config.AWS_REGION_NAME]
        config.AWS_SECURITY_GROUP_IDS = \
            config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            #sync_all_data_node_to_s3=True,
            python_command="python3",
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
config.AWS_SPOT_PRICE = str(info["price"]) n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'start-oracle-maze-debugged' vg = VariantGenerator() vg.add('maze_id', [11]) # default is 0, spiral is 11 vg.add('start_size', [4]) vg.add('start_range', lambda maze_id: [4] if maze_id == 0 else [7]) # this will be used also as bound of the state_space vg.add('start_center', lambda maze_id, start_size: [(2, 2)] if maze_id == 0 and start_size == 2 else [(2, 2, 0, 0)] if maze_id == 0 and start_size == 4 else [(0, 0)] if start_size == 2 else [(0, 0, 0, 0)]) vg.add('ultimate_goal', lambda maze_id: [(0, 4)] if maze_id == 0 else [(2, 4), (0, 0)] if maze_id == 12 else [(4, 4)]) vg.add('goal_size', [2]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [0.3]) vg.add('only_feasible', [True]) vg.add('goal_range', lambda maze_id: [4] if maze_id == 0 else [7]) # this will be used also as bound of the state_space vg.add('goal_center', lambda maze_id: [(2, 2)] if maze_id == 0 else [(0, 0)]) # goal-algo params
n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() #exp_prefix = 'start-selfplay-maze0-run7' exp_prefix = 'start-selfplay-maze11-run18' vg = VariantGenerator() vg.add('maze_id', [11]) # default is 0 #vg.add('maze_id', [0]) # default is 0 vg.add( 'start_size', [2] ) # this is the ultimate start we care about: getting the pendulum upright vg.add('start_range', lambda maze_id: [4] if maze_id == 0 else [7] ) # this will be used also as bound of the state_space # vg.add('start_center', lambda maze_id: [(2, 2)] if maze_id == 0 else [(0, 0)]) vg.add( 'start_center', lambda maze_id, start_size: [(2, 2)] if maze_id == 0 and start_size == 2 else [(2, 2, 0, 0)] if maze_id == 0 and start_size == 4 else [(0, 0)] if start_size == 2 else [(0, 0, 0, 0)]) ultimate_goal = lambda maze_id: [(0, 4)] if maze_id == 0 else [( 2, 4), (0, 0)] if maze_id == 12 else [(4, 4)]
n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'start-trpo-unif-pr2key-bigBS-rad2' vg = VariantGenerator() vg.add( 'start_size', [7] ) # this is the ultimate start we care about: getting the pendulum upright vg.add( 'start_bounds', #TODO: get this from the env [[(-2.2854, -.05236, -3.9, -2.3213, -3.15, -2.094, -3.15), (1.714602, 1.3963, 0.0, 0.0, 3.15, 0.0, 3.15)]]) vg.add('start_goal', [(1.55, 0.4, -3.75, -1.15, 1.81, -2.09, 0.05)]) vg.add( 'start_out', [ # (1.55, 0.4, -3.75, -1.15, 1.81, -2.09, 0.05), # (0.57986085, 0.24922906, -2.09131438, -1.69772732, -0.00931115, -0.69625297, 1.0060919), # (0.68199678, 0.22109899, -2.21208568, -2.09289934, -0.16548432, - 0.05642514, 0.09468899), (0.36443675, 0.75535443, -1.88521387, -2.10336795, 2.56779867, -0.44073149, -1.70834555) ])
else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 default_prefix = 'point-nd-goal-sagg-riac' if args.prefix is None: exp_prefix = format_experiment_prefix(default_prefix) elif args.prefix == '': exp_prefix = default_prefix else: exp_prefix = '{}_{}'.format(default_prefix, args.prefix) vg = VariantGenerator() # # GeneratorEnv params #vg.add('goal_size', [2, 3, 4, 5, 6]) vg.add('goal_size', [2]) vg.add('terminal_eps', lambda goal_size: [math.sqrt(goal_size) / math.sqrt(2) * 0.3]) vg.add('only_feasible', [True]) vg.add('goal_range', [5]) # this will be used also as bound of the state_space vg.add( 'state_bounds', lambda goal_range, goal_size, terminal_eps: [(1, goal_range) + (0.3, ) * (goal_size - 2) + (goal_range, ) * goal_size]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) # !!!! vg.add('goal_weight', [1]) ############################################# # goal-algo params vg.add('min_reward', lambda goal_weight: [goal_weight * 0.1]
config.AWS_SPOT_PRICE = str(info["price"]) n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'gan-ant-goal2' vg = VariantGenerator() vg.add('goal_size', [2]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [0.5]) # vg.add('only_feasible', [True]) vg.add('goal_range', [5]) # this will be used also as bound of the state_space vg.add('goal_center', [(0, 0)]) # goal-algo params vg.add('min_reward', [0.1]) vg.add('max_reward', [0.9]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) vg.add('persistence', [1]) vg.add('n_traj', [3]) # only for labeling and plotting (for now, later it will have to be equal to persistence!) vg.add('with_replacement', [False]) vg.add('smart_init', [True]) vg.add('label_with_variation', [False]) vg.add('use_trpo_paths', lambda label_with_variation: [False] if label_with_variation else [False])
n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'new2-goal-sagg-riac-maze-ant' vg = VariantGenerator() vg.add('goal_size', [ 2 ]) # this is the ultimate goal we care about: getting the pendulum upright vg.add('terminal_eps', [1]) vg.add('only_feasible', [True]) vg.add('maze_id', [0]) vg.add('goal_range', lambda maze_id: [5] if maze_id == 0 else [7] ) # this will be used also as bound of the state_space vg.add('goal_center', lambda maze_id: [(2, 2)] if maze_id == 0 else [(0, 0)]) # goal-algo params vg.add('min_reward', [0]) vg.add('max_reward', [1]) vg.add('distance_metric', ['L2']) vg.add('extend_dist_rew', [False]) # !!!! vg.add('use_competence_ratio', [False]) # !!!! vg.add('goal_weight', lambda extend_dist_rew: [0]
config.AWS_SPOT_PRICE = str(info["price"]) n_parallel = int(info["vCPU"] / 2) # make the default 4 if not using ec2 if args.ec2: mode = 'ec2' elif args.local_docker: mode = 'local_docker' n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 # n_parallel = multiprocessing.cpu_count() exp_prefix = 'start-brownian-arm3d-key-largeBS-allStartsNoFilter' vg = VariantGenerator() vg.add('start_size', [7]) # this is the ultimate start we care about: getting the pendulum upright vg.add('start_goal', lambda start_size: [(1.55, 0.4, -3.75, -1.15, 1.81, -2.09, 0.05)] if start_size == 7 else [(1.55, 0.4, -3.75, -1.15, 1.81, -2.09, 0.05, 0, 0, 0, 0, 0, 0, 0)]) vg.add('ultimate_goal', [(0.0, 0.3, -0.7, # first point --> hill 0.0, 0.3, -0.4, # second point --> top -0.15, 0.3, -0.55)]) # third point --> side vg.add('goal_size', [9]) vg.add('kill_radius', [None]) vg.add('terminal_eps', [0.03]) vg.add('ctrl_cost_coeff', [0]) # brownian params vg.add('seed_with', ['all_previous']) # good from brown, onPolicy, previousBrown (ie no good) # vg.add('seed_with', ['only_goods']) # good from brown, onPolicy, previousBrown (ie no good) vg.add('brownian_horizon', lambda seed_with: [50] if seed_with == 'on_policy' else [50]) vg.add('brownian_variance', [1])
n_parallel = cpu_count() if not args.debug else 1 else: mode = 'local' n_parallel = cpu_count() if not args.debug else 1 default_prefix = 'point-nd-goal-gan' if args.prefix is None: exp_prefix = format_experiment_prefix(default_prefix) elif args.prefix == '': exp_prefix = default_prefix else: exp_prefix = '{}_{}'.format(default_prefix, args.prefix) vg = VariantGenerator() # # GeneratorEnv params vg.add('goal_size', [2, 3, 4, 5, 6]) vg.add('terminal_eps', lambda goal_size: [math.sqrt(goal_size) / math.sqrt(2) * 0.3]) vg.add('only_feasible', [True]) vg.add('goal_range', [5]) # this will be used also as bound of the state_space vg.add( 'state_bounds', lambda goal_range, goal_size, terminal_eps: [(1, goal_range) + (0.3, ) * (goal_size - 2) + (goal_range, ) * goal_size]) vg.add('distance_metric', ['L2']) vg.add('goal_weight', [1]) ############################################# # goal-algo params vg.add('min_reward', lambda goal_weight: [goal_weight * 0.1] ) # now running it with only the terminal reward of 1!
def run_experiment(argv):
    """Parse CLI args, build the MB-MPO variant grid, and launch training runs.

    Depending on ``--mode`` this either fans all variants out across local GPUs
    (``mgpu``) via a worker script, or launches one ``run_experiment_lite`` job
    per variant (``local`` / ``ec2``). Relies on module-level globals:
    ``config``, ``EXP_PREFIX``, ``ec2_instance``, ``NUM_EC2_SUBNETS``,
    ``run_multi_gpu``, ``cheapest_subnets``, ``instantiate_class_stings``,
    ``run_experiment_lite``, ``run_train_task``.

    Args:
        argv: ``sys.argv``-style argument list; ``argv[0]`` is skipped.
    """
    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode', type=str, default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx', type=int, default=4,
                        help='Number of tasks per GPU')
    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    # Every vg.add with a multi-element list multiplies the grid; with the
    # values below the sweep is 3 seeds x 2 clip_eps x 3 target_inner_step
    # x 3 num_maml_steps_per_iter = 54 variants.
    vg = VariantGenerator()
    vg.add('seed', [22, 23, 24])

    # env spec
    vg.add('env', ['WalkerEnvRandomParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('target_velocity', [None])
    vg.add('path_length_env', [200])

    # Model-based MAML algo spec
    vg.add('n_itr', [500])
    vg.add('fast_lr', [0.001])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [20])  # must be a multiple of num_models
    vg.add('discount', [0.99])
    vg.add('entropy_bonus', [0])
    vg.add('clip_eps', [0.5, 0.7])
    vg.add('target_inner_step', [3e-3, 1e-2, 3e-2])
    vg.add('init_kl_penalty', [1e-10])
    vg.add('adaptive_kl_penalty', [True])
    vg.add('max_epochs', [8])
    vg.add('num_batches', [1])
    vg.add('batch_size_env_samples', [1])
    vg.add('batch_size_dynamics_samples', [50])
    vg.add('initial_random_samples', [5000])
    vg.add('num_maml_steps_per_iter', [5, 15, 25])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_from_env_traj', [False])
    vg.add('trainable_step_size', [False])
    vg.add('num_models', [5])

    # neural network configuration
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512, 512)])
    vg.add('weight_normalization_model', [True])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('optimizer_model', ['adam'])
    vg.add('policy', ['MAMLImprovedGaussianMLPPolicy'])
    vg.add('bias_transform', [False])
    vg.add('param_noise_std', [0.0])
    vg.add('dynamic_model_max_epochs', [(500, 500)])
    vg.add('valid_split_ratio', [0.2])
    # NOTE: the misspelled key 'rolling_average_persitency' is the name the
    # downstream code expects -- do not "fix" the string.
    vg.add('rolling_average_persitency', [0.95])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    # Shared kwargs for the multi-GPU launcher path.
    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        # Multi-GPU local mode: delegate all variants to a worker script.
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path, 'gpu-mb-mpo-train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            # Autodetect GPU count from the NVIDIA driver's proc interface
            # (Linux-only; assumes the nvidia driver is loaded).
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path, default_dict, n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)
    else:
        # ----------------------- AWS configuration ---------------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"])
        else:
            n_parallel = 12
        if args.mode == 'ec2':
            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])

            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)
            print(
                "\n" + "**********" * 10 +
                "\nexp_prefix: {}\nvariants: {}".format('PPO', len(variants)))
            print(
                'Running on type {}, with price {}, on the subnets: '.format(
                    config.AWS_INSTANCE_TYPE,
                    config.AWS_SPOT_PRICE,
                ), str(subnets))

        # ----------------------- TRAINING ---------------------------------------
        # Random ids keep experiment names unique across repeated launches.
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_ensemble_maml_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length_env'], v['num_models'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            # Resolve class-name strings in the variant into actual classes.
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                # Pick a subnet per run and mutate the global config so the
                # subsequent run_experiment_lite call targets that region.
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command="python3",
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'",
                    "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
def experiment(variant):
    """Train NP-meta-SAC: build an env sampler, load a pretrained neural
    process, size the networks, and run the algorithm.

    Args:
        variant: Experiment configuration dict. Uses keys 'on_the_fly',
            'env_specs', 'algo_params' (with 'latent_repr_mode' and
            'num_latent_samples'), and 'net_size'.

    Returns:
        int: always 1 (success marker for the launcher).
    """
    env_specs = variant['env_specs']
    if variant['on_the_fly']:
        # Environments are generated lazily from the spec as they are sampled.
        env_sampler = OnTheFlyEnvSampler(env_specs)
    else:
        # Expand every list-valued spec entry into the cross product of
        # concrete environment configurations; scalar entries stay fixed.
        spec_grid = VariantGenerator()
        fixed_entries = {}
        swept_entries = {}
        for key, value in env_specs.items():
            if isinstance(value, list):
                spec_grid.add(key, value)
                swept_entries[key] = value
            else:
                fixed_entries[key] = value
        concrete_specs = []
        for spec in spec_grid.variants():
            del spec['_hidden_keys']  # VariantGenerator bookkeeping key
            spec.update(fixed_entries)
            concrete_specs.append(spec)
        env_sampler = EnvSampler(concrete_specs)

    # Load the pretrained neural process.
    # NOTE(review): this reads the module-level `exp_specs`, not `variant` --
    # presumably set by the launcher script; confirm before reusing elsewhere.
    np_path = exp_specs['neural_process_load_path']
    if np_path == '':
        raise NotImplementedError()
    neural_process = joblib.load(np_path)['neural_process']

    # Probe one environment to size the observation/action spaces.
    sample_env, _ = env_sampler()
    obs_dim = int(np.prod(sample_env.observation_space.shape))
    action_dim = int(np.prod(sample_env.action_space.shape))

    # Extra observation dims carry the latent task representation.
    if variant['algo_params']['latent_repr_mode'] == 'concat_params':
        extra_obs_dim = 2 * neural_process.z_dim
    else:
        # concat samples of the latent
        extra_obs_dim = (
            variant['algo_params']['num_latent_samples'] * neural_process.z_dim
        )

    width = variant['net_size']
    vf = FlattenMlp(
        hidden_sizes=[width, width],
        input_size=obs_dim + extra_obs_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[width, width],
        obs_dim=obs_dim + extra_obs_dim,
        action_dim=action_dim,
    )
    qf = FlattenMlp(
        hidden_sizes=[width, width],
        input_size=obs_dim + action_dim + extra_obs_dim,
        output_size=1,
    )

    algorithm = NPMetaSoftActorCritic(
        env_sampler=env_sampler,
        neural_process=neural_process,
        policy=policy,
        qf=qf,
        vf=vf,
        **variant['algo_params']
    )
    if ptu.gpu_enabled():
        algorithm.cuda()
    algorithm.train()
    return 1