def launch_experiments(variant_generator):
    """Launch one SAC experiment per variant from `variant_generator`.

    Relies on module-level names: `args` (CLI options), `np`,
    `run_sac_experiment` and `run_experiment`. Exits the process once all
    experiments have been launched.
    """
    variants = variant_generator.variants()
    num_experiments = len(variants)
    print('Launching {} experiments.'.format(num_experiments))
    for i, variant in enumerate(variants):
        # Resolve a literal 'random' seed here so the same concrete value is
        # both stored in the variant and passed to the runner below.
        if variant['seed'] == 'random':
            variant['seed'] = np.random.randint(1, 100)
        # i + 1 so progress reads 1/N .. N/N (the original printed 0-based
        # indices, inconsistent with the other launchers in this file).
        print("Experiment: {}/{}".format(i + 1, num_experiments))
        experiment_prefix = variant['prefix'] + '/' + args.exp_name
        experiment_name = (variant['prefix'] + '-' + args.exp_name + '-'
                           + str(i).zfill(2))
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=experiment_prefix,
            exp_name=experiment_name,
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
    # NOTE(review): placed after the loop so every experiment launches before
    # the process exits; the collapsed source made the original indentation of
    # this call ambiguous — confirm it was not meant to run inside the loop.
    sys.exit()
def launch_experiments(variant_generator):
    """Run every ACER baseline variant locally, one experiment per variant."""
    variants = variant_generator.variants()
    num_experiments = len(variants)
    print('Launching {} experiments.'.format(num_experiments))
    exp_name = 'acer_test1'
    for index, spec in enumerate(variants):
        print("Experiment: {}/{}".format(index, num_experiments))
        # Group runs by environment under a fixed baselines prefix.
        prefix = 'acer_baselines_final/' + spec['env'] + '/'
        name = exp_name + '-' + '%02d' % index
        run_sac_experiment(
            run_acer,
            mode='local',
            variant=spec,
            exp_prefix=prefix,
            exp_name=name,
            n_parallel=1,
            seed=spec['seed'],
            terminate_machine=True,
            snapshot_mode='last',
            snapshot_gap=1000,
            sync_s3_pkl=False,
        )
def launch_experiments(variant_generator, args):
    """Launch one SAC experiment per variant, with CLI overrides applied.

    Args:
        variant_generator: object whose ``variants()`` returns flat variant
            dicts (keys joined by '.').
        args: parsed CLI namespace providing ``scale``, ``cost_type``,
            ``exp_name``, ``mode`` and ``log_dir``.
    """
    variants = variant_generator.variants()
    # TODO: Remove unflatten. Our variant generator should support nested params
    variants = [unflatten(variant, separator='.') for variant in variants]
    num_experiments = len(variants)
    print('Launching {} experiments.'.format(num_experiments))
    for i, variant in enumerate(variants):
        # i + 1 so progress reads 1/N .. N/N instead of 0-based.
        print("Experiment: {}/{}".format(i + 1, num_experiments))
        run_params = variant['run_params']
        algo_params = variant['algorithm_params']
        # Override generated algorithm params with the CLI-provided values.
        # (The original assigned `algo_params` but mutated the dict through
        # `variant[...]`; both refer to the same dict — use the alias.)
        algo_params['scale'] = args.scale
        algo_params['cost_type'] = args.cost_type
        experiment_prefix = variant['prefix'] + '/' + args.exp_name
        experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=variant['prefix'], exp_name=args.exp_name, i=i)
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=experiment_prefix,
            exp_name=experiment_name,
            n_parallel=1,
            seed=run_params['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            snapshot_mode=run_params['snapshot_mode'],
            snapshot_gap=run_params['snapshot_gap'],
            sync_s3_pkl=run_params['sync_pkl'],
        )
def launch_experiments(variant_generator):
    """Launch fine-tuning experiments, one per variant.

    Builds a per-variant tag from the snapshot filename and TAG_KEYS, and
    writes videos under ``<log_dir>/videos``. Relies on module-level `args`,
    `TAG_KEYS`, `run_sac_experiment` and `run_experiment`.
    """
    variants = variant_generator.variants()
    # Hoisted out of the loop: the count is loop-invariant and the original
    # printed it once per variant.
    print('Launching {} experiments.'.format(len(variants)))
    for i, variant in enumerate(variants):
        tag = 'finetune__'
        print(variant['snapshot_filename'])
        # Second-to-last path component identifies the snapshot's run.
        tag += variant['snapshot_filename'].split('/')[-2]
        tag += '____'
        tag += '__'.join(['%s_%s' % (key, variant[key]) for key in TAG_KEYS])
        log_dir = os.path.join(args.log_dir, tag)
        variant['video_dir'] = os.path.join(log_dir, 'videos')
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=variant['prefix'] + '/' + args.exp_name,
            exp_name=(variant['prefix'] + '-' + args.exp_name + '-'
                      + str(i).zfill(2)),
            n_parallel=1,  # Increasing this barely effects performance,
            # but breaks learning of hierarchical policy.
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
def launch_experiments(variant_generator, args): variants = variant_generator.variants() # TODO: Remove unflatten. Our variant generator should support nested params variants = [unflatten(variant, separator='.') for variant in variants] num_experiments = len(variants) print('Launching {} experiments.'.format(num_experiments)) for i, variant in enumerate(variants): print("Experiment: {}/{}".format(i, num_experiments)) run_params = variant['run_params'] algo_params = variant['algorithm_params'] experiment_prefix = variant['prefix'] + '/' + args.exp_name experiment_name = '{prefix}-{exp_name}-{i:02}'.format( prefix=variant['prefix'], exp_name=args.exp_name, i=args.seed) ## Hacks to get it to work while we figure out code! variant['algorithm_params']['base_kwargs'][ 'n_train_repeat'] = args.n_train_repeat variant['algorithm_params']['base_kwargs']['n_epochs'] = args.n_epochs variant['algorithm_params']['base_kwargs'][ 'gpu_fraction'] = args.gpu_fraction #algo_params['base_kwargs']['n_epochs'] = 2000.0 log_dir = os.path.join(args.log_dir, experiment_name) ## print('run params: {}'.format(variant['run_params'])) print('algorithm_params: {}'.format(variant['algorithm_params'])) print('env_params: {}'.format(variant['env_params'])) print('value_fn_params: {}'.format(variant['value_fn_params'])) print('replay_buffer_params: {}'.format( variant['replay_buffer_params'])) print('policy_params: {}'.format(variant['policy_params'])) print('sampler_params: {}'.format(variant['sampler_params'])) print('task: {}'.format(variant['task'])) run_sac_experiment( run_experiment, mode=args.mode, variant=variant, exp_prefix=experiment_prefix, exp_name=experiment_name, n_parallel=args.n_parallel, seed=args.seed, #run_params['seed'], terminate_machine=True, log_dir=log_dir, # RC: TODO change back to args.logdir snapshot_mode=run_params['snapshot_mode'], snapshot_gap=run_params['snapshot_gap'], sync_s3_pkl=run_params['sync_pkl'], )
def launch_experiments(variant_generator, args):
    """Launch the single experiment selected by ``args.seed`` (1-based).

    Copies a set of CLI hyperparameter overrides into the chosen variant
    before launching. Relies on module-level `unflatten`,
    `run_sac_experiment` and `run_experiment`.
    """
    variants = variant_generator.variants()
    # TODO: Remove unflatten. Our variant generator should support nested params
    variants = [unflatten(variant, separator='.') for variant in variants]
    print('Launching seed={} experiment.'.format(args.seed))
    # args.seed is 1-based; the variants list is 0-based.
    variant = variants[args.seed - 1]
    # CLI overrides copied verbatim into the variant. (Replaces 17 repeated
    # `variant[k] = args.k` assignments; same keys, same order, same values.)
    for key in ('lr', 'tau', 'l1regpi', 'l2regpi', 'l1regvf', 'l2regvf',
                'wclippi', 'wclipvf', 'dropoutpi', 'dropoutvf', 'ent_coef',
                'batchnormpi', 'batchnormvf', 'reward_scale', 'num_hidden',
                'policypath', 'valuepath'):
        variant[key] = getattr(args, key)
    print("Variant for this experiment:", variant)
    # (Removed unused local `algo_params` — it was assigned and never read.)
    run_params = variant['run_params']
    experiment_prefix = variant['prefix'] + '/' + args.exp_name
    experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
        prefix=variant['prefix'], exp_name=args.exp_name, i=args.seed)
    run_sac_experiment(
        run_experiment,
        mode=args.mode,
        variant=variant,
        exp_prefix=experiment_prefix,
        exp_name=experiment_name,
        n_parallel=1,
        seed=run_params['seed'],
        terminate_machine=True,
        log_dir=args.log_dir,
        snapshot_mode=run_params['snapshot_mode'],
        snapshot_gap=run_params['snapshot_gap'],
        sync_s3_pkl=run_params['sync_pkl'],
    )
def launch_experiments(args):
    """Kick off a single local experiment, logging under args.domain."""
    total = 1
    print('Launching {} experiments.'.format(total))
    for run_index in range(total):
        print("Experiment: {}/{}".format(run_index + 1, total))
        run_sac_experiment(
            run_experiment,
            mode='local',
            n_parallel=1,
            terminate_machine=True,
            # Timestamped log directory keeps repeated runs separate.
            log_dir='/root/code/log/{0}/{1}'.format(args.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
def launch_experiments(variant_generator):
    """Launch one SAC experiment per variant.

    Relies on module-level `args`, `run_sac_experiment` and `run_experiment`.
    """
    variants = variant_generator.variants()
    # Hoisted out of the loop: the count is loop-invariant and the original
    # printed it once per variant.
    print('Launching {} experiments.'.format(len(variants)))
    for i, variant in enumerate(variants):
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=variant['prefix'] + '/' + args.exp_name,
            exp_name=(variant['prefix'] + '-' + args.exp_name + '-'
                      + str(i).zfill(2)),
            n_parallel=1,
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=args.log_dir,
            # use_cloudpickle=True,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
def launch_experiments(variant_generator):
    """Launch one SAC experiment per variant, each with its own log dir.

    Relies on module-level `args`, `get_logdir`, `run_sac_experiment` and
    `run_experiment`.
    """
    variants = variant_generator.variants()
    # Hoisted out of the loop: the count is loop-invariant and the original
    # printed it once per variant.
    print('Launching {} experiments.'.format(len(variants)))
    for i, variant in enumerate(variants):
        log_dir = get_logdir(args, variant)
        run_sac_experiment(
            run_experiment,
            mode=args.mode,
            variant=variant,
            exp_prefix=variant['prefix'] + '/' + args.exp_name,
            exp_name=(variant['prefix'] + '-' + args.exp_name + '-'
                      + str(i).zfill(2)),
            n_parallel=1,  # Increasing this barely effects performance,
            # but breaks learning of hierarchical policy.
            seed=variant['seed'],
            terminate_machine=True,
            log_dir=log_dir,
            snapshot_mode=variant['snapshot_mode'],
            snapshot_gap=variant['snapshot_gap'],
            sync_s3_pkl=variant['sync_pkl'],
        )
def launch_experiments():
    """Launch a single local experiment configured via the module's `arg()`.

    Relies on module-level `arg`, `timestamp`, `run_sac_experiment` and
    `run_experiment`.
    """
    args = arg()
    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))
    for i in range(num_experiments):
        print("Experiment: {}/{}".format(i + 1, num_experiments))
        experiment_prefix = args.domain + '/' + args.exp_name
        # Use the loop index (the original hard-coded i=0, which is identical
        # today but would collide if num_experiments were ever raised).
        experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=args.domain, exp_name=args.exp_name, i=i)
        run_sac_experiment(
            run_experiment,
            mode='local',
            exp_prefix=experiment_prefix,
            exp_name=experiment_name,
            n_parallel=1,
            terminate_machine=True,
            log_dir='/root/code/log/{0}/{1}'.format(args.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
def launch_experiments(args):
    """Launch the ant cross-maze SAC experiment.

    NOTE(review): num_experiments is reported as 5 but exactly one run is
    launched — confirm whether the remaining four were meant to execute.
    """
    num_experiments = 5
    print('Launching {} experiments.'.format(num_experiments))
    # The original wrote `i = 0` followed by an always-true `if i == 0:`
    # guard; flattened here to straight-line code with identical behavior.
    i = 0
    print("Experiment: {}/{}".format(i, num_experiments))
    experiment_prefix = 'ant/cross-maze' + '/' + args.exp_name
    experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
        prefix='ant/cross-maze', exp_name=args.exp_name, i=0)
    run_sac_experiment(
        run_experiment,
        mode=args.mode,
        exp_prefix=experiment_prefix,
        exp_name=experiment_name,
        n_parallel=1,
        terminate_machine=True,
        log_dir=args.log_dir,
        snapshot_mode='gap',
        snapshot_gap=1000,
        sync_s3_pkl=True,
    )
env=env, policy=policy, initial_exploration_policy=initial_exploration_policy, pool=pool, qf1=qf1, qf2=qf2, vf=vf, lr=3e-4, scale_reward=20, discount=0.99, tau=0.005, reparameterize=True, target_update_interval=1, action_prior='uniform', save_full_state=False, ) algorithm._sess.run(tf.global_variables_initializer()) algorithm.train() if __name__ == "__main__": run_sac_experiment( run_experiment, mode='local', log_dir='/root/code/log/prim/reach/{0}'.format(timestamp()), snapshot_mode='gap', snapshot_gap=100, )