def build_experiment():
  """Return the parameter sweep for the `create_dataset` build target.

  The sweep is the product of `alpha`, `seed` (0 .. FLAGS.num_seeds - 1),
  `num_trajectory` and `max_trajectory_length`.

  Returns:
    The `xm.ParameterSweep` wrapping the CPU build target.
  """
  borg_runtime = xm.Borg(cell=FLAGS.cell, priority=FLAGS.priority)

  target_args = [
      ('env_name', FLAGS.env_name),
      ('load_dir', FLAGS.load_dir),
      ('save_dir', FLAGS.save_dir),
      ('tabular_obs', FLAGS.tabular_obs),
      ('force', True),
  ]
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:create_dataset',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=target_args,
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  sweep_axes = [
      hyper.sweep('alpha', hyper.discrete([0.0, 1.0])),
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('num_trajectory', hyper.discrete([100])),
      # Disabled alternative lengths: 5, 10, 20, 40, 50, 100, 200.
      hyper.sweep('max_trajectory_length', hyper.discrete([100])),
  ]
  return xm.ParameterSweep(target, hyper.product(sweep_axes))
def build_experiment():
  """Return the TensorBoard-wrapped sweep for the `run_neural_dice` target.

  The output directory is `FLAGS.save_dir` (formatted with the cell) joined
  with `<exp_name>_gamma<gamma>`; it is passed to the job and to TensorBoard.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  ram_requirements = xm.Requirements(ram=10 * xm.GiB)
  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=FLAGS.priority,
      requirements=ram_requirements,
      overrides=borg_overrides,
  )

  base_dir = FLAGS.save_dir.format(CELL=FLAGS.cell)
  run_name = '{EXP}_gamma{GAMMA}'.format(EXP=FLAGS.exp_name, GAMMA=FLAGS.gamma)
  save_dir = os.path.join(base_dir, run_name)

  target = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:run_neural_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('gamma', FLAGS.gamma),
          ('save_dir', save_dir),
          ('load_dir', FLAGS.load_dir),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  # Per-environment lengths. Currently unused: the sweep below hard-codes 100
  # instead of max_traj_dict[FLAGS.env_name].
  max_traj_dict = {
      'grid': 100,
      'taxi': 200,
      'Reacher-v2': 40,
      'reacher': 200,
      'cartpole': 250,
  }

  seed_values = list(range(FLAGS.num_seeds))
  regularizer_pairs = hyper.zipit([
      hyper.sweep('primal_regularizer', hyper.discrete([0.0, 1.0])),
      hyper.sweep('dual_regularizer', hyper.discrete([1.0, 0.0])),
  ])
  learning_rate_pairs = hyper.zipit([
      hyper.sweep('nu_learning_rate', hyper.discrete([0.0001])),
      hyper.sweep('zeta_learning_rate', hyper.discrete([0.0001])),
  ])
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('zero_reward', hyper.categorical([False])),
      hyper.sweep('norm_regularizer', hyper.discrete([0.0, 1.0])),
      hyper.sweep('zeta_pos', hyper.categorical([True, False])),
      hyper.sweep('primal_form', hyper.categorical([False])),
      hyper.sweep('num_steps', hyper.discrete([200000])),
      hyper.sweep('f_exponent', hyper.discrete([2.0])),
      regularizer_pairs,
      learning_rate_pairs,
      hyper.sweep('alpha', hyper.discrete([0.0])),
      hyper.sweep('num_trajectory', hyper.discrete([100])),
      hyper.sweep('max_trajectory_length', hyper.discrete([100])),
  ]

  sweep = xm.ParameterSweep(
      target, hyper.product(sweep_axes), max_parallel_work_units=2000)
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the parameter sweep for the `run_tabular_teq_dice` target.

  Sweeps `seed` (0 .. FLAGS.num_seeds - 1), `step_encoding` and
  `max_trajectory_length`; all other arguments are fixed.

  Returns:
    The `xm.ParameterSweep` wrapping the CPU build target.
  """
  borg_runtime = xm.Borg(cell=FLAGS.cell, priority=FLAGS.priority)

  output_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_tabular_teq_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('load_dir', FLAGS.load_dir),
          ('save_dir', output_dir),
          ('max_trajectory_length_train', 50),
          ('num_trajectory', 1000),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('step_encoding', hyper.categorical([None, 'one_hot'])),
      hyper.sweep('max_trajectory_length', hyper.discrete([5, 10, 20, 50])),
  ]
  return xm.ParameterSweep(target, hyper.product(sweep_axes))
def build_experiment():
  """Return the TensorBoard-wrapped sweep for `run_tabular_coin_dice`.

  Worker requirements get an optional RAM filesystem size
  (`FLAGS.worker_ram_fs_gb`) and, when `FLAGS.avx2` is set, the AVX2
  constraints and build flags.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  save_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  if FLAGS.worker_ram_fs_gb is not None:
    worker_requirements.tmp_ram_fs_size = FLAGS.worker_ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  num_trajectory = 200
  if FLAGS.avx2:
    build_flags = AVX2_BUILD_FLAGS
  else:
    build_flags = AVX_BUILD_FLAGS
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:run_tabular_coin_dice',
      build_flags=build_flags,
      args=[
          ('env_name', FLAGS.env_name),
          ('load_dir', FLAGS.load_dir),
          ('save_dir', save_dir),
          ('num_steps', 100),
          ('num_trajectory', num_trajectory),
          ('max_trajectory_length', 1),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  # With max_trajectory_length fixed at 1, the sample count used to scale the
  # divergence limit is taken to be the trajectory count.
  num_samples = num_trajectory
  percentiles = [0, 50, 60, 70, 80, 90, 95]
  divergence_limits = [
      1.0 * CHI2_PERCENTILE[p] / num_samples for p in percentiles
  ]

  seed_values = list(range(FLAGS.num_seeds))
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('alpha', [0.0]),
      hyper.sweep('gamma', [0.0]),
      hyper.sweep('divergence_limit', divergence_limits),
      hyper.sweep('algae_alpha', [0.01]),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the TensorBoard-wrapped sweep for the `run_q_estimator` target.

  Worker requirements get an optional RAM filesystem size
  (`FLAGS.worker_ram_fs_gb`) and, when `FLAGS.avx2` is set, the AVX2
  constraints and build flags.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  save_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  if FLAGS.worker_ram_fs_gb is not None:
    worker_requirements.tmp_ram_fs_size = FLAGS.worker_ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  if FLAGS.avx2:
    build_flags = AVX2_BUILD_FLAGS
  else:
    build_flags = AVX_BUILD_FLAGS
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_q_estimator',
      build_flags=build_flags,
      args=[
          ('env_name', FLAGS.env_name),
          ('load_dir', FLAGS.load_dir),
          ('save_dir', save_dir),
          ('num_trajectory', 10000),
          ('max_trajectory_length', 1),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  episode_limits = [2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      # Disabled alternative: alpha in [0.0, 0.33, 0.66, 1.].
      hyper.sweep('alpha', [0.0, 0.1, 0.2, 0.9]),
      hyper.sweep('gamma', [0.0]),
      hyper.sweep('limit_episodes', episode_limits),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the TensorBoard-wrapped sweep for `run_tabular_bayes_dice`.

  `gamma` is 0.0 for the 'bandit' environment and 0.99 otherwise, and
  `solve_for_state_action_ratio` is disabled only for 'taxi'.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=FLAGS.priority,
      requirements=xm.Requirements(),
  )

  if FLAGS.env_name != 'bandit':
    gamma = 0.99
  else:
    gamma = 0.0

  base_dir = FLAGS.save_dir.format(CELL=FLAGS.cell)
  run_name = '{EXP}_gamma{GAMMA}'.format(EXP=FLAGS.exp_name, GAMMA=gamma)
  save_dir = os.path.join(base_dir, run_name)

  solve_for_state_action_ratio = FLAGS.env_name != 'taxi'
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:run_tabular_bayes_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('gamma', gamma),
          ('save_dir', save_dir),
          ('load_dir', FLAGS.load_dir),
          ('num_steps', 50000),
          ('solve_for_state_action_ratio', solve_for_state_action_ratio),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  alpha_targets = [i / 100 for i in range(86, 95)]
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('alpha', hyper.discrete([0.])),
      hyper.sweep('alpha_target', hyper.discrete(alpha_targets)),
      hyper.sweep('kl_regularizer', hyper.discrete([5.])),
      hyper.sweep('num_trajectory', hyper.discrete([5, 10, 25, 50])),
      # Disabled alternatives: 1 (bandit), 500 (taxi); 100 is for frozenlake.
      hyper.sweep('max_trajectory_length', hyper.discrete([100])),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the parameter sweep for the `run_neural_teq_dice` target.

  Job arguments come from `get_args()`; the sweep covers `seed`
  (0 .. FLAGS.num_seeds - 1) and `max_trajectory_length`.

  Returns:
    The `xm.ParameterSweep` wrapping the CPU build target.
  """
  borg_runtime = xm.Borg(cell=FLAGS.cell, priority=FLAGS.priority)

  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_neural_teq_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=get_args(),
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  # Reacher lengths; disabled Grid alternative: [5, 10, 20, 50, 100, 200].
  trajectory_lengths = hyper.discrete([5, 10, 20, 40])
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('max_trajectory_length', trajectory_lengths),
  ]
  return xm.ParameterSweep(target, hyper.product(sweep_axes))
def build_experiment():
  """Return the TensorBoard-wrapped sweep for `run_neural_bayes_dice`.

  The output directory is `FLAGS.save_dir` (formatted with the cell) joined
  with `<exp_name>_gamma<gamma>`; it is passed to the job and to TensorBoard.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  worker_requirements = xm.Requirements()
  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=FLAGS.priority,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  base_dir = FLAGS.save_dir.format(CELL=FLAGS.cell)
  run_name = '{EXP}_gamma{GAMMA}'.format(EXP=FLAGS.exp_name, GAMMA=FLAGS.gamma)
  save_dir = os.path.join(base_dir, run_name)

  target = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:run_neural_bayes_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('gamma', FLAGS.gamma),
          ('save_dir', save_dir),
          ('load_dir', FLAGS.load_dir),
          ('num_steps', 50000),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  alpha_values = [i / 10 for i in range(6)]
  alpha_targets = [0.75, 0.8, 0.85, 0.9, 0.95]
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      hyper.sweep('kl_regularizer', hyper.discrete([5.])),
      hyper.sweep('alpha', hyper.discrete(alpha_values)),
      hyper.sweep('alpha_target', hyper.discrete(alpha_targets)),
      hyper.sweep('num_trajectory', hyper.discrete([10, 25, 50, 100])),
      hyper.sweep('max_trajectory_length', hyper.discrete([100])),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the parameter sweep for the `aggregate_numpy_is` GPU target.

  One work unit is created per entry of the `directory` sweep.

  Returns:
    The `xm.ParameterSweep` wrapping the GPU build target.
  """
  gpu_requirements = xm.Requirements(
      ram=10 * xm.GiB, gpu_types=[xm.GpuType.P100])

  borg_overrides = xm.BorgOverrides()
  # Disabled: borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  borg_overrides.env_vars.TMPDIR = '/tmp'

  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=gpu_requirements,
      overrides=borg_overrides,
  )

  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:aggregate_numpy_is',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('gfs_user', 'mudcats'),
          ('mode', FLAGS.mode),
      ],
      platform=xm.Platform.GPU,
      runtime=borg_runtime)

  input_directories = [
      '/cns/pw-d/home/mudcats/dev/algae_ci/Taxi_IS/',
      '/cns/pw-d/home/mudcats/dev/algae_ci/FrozenLake_IS/',
      '/cns/pw-d/home/mudcats/dev/algae_ci/SmallTree_IS/'
  ]
  sweep_axes = [hyper.sweep('directory', input_directories)]
  return xm.ParameterSweep(target, hyper.product(sweep_axes))
def build_experiment():
  """Create the worker and eval jobs and return the swept experiment.

  Two `//experimental/users/gjt/his:mnist` jobs (mode 'train' and mode 'eval')
  are combined into one parallel executable. The hyperparameter grid is looked
  up by `FLAGS.exp_type` in a local table with the keys 'snis_vs_lars',
  'continuous' and 'celeba'.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """

  def _fixed(name, value):
    # Every fixed hyperparameter in this function uses length=1.
    return hyper.fixed(name, value, length=1)

  def _proposal_model(proposal, model):
    # One (proposal, model) combination as a single-point product.
    return hyper.product([_fixed('proposal', proposal), _fixed('model', model)])

  # ====== Arguments ======
  model_dir = FLAGS.model_dir.format(
      cell=FLAGS.cell,
      user=getpass.getuser(),
      trial=FLAGS.trial,
  )

  # ====== Job: worker ======
  worker_requirements = xm.Requirements(gpu_types=[xm.GpuType.V100])
  worker_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=FLAGS.priority,
      requirements=worker_requirements,
  )
  worker_target = xm.BuildTarget(
      '//experimental/users/gjt/his:mnist',
      name='worker',
      args={
          'gfs_user': FLAGS.gfs_user,
          'logdir': model_dir,
          'mode': 'train',
      },
      platform=xm.Platform.GPU,
      runtime=worker_runtime,
  )

  # ====== Job: eval ======
  eval_runtime = xm.Borg(cell=FLAGS.cell, priority=FLAGS.priority)
  eval_target = xm.BuildTarget(
      '//experimental/users/gjt/his:mnist',
      name='eval',
      args={
          'gfs_user': FLAGS.gfs_user,
          'logdir': model_dir,
          'mode': 'eval',
          'split': 'train,valid,test',
          'num_iwae_samples': '1,1,1000',
      },
      platform=xm.Platform.GPU,  # Do we need GPU for eval?
      runtime=eval_runtime,
  )

  # ====== Executable experiment ======
  jobs = [
      xm_helper.build_single_job(worker_target),
      xm_helper.build_single_job(eval_target),
  ]
  experiment = xm.ParallelExecutable(jobs, name='his_service')

  # ====== Hyperparameter grids, keyed by experiment type ======
  # SNIS vs LARS
  snis_vs_lars = hyper.product([
      hyper.chainit([
          _proposal_model('gaussian', 'bernoulli_vae'),
          _proposal_model('bernoulli_vae', 'nis'),
          _proposal_model('nis', 'bernoulli_vae'),
      ]),
      hyper.sweep('run', hyper.discrete([0])),
      _fixed('dataset', 'static_mnist'),
      _fixed('reparameterize_proposal', False),
      _fixed('anneal_kl_step', 100000),
  ])

  # Continuous comparisons: HIS, NIS, VAE
  continuous = hyper.product([
      hyper.chainit([
          _proposal_model('gaussian', 'gaussian_vae'),
          _proposal_model('gaussian_vae', 'nis'),
          hyper.product([
              _fixed('proposal', 'gaussian'),
              _fixed('model', 'hisvae'),
              hyper.sweep('his_T', hyper.discrete([5, 10, 15])),
          ]),
      ]),
      hyper.sweep('run', hyper.discrete([0])),
      _fixed('dataset', 'jittered_mnist'),
      _fixed('reparameterize_proposal', True),
      _fixed('squash', True),
  ])

  celeba = hyper.product([
      hyper.chainit([
          _proposal_model('gaussian', 'conv_gaussian_vae'),
      ]),
      hyper.sweep('run', hyper.discrete([0])),
      _fixed('dataset', 'jittered_celeba'),
      _fixed('reparameterize_proposal', True),
      _fixed('squash', True),
      _fixed('latent_dim', 16),
      _fixed('batch_size', 36),
  ])

  hyper_parameters = {
      'snis_vs_lars': snis_vs_lars,
      'continuous': continuous,
      'celeba': celeba,
  }

  experiment = xm.ParameterSweep(experiment, hyper_parameters[FLAGS.exp_type])
  return xm.WithTensorBoard(experiment, model_dir)
def build_experiment():
  """Return the TensorBoard-wrapped sweep for the `run_neural_robust` target.

  Worker requirements get an optional RAM filesystem size
  (`FLAGS.worker_ram_fs_gb`) and, when `FLAGS.avx2` is set, the AVX2
  constraints.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  save_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  if FLAGS.worker_ram_fs_gb is not None:
    worker_requirements.tmp_ram_fs_size = FLAGS.worker_ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_neural_robust',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('load_dir', FLAGS.load_dir),
          ('save_dir', save_dir),
          ('num_steps', 100000),
          ('batch_size', 128),
          # Disabled alternative: num_trajectory 200,
          # max_trajectory_length 250.
          ('num_trajectory', 100),
          ('max_trajectory_length', 100),
      ],
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  # Presumably num_trajectory * max_trajectory_length (both 100 above);
  # keep in sync by hand if those change -- TODO confirm.
  num_samples = 100 * 100
  divergence_limits = [
      0.5 * CHI2_PERCENTILE[p] / num_samples for p in [0, 50, 60, 70, 80]
  ]

  seed_values = list(range(FLAGS.num_seeds))
  regularizer_pair = hyper.zipit([
      hyper.sweep('nu_regularizer', [0.0]),
      hyper.sweep('zeta_regularizer', [0.0])
  ])
  sweep_axes = [
      # Disabled alternatives: seed fixed to [0]; a 'bootstrap_seed' sweep
      # over range(FLAGS.num_seeds).
      hyper.sweep('seed', hyper.discrete(seed_values)),
      # Disabled extra alpha values: 0.33, 0.66.
      hyper.sweep('alpha', [0.0]),
      # Disabled narrower learning-rate grids:
      #   nu [0.0001, 0.0003] with zeta [0.0003];
      #   nu [0.001] with zeta [0.0001, 0.0003].
      hyper.sweep('nu_learning_rate', [0.001, 0.0003, 0.0001]),
      hyper.sweep('zeta_learning_rate', [0.001, 0.0003, 0.0001]),
      hyper.sweep('gamma', [0.99]),
      regularizer_pair,
      # Disabled alternative: divergence_limit [0.002, 0.005, 0.01] with
      # algae_alpha [0.001].
      hyper.sweep('divergence_limit', divergence_limits),
      hyper.sweep('algae_alpha', [0.01]),
      hyper.sweep('primal_form', [True]),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)
def build_experiment():
  """Return the TensorBoard-wrapped sweep for `run_importance_sampling_ci`.

  Worker requirements get an optional RAM filesystem size
  (`FLAGS.worker_ram_fs_gb`) and, when `FLAGS.avx2` is set, the AVX2
  constraints. The active environment-specific sweep values are the
  SmallTree ones.

  Returns:
    An `xm.WithTensorBoard` experiment around the parameter sweep.
  """
  save_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  if FLAGS.worker_ram_fs_gb is not None:
    worker_requirements.tmp_ram_fs_size = FLAGS.worker_ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  borg_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  target_args = [
      ('gfs_user', 'brain-ofirnachum'),
      ('env_name', FLAGS.env_name),
      ('load_dir', FLAGS.load_dir),
      ('num_trajectory_data', FLAGS.num_trajectory_data),
      ('save_dir', save_dir),
      ('num_steps', 10000),
      ('alpha', -1.0),
      ('ci_method', FLAGS.ci_method),
      ('mode', FLAGS.mode),
      ('tabular_obs', FLAGS.tabular_obs),
      ('use_trained_policy', False),
      ('use_doubly_robust', False),
  ]
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_importance_sampling_ci',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=target_args,
      platform=xm.Platform.CPU,
      runtime=borg_runtime)

  seed_values = list(range(FLAGS.num_seeds))
  sweep_axes = [
      hyper.sweep('seed', hyper.discrete(seed_values)),
      # Disabled per-environment settings
      # (gamma / num_trajectory / max_trajectory_length):
      #   Reacher:    [0.99] / [25] / [100]
      #   FrozenLake: [0.99] / [50, 100, 200, 500, 1000] / [100]
      #   Taxi:       [0.99] / [20, 50, 100] / [500]
      # SmallTree (active):
      hyper.sweep('gamma', [0.0]),
      hyper.sweep('num_trajectory', [50, 100, 200]),
      hyper.sweep('max_trajectory_length', [1]),
      # Universally needed:
      hyper.sweep('delta', [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]),
  ]

  sweep = xm.ParameterSweep(target, hyper.product(sweep_axes))
  return xm.WithTensorBoard(sweep, save_dir)