Пример #1
0
def build_experiment():
  """Builds the parameter sweep that launches `run_neural_dice`.

  Reads `FLAGS.cell`, `FLAGS.priority`, `FLAGS.save_dir`, `FLAGS.exp_name`,
  `FLAGS.gamma`, `FLAGS.env_name`, `FLAGS.load_dir` and `FLAGS.num_seeds`.

  Returns:
    The `xm.ParameterSweep` over the hyperparameters below, wrapped with
    `xm.WithTensorBoard` pointed at the computed save directory.
  """
  requirements = xm.Requirements(ram=10 * xm.GiB)
  overrides = xm.BorgOverrides()
  overrides.requirements.autopilot_params = {'min_cpu': 1}
  runtime_worker = xm.Borg(
      cell=FLAGS.cell,
      priority=FLAGS.priority,
      requirements=requirements,
      overrides=overrides,
  )

  # FLAGS.save_dir is a template containing a `{CELL}` placeholder; results
  # land in a sub-directory named after the experiment and its gamma.
  save_dir = os.path.join(
      FLAGS.save_dir.format(CELL=FLAGS.cell),
      '{EXP}_gamma{GAMMA}'.format(EXP=FLAGS.exp_name, GAMMA=FLAGS.gamma))

  executable = xm.BuildTarget(
      '//third_party/py/dice_rl/scripts:run_neural_dice',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=[
          ('env_name', FLAGS.env_name),
          ('gamma', FLAGS.gamma),
          ('save_dir', save_dir),
          ('load_dir', FLAGS.load_dir),
      ],
      platform=xm.Platform.CPU,
      runtime=runtime_worker)

  # Reference per-environment trajectory lengths (NOT applied; the sweep
  # pins 100 regardless of FLAGS.env_name):
  #   grid=100, taxi=200, Reacher-v2=40, reacher=200, cartpole=250.
  max_trajectory_length = 100

  parameters = hyper.product([
      hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
      hyper.sweep('zero_reward', hyper.categorical([False])),
      hyper.sweep('norm_regularizer', hyper.discrete([0.0, 1.0])),
      hyper.sweep('zeta_pos', hyper.categorical([True, False])),
      hyper.sweep('primal_form', hyper.categorical([False])),
      hyper.sweep('num_steps', hyper.discrete([200000])),
      hyper.sweep('f_exponent', hyper.discrete([2.0])),
      # The two regularizers are zipped, so they are listed pairwise:
      # (primal=0.0, dual=1.0) and (primal=1.0, dual=0.0).
      hyper.zipit([
          hyper.sweep('primal_regularizer', hyper.discrete([0.0, 1.0])),
          hyper.sweep('dual_regularizer', hyper.discrete([1.0, 0.0])),
      ]),
      hyper.zipit([
          hyper.sweep('nu_learning_rate', hyper.discrete([0.0001])),
          hyper.sweep('zeta_learning_rate', hyper.discrete([0.0001])),
      ]),
      hyper.sweep('alpha', hyper.discrete([0.0])),
      hyper.sweep('num_trajectory', hyper.discrete([100])),
      hyper.sweep('max_trajectory_length',
                  hyper.discrete([max_trajectory_length])),
  ])
  experiment = xm.ParameterSweep(
      executable, parameters, max_parallel_work_units=2000)
  experiment = xm.WithTensorBoard(experiment, save_dir)
  return experiment
Пример #2
0
def build_experiment():
    """Builds the parameter sweep that launches `run_tabular_coin_dice`.

    Reads `FLAGS.save_dir`, `FLAGS.exp_name`, `FLAGS.worker_ram_fs_gb`,
    `FLAGS.avx2`, `FLAGS.cell`, `FLAGS.env_name`, `FLAGS.load_dir` and
    `FLAGS.num_seeds`.

    Returns:
        The `xm.ParameterSweep` wrapped with `xm.WithTensorBoard` pointed at
        the experiment's save directory.
    """
    output_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

    worker_requirements = xm.Requirements(ram=10 * xm.GiB)
    ram_fs_gb = FLAGS.worker_ram_fs_gb
    if ram_fs_gb is not None:
        # Optional RAM-backed tmp filesystem, sized in GiB.
        worker_requirements.tmp_ram_fs_size = ram_fs_gb * xm.GiB

    borg_overrides = xm.BorgOverrides()
    borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
    if FLAGS.avx2:
        borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

    worker_runtime = xm.Borg(
        cell=FLAGS.cell,
        priority=115,
        requirements=worker_requirements,
        overrides=borg_overrides,
    )

    trajectory_count = 200
    # Build flags follow the same AVX/AVX2 choice as the constraints above.
    if FLAGS.avx2:
        compile_flags = AVX2_BUILD_FLAGS
    else:
        compile_flags = AVX_BUILD_FLAGS
    script_args = [
        ('env_name', FLAGS.env_name),
        ('load_dir', FLAGS.load_dir),
        ('save_dir', output_dir),
        ('num_steps', 100),
        ('num_trajectory', trajectory_count),
        ('max_trajectory_length', 1),
    ]
    target = xm.BuildTarget(
        '//third_party/py/dice_rl/scripts:run_tabular_coin_dice',
        build_flags=compile_flags,
        args=script_args,
        platform=xm.Platform.CPU,
        runtime=worker_runtime)

    # Trajectories have length 1, so the sample count equals the number of
    # trajectories.
    sample_count = trajectory_count
    divergence_limits = [
        1.0 * CHI2_PERCENTILE[percentile] / sample_count
        for percentile in [0, 50, 60, 70, 80, 90, 95]
    ]

    sweep = hyper.product([
        hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
        hyper.sweep('alpha', [0.0]),
        hyper.sweep('gamma', [0.0]),
        hyper.sweep('divergence_limit', divergence_limits),
        hyper.sweep('algae_alpha', [0.01]),
    ])
    return xm.WithTensorBoard(xm.ParameterSweep(target, sweep), output_dir)
Пример #3
0
def build_experiment():
  """Builds the parameter sweep that launches `run_q_estimator`.

  Reads `FLAGS.save_dir`, `FLAGS.exp_name`, `FLAGS.worker_ram_fs_gb`,
  `FLAGS.avx2`, `FLAGS.cell`, `FLAGS.env_name`, `FLAGS.load_dir` and
  `FLAGS.num_seeds`.

  Returns:
    The `xm.ParameterSweep` wrapped with `xm.WithTensorBoard` pointed at the
    experiment's save directory.
  """
  output_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  ram_fs_gb = FLAGS.worker_ram_fs_gb
  if ram_fs_gb is not None:
    # Optional RAM-backed tmp filesystem, sized in GiB.
    worker_requirements.tmp_ram_fs_size = ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  worker_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  # Build flags follow the same AVX/AVX2 choice as the constraints above.
  if FLAGS.avx2:
    compile_flags = AVX2_BUILD_FLAGS
  else:
    compile_flags = AVX_BUILD_FLAGS
  script_args = [
      ('env_name', FLAGS.env_name),
      ('load_dir', FLAGS.load_dir),
      ('save_dir', output_dir),
      ('num_trajectory', 10000),
      ('max_trajectory_length', 1),
  ]
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_q_estimator',
      build_flags=compile_flags,
      args=script_args,
      platform=xm.Platform.CPU,
      runtime=worker_runtime)

  # Alternative alpha grid (disabled): [0.0, 0.33, 0.66, 1.].
  alpha_values = [0.0, 0.1, 0.2, 0.9]
  episode_limits = [
      2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000,
  ]
  sweep = hyper.product([
      hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
      hyper.sweep('alpha', alpha_values),
      hyper.sweep('gamma', [0.0]),
      hyper.sweep('limit_episodes', episode_limits),
  ])
  return xm.WithTensorBoard(xm.ParameterSweep(target, sweep), output_dir)
Пример #4
0
def build_experiment():
    """Builds the parameter sweep that launches `run_neural_bayes_dice`.

    Reads `FLAGS.cell`, `FLAGS.priority`, `FLAGS.save_dir`, `FLAGS.exp_name`,
    `FLAGS.gamma`, `FLAGS.env_name`, `FLAGS.load_dir` and `FLAGS.num_seeds`.

    Returns:
        The `xm.ParameterSweep` wrapped with `xm.WithTensorBoard` pointed at
        the computed save directory.
    """
    worker_requirements = xm.Requirements()
    borg_overrides = xm.BorgOverrides()
    borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
    worker_runtime = xm.Borg(
        cell=FLAGS.cell,
        priority=FLAGS.priority,
        requirements=worker_requirements,
        overrides=borg_overrides,
    )

    # FLAGS.save_dir is a template containing a `{CELL}` placeholder; results
    # land in a sub-directory named after the experiment and its gamma.
    base_dir = FLAGS.save_dir.format(CELL=FLAGS.cell)
    run_name = '{EXP}_gamma{GAMMA}'.format(
        EXP=FLAGS.exp_name, GAMMA=FLAGS.gamma)
    output_dir = os.path.join(base_dir, run_name)

    script_args = [
        ('env_name', FLAGS.env_name),
        ('gamma', FLAGS.gamma),
        ('save_dir', output_dir),
        ('load_dir', FLAGS.load_dir),
        ('num_steps', 50000),
    ]
    target = xm.BuildTarget(
        '//third_party/py/dice_rl/scripts:run_neural_bayes_dice',
        build_flags=['-c', 'opt', '--copt=-mavx'],
        args=script_args,
        platform=xm.Platform.CPU,
        runtime=worker_runtime)

    sweep = hyper.product([
        hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
        hyper.sweep('kl_regularizer', hyper.discrete([5.])),
        # NOTE(review): `step / 10` yields 0.0 .. 0.5 only under true
        # division — confirm the module runs with Python 3 semantics.
        hyper.sweep('alpha',
                    hyper.discrete([step / 10 for step in range(6)])),
        hyper.sweep('alpha_target',
                    hyper.discrete([0.75, 0.8, 0.85, 0.9, 0.95])),
        hyper.sweep('num_trajectory', hyper.discrete([10, 25, 50, 100])),
        hyper.sweep('max_trajectory_length', hyper.discrete([100])),
    ])
    return xm.WithTensorBoard(xm.ParameterSweep(target, sweep), output_dir)
Пример #5
0
def build_experiment():
    """Builds the parameter sweep that launches `aggregate_numpy_is`.

    Reads `FLAGS.cell` and `FLAGS.mode`. The sweep runs the script once per
    listed results directory.

    Returns:
        The `xm.ParameterSweep` over the `directory` values below.
    """
    worker_requirements = xm.Requirements(
        ram=10 * xm.GiB,
        gpu_types=[xm.GpuType.P100],
    )

    borg_overrides = xm.BorgOverrides()
    # NOTE: the autopilot_params override ({'min_cpu': 1}) is left disabled
    # for this job; only TMPDIR is overridden.
    borg_overrides.env_vars.TMPDIR = '/tmp'

    worker_runtime = xm.Borg(
        cell=FLAGS.cell,
        priority=115,
        requirements=worker_requirements,
        overrides=borg_overrides,
    )

    script_args = [
        ('gfs_user', 'mudcats'),
        ('mode', FLAGS.mode),
    ]
    target = xm.BuildTarget(
        '//third_party/py/dice_rl/google/scripts:aggregate_numpy_is',
        build_flags=['-c', 'opt', '--copt=-mavx'],
        args=script_args,
        platform=xm.Platform.GPU,
        runtime=worker_runtime)

    result_directories = [
        '/cns/pw-d/home/mudcats/dev/algae_ci/Taxi_IS/',
        '/cns/pw-d/home/mudcats/dev/algae_ci/FrozenLake_IS/',
        '/cns/pw-d/home/mudcats/dev/algae_ci/SmallTree_IS/',
    ]
    sweep = hyper.product([
        hyper.sweep('directory', result_directories),
    ])

    return xm.ParameterSweep(target, sweep)
def build_experiment():
    """Builds the parameter sweep that launches `run_neural_robust`.

    Reads `FLAGS.save_dir`, `FLAGS.exp_name`, `FLAGS.worker_ram_fs_gb`,
    `FLAGS.avx2`, `FLAGS.cell`, `FLAGS.env_name`, `FLAGS.load_dir` and
    `FLAGS.num_seeds`.

    Returns:
        The `xm.ParameterSweep` wrapped with `xm.WithTensorBoard` pointed at
        the experiment's save directory.
    """
    output_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

    worker_requirements = xm.Requirements(ram=10 * xm.GiB)
    ram_fs_gb = FLAGS.worker_ram_fs_gb
    if ram_fs_gb is not None:
        # Optional RAM-backed tmp filesystem, sized in GiB.
        worker_requirements.tmp_ram_fs_size = ram_fs_gb * xm.GiB

    borg_overrides = xm.BorgOverrides()
    borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
    if FLAGS.avx2:
        borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

    worker_runtime = xm.Borg(
        cell=FLAGS.cell,
        priority=115,
        requirements=worker_requirements,
        overrides=borg_overrides,
    )

    # Dataset size. Alternative (disabled): num_trajectory=200,
    # max_trajectory_length=250.
    trajectory_count = 100
    trajectory_length = 100
    script_args = [
        ('env_name', FLAGS.env_name),
        ('load_dir', FLAGS.load_dir),
        ('save_dir', output_dir),
        ('num_steps', 100000),
        ('batch_size', 128),
        ('num_trajectory', trajectory_count),
        ('max_trajectory_length', trajectory_length),
    ]
    # NOTE(review): build flags stay at -mavx even when FLAGS.avx2 adds
    # AVX2_CONSTRAINTS above — confirm this is intended.
    target = xm.BuildTarget(
        '//third_party/py/dice_rl/google/scripts:run_neural_robust',
        build_flags=['-c', 'opt', '--copt=-mavx'],
        args=script_args,
        platform=xm.Platform.CPU,
        runtime=worker_runtime)

    # Total number of transitions used to scale the divergence limit.
    sample_count = trajectory_count * trajectory_length
    divergence_limits = [
        0.5 * CHI2_PERCENTILE[percentile] / sample_count
        for percentile in [0, 50, 60, 70, 80]
    ]
    # Alternatives (disabled): alpha in [0.0, 0.33, 0.66]; a fixed seed [0]
    # with a `bootstrap_seed` sweep; narrower learning-rate grids;
    # divergence_limit in [0.002, 0.005, 0.01]; algae_alpha 0.001.
    learning_rates = [0.001, 0.0003, 0.0001]

    sweep = hyper.product([
        hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
        hyper.sweep('alpha', [0.0]),
        hyper.sweep('nu_learning_rate', learning_rates),
        hyper.sweep('zeta_learning_rate', learning_rates),
        hyper.sweep('gamma', [0.99]),
        hyper.zipit([
            hyper.sweep('nu_regularizer', [0.0]),
            hyper.sweep('zeta_regularizer', [0.0]),
        ]),
        hyper.sweep('divergence_limit', divergence_limits),
        hyper.sweep('algae_alpha', [0.01]),
        hyper.sweep('primal_form', [True]),
    ])
    return xm.WithTensorBoard(xm.ParameterSweep(target, sweep), output_dir)
Пример #7
0
def build_experiment():
  """Builds the parameter sweep that launches `run_importance_sampling_ci`.

  Reads `FLAGS.save_dir`, `FLAGS.exp_name`, `FLAGS.worker_ram_fs_gb`,
  `FLAGS.avx2`, `FLAGS.cell`, `FLAGS.env_name`, `FLAGS.load_dir`,
  `FLAGS.num_trajectory_data`, `FLAGS.ci_method`, `FLAGS.mode`,
  `FLAGS.tabular_obs` and `FLAGS.num_seeds`.

  Returns:
    The `xm.ParameterSweep` wrapped with `xm.WithTensorBoard` pointed at the
    experiment's save directory.
  """
  output_dir = os.path.join(FLAGS.save_dir, FLAGS.exp_name)

  worker_requirements = xm.Requirements(ram=10 * xm.GiB)
  ram_fs_gb = FLAGS.worker_ram_fs_gb
  if ram_fs_gb is not None:
    # Optional RAM-backed tmp filesystem, sized in GiB.
    worker_requirements.tmp_ram_fs_size = ram_fs_gb * xm.GiB

  borg_overrides = xm.BorgOverrides()
  borg_overrides.requirements.autopilot_params = {'min_cpu': 1}
  if FLAGS.avx2:
    borg_overrides.requirements.constraints = AVX2_CONSTRAINTS

  worker_runtime = xm.Borg(
      cell=FLAGS.cell,
      priority=115,
      requirements=worker_requirements,
      overrides=borg_overrides,
  )

  script_args = [
      ('gfs_user', 'brain-ofirnachum'),
      ('env_name', FLAGS.env_name),
      ('load_dir', FLAGS.load_dir),
      ('num_trajectory_data', FLAGS.num_trajectory_data),
      ('save_dir', output_dir),
      ('num_steps', 10000),
      ('alpha', -1.0),
      ('ci_method', FLAGS.ci_method),
      ('mode', FLAGS.mode),
      ('tabular_obs', FLAGS.tabular_obs),
      ('use_trained_policy', False),
      ('use_doubly_robust', False),
  ]
  # NOTE(review): build flags stay at -mavx even when FLAGS.avx2 adds
  # AVX2_CONSTRAINTS above — confirm this is intended.
  target = xm.BuildTarget(
      '//third_party/py/dice_rl/google/scripts:run_importance_sampling_ci',
      build_flags=['-c', 'opt', '--copt=-mavx'],
      args=script_args,
      platform=xm.Platform.CPU,
      runtime=worker_runtime)

  # Per-environment presets; only the SmallTree values are active.
  # Disabled alternatives (gamma / num_trajectory / max_trajectory_length):
  #   Reacher:    0.99 / [25]                      / 100
  #   FrozenLake: 0.99 / [50, 100, 200, 500, 1000] / 100
  #   Taxi:       0.99 / [20, 50, 100]             / 500
  small_tree_sweeps = [
      hyper.sweep('gamma', [0.0]),
      hyper.sweep('num_trajectory', [50, 100, 200]),
      hyper.sweep('max_trajectory_length', [1]),
  ]

  all_sweeps = [
      hyper.sweep('seed', hyper.discrete(list(range(FLAGS.num_seeds)))),
  ]
  all_sweeps.extend(small_tree_sweeps)
  # `delta` is swept for every environment preset.
  all_sweeps.append(hyper.sweep('delta', [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]))

  sweep = hyper.product(all_sweeps)
  return xm.WithTensorBoard(xm.ParameterSweep(target, sweep), output_dir)