Пример #1
0
def main(_):
    """Run a single bsuite experiment or a whole sweep, per ``FLAGS.bsuite_id``.

    The flag value is resolved in this order:

    1. If it is a member of ``sweep.SWEEP`` it is passed straight to ``run``.
    2. Otherwise, if ``sweep`` has an attribute with that name, the attribute
       is handed to ``pool.map_mpi`` together with ``run``. ``FLAGS.verbose``
       is set to ``False`` before the sweep is started.

    Args:
        _: Unused positional argument (presumably the leftover argv supplied
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If the flag value matches neither case above.
    """
    bsuite_id = FLAGS.bsuite_id

    if bsuite_id in sweep.SWEEP:
        print(f'Running single experiment: bsuite_id={bsuite_id}.')
        run(bsuite_id)

    elif hasattr(sweep, bsuite_id):
        bsuite_sweep = getattr(sweep, bsuite_id)
        # Report the sweep by its attribute name. Interpolating the attribute
        # value here would print the whole collection of ids after "sweep.".
        print(f'Running sweep over bsuite_id in sweep.{bsuite_id}')
        # NOTE(review): presumably disabled to keep the output of parallel
        # runs readable -- confirm how `run` consumes FLAGS.verbose.
        FLAGS.verbose = False
        pool.map_mpi(run, bsuite_sweep)

    else:
        raise ValueError(f'Invalid flag: bsuite_id={bsuite_id}.')
Пример #2
0
def main(_):
    """Run a single bsuite experiment or a whole sweep, per ``FLAGS.bsuite_id``.

    The flag value is resolved in this order:

    1. If it is a member of ``sweep.SWEEP`` it is passed straight to ``run``.
    2. Otherwise, if ``sweep`` has an attribute with that name, the attribute
       is handed to ``pool.map_mpi`` together with ``run``. ``FLAGS.verbose``
       is set to ``False`` before the sweep is started.

    Args:
        _: Unused positional argument (presumably the leftover argv supplied
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If the flag value matches neither case above.
    """
    bsuite_id = FLAGS.bsuite_id

    if bsuite_id in sweep.SWEEP:
        print('Running a single bsuite_id={}'.format(bsuite_id))
        run(bsuite_id)

    elif hasattr(sweep, bsuite_id):
        bsuite_sweep = getattr(sweep, bsuite_id)
        # Format with the sweep's attribute name. Passing the attribute value
        # would print the whole collection of ids after "sweep.".
        print(
            'Running a sweep over bsuite_id in sweep.{}'.format(bsuite_id))
        # NOTE(review): presumably disabled to keep the output of parallel
        # runs readable -- confirm how `run` consumes FLAGS.verbose.
        FLAGS.verbose = False
        pool.map_mpi(run, bsuite_sweep)

    else:
        raise ValueError('Invalid flag bsuite_id={}'.format(bsuite_id))
Пример #3
0
        'qnet_settings': qnet_settings,
        'start_optimization': 64,
        'update_qnet_every': 2,
        'update_target_every': 50,
        'ddqn': True,
        'n_steps': 4,
        'duelling_dqn': True,
        'prioritized_buffer': True,
        'alpha': 0.6,
        'beta0': 0.4,
        'beta_increment': 1e-6
    }

    agent = Agent(action_spec=env.action_spec(),
                  observation_spec=env.observation_spec(),
                  device=device,
                  settings=settings)

    experiment.run(agent=agent,
                   environment=env,
                   num_episodes=env.bsuite_num_episodes,
                   verbose=False)
    return bsuite_id


# Launch the sweep only when this file is executed as a script. Without the
# guard the sweep would also start on every import of this module, which is
# unsafe if `pool.map_mpi` starts worker processes that re-import the main
# module (the multiprocessing "spawn"/"forkserver" start methods do this).
if __name__ == '__main__':
    # Experiments to run; use `sweep.SWEEP` here instead to run the other
    # sweep this script was previously configured for.
    bsuite_sweep = sweep.CARTPOLE_SCALE
    # NOTE(review): the third argument is presumably the number of worker
    # processes -- confirm against the signature of `pool.map_mpi`.
    pool.map_mpi(run, bsuite_sweep, 6)