Example no. 1
0
def test_hyper_time_limited(test_config):
    """Submit a small time-limited hyper search and verify its artifacts.

    Builds a config from the framework defaults plus the a2c and
    simple_addition configs, launches a parallel search with a 1-minute
    wall time, then checks the job directory layout, the per-experiment
    sub-directories, and the op counts reported in results.txt.
    """
    cfg = DEFAULT_CONFIG.copy()
    for extra in (a2c.config, simple_addition.config, test_config):
        cfg.update(extra)
    cfg['max_steps'] = 100000
    cfg['checkpoint_step'] = 43

    search_space = dict(n_train=2**np.array([5, 6, 7]))
    repeats = 2

    session = build_and_submit(
        name="test_hyper", config=cfg, distributions=search_space,
        n_repeats=repeats, kind='parallel', host_pool=':', wall_time='1min',
        cleanup_time='10seconds', slack_time='5seconds', ppn=2, load_avg_threshold=1e6)

    job_dir = session.job_path
    expected_files = {
        'orig.zip', 'experiments', 'os_environ.txt', 'results.zip', 'pip_freeze.txt',
        'dps_git_summary.txt', 'nodefile.txt', 'results.txt', 'job_log.txt', 'uname.txt', 'lscpu.txt'}
    assert set(os.listdir(job_dir)) == expected_files

    exp_root = os.path.join(job_dir, 'experiments')
    exp_dirs = os.listdir(exp_root)
    for exp in exp_dirs:
        assert exp.startswith('exp_')
        assert os.path.isdir(os.path.join(exp_root, exp))
    # One experiment per (repeat, distribution value) pair.
    assert len(exp_dirs) == repeats * len(search_space['n_train'])

    with open(os.path.join(job_dir, 'results.txt'), 'r') as f:
        summary = f.read()

    # All 6 ops should have fully completed within the wall time.
    for expected_line in ("n_ops: 6",
                          "n_completed_ops: 6",
                          "n_partially_completed_ops: 0",
                          "n_ready_incomplete_ops: 0",
                          "n_not_ready_incomplete_ops: 0"):
        assert expected_line in summary
Example no. 2
0
def run_experiment(name,
                   config,
                   readme,
                   distributions=None,
                   durations=None,
                   alg=None,
                   task="grid",
                   name_variables=None,
                   env_kwargs=None):
    """Run an experiment either locally or by submitting it as a slurm job.

    The desired duration is read from the command line: the positional
    "duration" argument must be "local" (run the training loop in-process)
    or one of the keys of `durations` (submit via `build_and_submit`).

    Parameters
    ----------
    name: experiment name; sanitized before use.
    config: config overrides applied on top of the default/env/alg configs.
    readme: experiment description (accepted for interface compatibility;
        not used in this function body).
    distributions: hyper-parameter distributions forwarded to build_and_submit.
    durations: mapping from duration name to run kwargs for build_and_submit;
        an entry may contain a nested 'config' dict of extra config overrides.
        Entries are not mutated by this function.
    alg: optional algorithm name; `"<alg>_config"` is looked up on alg_module.
    task: task name passed through to get_env_config.
    name_variables: optional comma-separated config attribute names whose
        values are appended to the env name for identification.
    env_kwargs: extra keyword arguments for get_env_config.
    """
    name = sanitize(name)
    durations = durations or {}

    # Only the positional "duration" argument is consumed here; remaining
    # command-line args are left for update_from_command_line below.
    parser = argparse.ArgumentParser()
    parser.add_argument("duration", choices=list(durations.keys()) + ["local"])
    args, _ = parser.parse_known_args()

    _config = DEFAULT_CONFIG.copy()

    env_kwargs = env_kwargs or {}
    env_kwargs['task'] = task
    env_config = get_env_config(**env_kwargs)
    _config.update(env_config)

    if alg:
        alg_config = getattr(alg_module, "{}_config".format(alg))
        _config.update(alg_config)
        alg_name = sanitize(alg_config.alg_name)
    else:
        alg_name = ""

    _config.update(config)
    _config.update_from_command_line()

    _config.env_name = "{}_env={}".format(name, sanitize(env_config.env_name))

    if args.duration == "local":
        _config.exp_name = "alg={}".format(alg_name)
        with _config:
            return training_loop()
    else:
        run_kwargs = Config(
            kind="slurm",
            pmem=5000,
            ignore_gpu=False,
        )

        # Work on a copy: the original code aliased durations[args.duration]
        # and `del`-eted its 'config' key in place, silently mutating the
        # caller's `durations` mapping.
        duration_args = durations[args.duration].copy()

        if 'config' in duration_args:
            # Nested 'config' entries override _config rather than being
            # passed to build_and_submit.
            _config.update(duration_args.pop('config'))

        run_kwargs.update(duration_args)
        run_kwargs.update_from_command_line()

    if name_variables is not None:
        name_variables_str = "_".join(
            "{}={}".format(sanitize(str(k)), sanitize(str(getattr(_config, k))))
            for k in name_variables.split(","))
        _config.env_name = "{}_{}".format(_config.env_name, name_variables_str)

    exp_name = "{}_alg={}_duration={}".format(_config.env_name, alg_name,
                                              args.duration)

    build_and_submit(name=exp_name,
                     config=_config,
                     distributions=distributions,
                     **run_kwargs)