Exemplo n.º 1
0
def test_on_policy_batch_memory(request):
    '''Fixture: build an on-policy batch memory from the base spec, paired with the request params.'''
    spec = spec_util.get('experimental/misc/base.json', 'base_on_policy_batch_memory')
    spec_util.tick(spec, 'trial')
    agent, env = make_agent_env(spec)
    # prepend the freshly built memory to the parametrized fixture values
    return (agent.body.memory, ) + request.param
Exemplo n.º 2
0
def test_prioritized_replay_memory(request):
    '''Fixture: build a prioritized replay memory from the base spec, paired with the request params.'''
    spec = spec_util.get('experimental/misc/base.json', 'base_prioritized_replay_memory')
    spec_util.tick(spec, 'trial')
    agent, env = make_agent_env(spec)
    # prepend the freshly built memory to the parametrized fixture values
    return (agent.body.memory, ) + request.param
Exemplo n.º 3
0
def test_session(test_spec):
    '''Run a single Session end-to-end and check it yields a metrics dict.'''
    for unit in ('trial', 'session'):
        spec_util.tick(test_spec, unit)
    spec_util.save(test_spec, unit='trial')
    metrics = Session(test_spec).run()
    assert isinstance(metrics, dict)
Exemplo n.º 4
0
def run_trial_test(spec_file, spec_name=False):
    '''Fetch a spec, shrink it via the test override, and check Trial.run() yields a metrics dict.'''
    spec = spec_util.override_test_spec(spec_util.get(spec_file, spec_name))
    spec_util.tick(spec, 'trial')
    assert isinstance(Trial(spec).run(), dict)
Exemplo n.º 5
0
def test_trial_demo():
    '''Smoke-test the demo DQN CartPole spec as a single Trial in test mode.'''
    demo_spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(demo_spec, unit='experiment')
    demo_spec = spec_util.override_spec(demo_spec, 'test')
    spec_util.tick(demo_spec, 'trial')
    assert isinstance(Trial(demo_spec).run(), dict)
Exemplo n.º 6
0
def test_experiment():
    '''Smoke-test the demo DQN CartPole spec as a full Experiment in test mode.'''
    demo_spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(demo_spec, unit='experiment')
    demo_spec = spec_util.override_spec(demo_spec, 'test')
    spec_util.tick(demo_spec, 'experiment')
    assert isinstance(Experiment(demo_spec).run(), pd.DataFrame)
Exemplo n.º 7
0
def ray_trainable(config, reporter):
    '''
    Trainable function for ray tune: https://ray.readthedocs.io/en/latest/tune-usage.html#training-api
    The SLM Lab spec and trial_index must ride along inside config when calling ray.run(), e.g.:
    config = {
        'spec': spec,
        'trial_index': tune.sample_from(lambda spec: gen_trial_index()),
        ... # normal ray config with sample, grid search etc.
    }
    '''
    import os
    # lift the CUDA device restriction that ray imposes on workers
    os.environ.pop('CUDA_VISIBLE_DEVICES', None)
    from slm_lab.experiment.control import Trial
    # recover the data smuggled through the ray.run() config
    spec = inject_config(config.pop('spec'), config)
    trial_index = config.pop('trial_index')
    # set the counter one below so the tick lands exactly on trial_index
    spec['meta']['trial'] = trial_index - 1
    spec_util.tick(spec, 'trial')
    # run the SLM Lab trial and keep the config alongside its metrics for analysis
    metrics = Trial(spec).run()
    metrics.update(config)
    # ray report so the data is carried in ray trial.last_result
    reporter(trial_data={trial_index: metrics})
Exemplo n.º 8
0
 def run_sessions(self):
     '''Run this trial's sessions: in-process when max_session == 1, otherwise in parallel.'''
     logger.info('Running sessions')
     if self.spec['meta']['max_session'] != 1:
         return self.parallelize_sessions()
     # single session: run it inline on a private copy of the spec
     session_spec = deepcopy(self.spec)
     spec_util.tick(session_spec, 'session')
     return [Session(session_spec).run()]
Exemplo n.º 9
0
 def train(self, specfile, specname):
     '''Train an agent on the named spec: optimize + periodic eval ("enjoy" loop with learning).'''
     logger.info(
         f'Running lab mode:train with specfile:{specfile} specname:{specname}'
     )
     # FIXME Why does this need to be in env?
     os.environ['lab_mode'] = 'train'
     spec = spec_util.get(specfile, specname)
     spec_util.save(spec)  # persist the freshly resolved spec before running
     spec_util.tick(spec, 'trial')
     Trial(spec).run()
Exemplo n.º 10
0
def test_demo_performance():
    '''Train the demo DQN on CartPole briefly and check the final reward clears 50.'''
    demo_spec = spec_util.get('demo.json', 'dqn_cartpole')
    spec_util.save(demo_spec, unit='experiment')
    for env_spec in demo_spec['env']:  # cap training length so the test stays fast
        env_spec['max_frame'] = 2000
    spec_util.tick(demo_spec, 'trial')
    # NOTE(review): `trial` is never used afterwards — presumably Trial() has
    # required setup side effects; confirm before removing
    trial = Trial(demo_spec)
    spec_util.tick(demo_spec, 'session')
    sess = Session(demo_spec)
    sess.run()
    last_reward = sess.agent.body.train_df.iloc[-1]['total_reward']
    assert last_reward > 50, f'last_reward is too low: {last_reward}'
Exemplo n.º 11
0
 def parallelize_sessions(self, global_nets=None):
     '''Spawn one process per session, wait for all, and return their metrics sorted by session key.'''
     results = mp.Manager().dict()
     procs = []
     for _ in range(self.spec['meta']['max_session']):
         spec_util.tick(self.spec, 'session')
         # each worker gets its own deep copy of the ticked spec
         proc = mp.Process(target=mp_run_session,
                           args=(deepcopy(self.spec), global_nets, results))
         proc.start()
         procs.append(proc)
     for proc in procs:
         proc.join()
     return [results[key] for key in sorted(results.keys())]
Exemplo n.º 12
0
 def dev(self, specfile, specname):
     '''Like train, but clamps sessions/trials to keep iterative development fast.'''
     logger.info(
         f'Running lab mode:dev with specfile:{specfile} specname:{specname}'
     )
     # FIXME Why does this need to be in env?
     os.environ['lab_mode'] = 'dev'
     spec = spec_util.get(specfile, specname)
     spec_util.save(spec)  # persist the freshly resolved spec before running
     # keep the run small for quick feedback
     spec['meta'].update(max_session=1, max_trial=2)
     spec_util.tick(spec, 'trial')
     Trial(spec).run()
Exemplo n.º 13
0
def run_spec(spec, lab_mode):
    '''Run a spec in lab_mode'''
    os.environ['lab_mode'] = lab_mode  # set lab_mode
    spec = spec_util.override_spec(spec, lab_mode)  # conditionally override spec
    if lab_mode in TRAIN_MODES:
        spec_util.save(spec)  # first save the new spec
        # search runs a full Experiment; every other train mode runs a Trial
        runner_cls, unit = (Experiment, 'experiment') if lab_mode == 'search' else (Trial, 'trial')
        spec_util.tick(spec, unit)
        runner_cls(spec).run()
    elif lab_mode in EVAL_MODES:
        Session(spec).run()
    else:
        raise ValueError(f'Unrecognizable lab_mode not of {TRAIN_MODES} or {EVAL_MODES}')
Exemplo n.º 14
0
def run_trial_test_dist(spec_file, spec_name=False):
    '''Run a distributed (synced, 2-session) trial of a spec and check it yields a metrics dict.

    Drives Trial's distributed path manually: global nets are initialized
    explicitly and handed to parallelize_sessions(), mirroring what the
    trial does internally in distributed mode.
    '''
    spec = spec_util.get(spec_file, spec_name)
    spec = spec_util.override_spec(spec, 'test')
    spec_util.tick(spec, 'trial')
    spec['meta']['distributed'] = 'synced'
    spec['meta']['max_session'] = 2

    trial = Trial(spec)
    # manually run the logic to obtain global nets so the sessions share them
    global_nets = trial.init_global_nets()
    # NOTE: the previous version extracted the first net from global_nets into
    # a local that was never used; that dead code is removed here
    session_metrics_list = trial.parallelize_sessions(global_nets)
    trial_metrics = analysis.analyze_trial(spec, session_metrics_list)
    trial.close()
    assert isinstance(trial_metrics, dict)
Exemplo n.º 15
0
def test_spec():
    '''Fixture: base openai spec, ticked to a fresh trial and overridden for test mode.'''
    spec = spec_util.get('experimental/misc/base.json', 'base_case_openai')
    spec_util.tick(spec, 'trial')
    return spec_util.override_spec(spec, 'test')
Exemplo n.º 16
0
def test_trial(test_spec):
    '''Run a Trial end-to-end on the test spec and check it returns a metrics dict.'''
    spec_util.tick(test_spec, 'trial')
    spec_util.save(test_spec, unit='trial')
    assert isinstance(Trial(test_spec).run(), dict)