def learn(env_id, **kwargs) -> tuple:
    """Train NPG on ``env_id`` and return the trained ``(actor_critic, env)``.

    Default hyper-parameters for the 'npg' algorithm are fetched first and
    then overridden by any keyword arguments supplied by the caller.
    """
    params = utils.get_defaults_kwargs(alg='npg', env_id=env_id)
    params.update(**kwargs)
    algorithm = NaturalPolicyGradientAlgorithm(env_id=env_id, **params)
    return algorithm.learn()
def learn(env_id, **kwargs) -> tuple:
    """Train TRPO on ``env_id`` and return the trained ``(actor_critic, env)``.

    Default hyper-parameters for the 'trpo' algorithm are fetched first and
    then overridden by any keyword arguments supplied by the caller.
    """
    params = utils.get_defaults_kwargs(alg='trpo', env_id=env_id)
    params.update(**kwargs)
    algorithm = TRPOAlgorithm(env_id=env_id, **params)
    return algorithm.learn()
def _fill_scheduler(self):
    """Build the task list for multi-process execution.

    Called when ``model.compile(multi_thread=True)`` is enabled.  One task
    is created per (env, algorithm, grid point, run), each with a unique,
    monotonically increasing seed starting at ``self.init_seed``.
    """
    tasks = []
    seed = self.init_seed
    for env_id in self.env_ids:
        for alg_name, param_grid in self.alg_setup.items():
            param_grid = self._convert_to_dict(param_grid)
            exp_name = os.path.join(env_id, alg_name)
            for values in product(*param_grid.values()):
                grid_kwargs = dict(zip(param_grid.keys(), values))
                for _ in range(self.num_runs):
                    run_kwargs = utils.get_defaults_kwargs(
                        alg=alg_name, env_id=env_id)
                    logger_kwargs = setup_logger_kwargs(
                        base_dir=self.log_dir,
                        exp_name=exp_name,
                        seed=seed,
                        level=0,
                        use_tensor_board=True,
                        verbose=False)
                    run_kwargs.update(logger_kwargs=logger_kwargs,
                                      seed=seed,
                                      alg=alg_name,
                                      env_id=env_id)
                    # environment-specific settings first, then the grid
                    # search parameters (grid values win on key collision)
                    run_kwargs.update(**self.env_specific_kwargs[env_id])
                    run_kwargs.update(**grid_kwargs)
                    tasks.append(Task(id=seed,
                                      target_function=run_training,
                                      kwargs=run_kwargs))
                    seed += 1
    self.scheduler.fill(tasks=tasks)
    # cap worker count; final seed value = init_seed + number of tasks
    self.scheduler.num_cores = min(seed, self.num_cores)
def check_alg(alg_name, env_id, cores):
    """Run one epoch update with the given algorithm as a smoke test.

    Parameters
    ----------
    alg_name: name of the algorithm to instantiate via ``U.get_alg_class``.
    env_id: environment identifier passed through to the algorithm.
    cores: unused here; batch size is scaled by ``mpi_tools.num_procs()``
        instead — kept for caller compatibility.

    Returns
    -------
    tuple: ``(actor_critic, env)`` as produced by ``alg.learn()``.
    """
    defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
    defaults['epochs'] = 1
    defaults['num_mini_batches'] = 4
    # scale the per-epoch batch with the number of spawned MPI processes
    defaults['steps_per_epoch'] = 1000 * mpi_tools.num_procs()
    defaults['verbose'] = False
    defaults['logger_kwargs'] = setup_logger_kwargs(
        exp_name='unittest',
        seed=0,
        base_dir='/var/tmp/',
        datestamp=True,
        level=0,
        use_tensor_board=False,
        verbose=False)
    alg = U.get_alg_class(alg_name, env_id, **defaults)
    # sanity check of argument passing
    assert alg.alg == alg_name, f'Expected {alg_name} but got {alg.alg}'
    ac, env = alg.learn()
    return ac, env
def __init__(self,
             alg: str,
             env_id: str,
             log_dir: str,
             init_seed: int,
             unparsed_args: list = (),
             use_mpi: bool = False,
             ) -> None:
    """Class Constructor.

    Stores the experiment configuration, builds the default keyword
    arguments for the chosen algorithm, and folds any unparsed command
    line flags (``['--key', 'value', ...]``) into those kwargs.
    """
    self.alg = alg
    self.env_id = env_id
    self.log_dir = log_dir
    self.init_seed = init_seed

    # Python's multi-processing is used when MPI is not.
    self.multiple_individual_processes = False
    self.num_runs = 1
    self.num_cores = 1  # set by the compile() method
    self.training = False
    self.compiled = False
    self.trained = False
    self.use_mpi = use_mpi

    self.default_kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    self.kwargs = self.default_kwargs.copy()
    self.kwargs['seed'] = init_seed

    # Flags come as alternating '--name'/'value' pairs; strip the leading
    # '--' from names and evaluate values to native Python objects.
    # NOTE(review): eval() on CLI values is unsafe for untrusted input —
    # confirm only trusted arguments reach this constructor.
    flag_names = [arg[2:] for arg in unparsed_args[0::2]]
    flag_values = [eval(raw) for raw in unparsed_args[1::2]]
    self.kwargs.update(**dict(zip(flag_names, flag_values)))

    # defined by compile() (a specific seed might be passed there)
    self.logger_kwargs = None
    self.exp_name = os.path.join(self.env_id, self.alg)

    # assigned later by class methods
    self.model = None
    self.env = None
    self.scheduler = None
def mpi_run(self):
    """Sweep the parameter grid across all MPI processes.

    No scheduling required: every (env, algorithm, grid point, run)
    combination is trained in sequence, with a unique seed starting at
    ``self.init_seed``; only the root process prints progress.
    """
    seed = self.init_seed
    for env_id in self.env_ids:
        for alg_name, param_grid in self.alg_setup.items():
            param_grid = self._convert_to_dict(param_grid)
            exp_name = os.path.join(env_id, alg_name)
            for values in product(*param_grid.values()):
                grid_kwargs = dict(zip(param_grid.keys(), values))
                for i in range(self.num_runs):
                    if mpi_tools.is_root_process():
                        print(f'Run #{i} (with seed={seed}) and kwargs:')
                        print(grid_kwargs)
                    run_kwargs = utils.get_defaults_kwargs(
                        alg=alg_name, env_id=env_id)
                    logger_kwargs = setup_logger_kwargs(
                        base_dir=self.log_dir,
                        exp_name=exp_name,
                        seed=seed,
                        level=0,
                        use_tensor_board=True,
                        verbose=False)
                    run_kwargs.update(logger_kwargs=logger_kwargs,
                                      seed=seed,
                                      alg=alg_name,
                                      env_id=env_id)
                    # environment-specific settings first, then the grid
                    # search parameters (grid values win on key collision)
                    run_kwargs.update(**self.env_specific_kwargs[env_id])
                    run_kwargs.update(**grid_kwargs)
                    run_training(**run_kwargs)
                    seed += 1
def get_alg(env_id, **kwargs) -> CPOAlgorithm:
    """Instantiate a CPO algorithm for ``env_id``.

    Default 'cpo' hyper-parameters are fetched first and then overridden
    by any keyword arguments supplied by the caller.
    """
    params = utils.get_defaults_kwargs(alg='cpo', env_id=env_id)
    params.update(**kwargs)
    return CPOAlgorithm(env_id=env_id, **params)
def learn(env_id, **kwargs) -> tuple:
    """Train PDO on ``env_id`` and return the trained ``(actor_critic, env)``.

    Default hyper-parameters for the 'pdo' algorithm are fetched first and
    then overridden by any keyword arguments supplied by the caller.
    """
    params = utils.get_defaults_kwargs(alg='pdo', env_id=env_id)
    params.update(**kwargs)
    algorithm = PrimalDualOptimizationAlgorithm(env_id=env_id, **params)
    return algorithm.learn()