Example #1
def learn(env_id, **kwargs) -> tuple:
    defaults = utils.get_defaults_kwargs(alg='npg', env_id=env_id)
    defaults.update(**kwargs)
    alg = NaturalPolicyGradientAlgorithm(env_id=env_id, **defaults)
    ac, env = alg.learn()

    return ac, env
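Because the defaults are fetched first and then updated, any keyword argument passed to learn() overrides the algorithm's default configuration. A minimal usage sketch, where the environment id and override values are illustrative assumptions, not taken from the source:

# Hypothetical call; 'HalfCheetah-v2' and the overrides are assumptions.
ac, env = learn('HalfCheetah-v2', epochs=50, seed=0)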
Example #2
def learn(
        env_id,
        **kwargs
) -> tuple:
    defaults = utils.get_defaults_kwargs(alg='trpo', env_id=env_id)
    defaults.update(**kwargs)
    alg = TRPOAlgorithm(
        env_id=env_id,
        **defaults
    )

    ac, env = alg.learn()

    return ac, env
Example #3
    def _fill_scheduler(self):
        """ Create tasks for multi-process execution.

        will be called if model.compile(multi_thread=True) is enabled."""
        ts = list()
        _seed = self.init_seed

        for env_id in self.env_ids:
            for alg_name, param_grid in self.alg_setup.items():
                param_grid = self._convert_to_dict(param_grid)
                exp_name = os.path.join(env_id, alg_name)

                for param_set in product(*param_grid.values()):
                    grid_kwargs = dict(zip(param_grid.keys(), param_set))

                    for i in range(self.num_runs):
                        kwargs = utils.get_defaults_kwargs(alg=alg_name,
                                                           env_id=env_id)
                        logger_kwargs = setup_logger_kwargs(
                            base_dir=self.log_dir,
                            exp_name=exp_name,
                            seed=_seed,
                            level=0,
                            use_tensor_board=True,
                            verbose=False)
                        kwargs.update(logger_kwargs=logger_kwargs,
                                      seed=_seed,
                                      alg=alg_name,
                                      env_id=env_id)
                        # first, apply environment-specific settings
                        kwargs.update(**self.env_specific_kwargs[env_id])
                        # second, apply the grid-search parameters
                        kwargs.update(**grid_kwargs)

                        t = Task(id=_seed,
                                 target_function=run_training,
                                 kwargs=kwargs)
                        ts.append(t)
                        _seed += 1
        self.scheduler.fill(tasks=ts)
        # do not use more cores than there are tasks
        self.scheduler.num_cores = min(len(ts), self.num_cores)
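Each Task pairs one seed with one fully resolved kwargs dict, so the total task count is len(env_ids) × grid points × num_runs. The grid expansion itself is plain itertools.product over the grid's value lists; a self-contained sketch with a made-up parameter grid:

from itertools import product

# Hypothetical parameter grid; the keys and values are illustrative only.
param_grid = {'gamma': [0.99, 0.999], 'lam': [0.95]}
for param_set in product(*param_grid.values()):
    grid_kwargs = dict(zip(param_grid.keys(), param_set))
    print(grid_kwargs)
# -> {'gamma': 0.99, 'lam': 0.95}
# -> {'gamma': 0.999, 'lam': 0.95}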
Example #4
    def check_alg(alg_name, env_id, cores):
        """Run a one-epoch update with the given algorithm."""
        defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
        defaults['epochs'] = 1
        defaults['num_mini_batches'] = 4
        defaults['steps_per_epoch'] = 1000 * mpi_tools.num_procs()
        defaults['verbose'] = False
        print(defaults['steps_per_epoch'])

        defaults['logger_kwargs'] = setup_logger_kwargs(exp_name='unittest',
                                                        seed=0,
                                                        base_dir='/var/tmp/',
                                                        datestamp=True,
                                                        level=0,
                                                        use_tensor_board=False,
                                                        verbose=False)
        alg = U.get_alg_class(alg_name, env_id, **defaults)
        # sanity check of argument passing
        assert alg.alg == alg_name, f'Expected {alg_name} but got {alg.alg}'
        # return learn_fn(env_id, **defaults)
        ac, env = alg.learn()
        return ac, env
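Since check_alg runs a single cheap epoch, it lends itself to smoke-testing every algorithm in one loop. A hedged sketch: the environment id is an assumption, while the algorithm names come from the other examples on this page.

# Smoke-test several algorithms; the env id is a hypothetical placeholder.
for alg_name in ('npg', 'trpo', 'cpo', 'pdo'):
    ac, env = check_alg(alg_name, env_id='HalfCheetah-v2', cores=1)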
Example #5
    def __init__(self,
                 alg: str,
                 env_id: str,
                 log_dir: str,
                 init_seed: int,
                 unparsed_args: list = (),
                 use_mpi: bool = False,
                 ) -> None:
        """ Class Constructor  """
        self.alg = alg
        self.env_id = env_id
        self.log_dir = log_dir
        self.init_seed = init_seed
        # if MPI is not used, use Python's multi-processing
        self.multiple_individual_processes = False
        self.num_runs = 1
        self.num_cores = 1  # set by compile()-method
        self.training = False
        self.compiled = False
        self.trained = False
        self.use_mpi = use_mpi

        self.default_kwargs = utils.get_defaults_kwargs(alg=alg,
                                                        env_id=env_id)
        self.kwargs = self.default_kwargs.copy()
        self.kwargs['seed'] = init_seed
        # update algorithm kwargs with unparsed command-line arguments
        keys = [k[2:] for k in unparsed_args[0::2]]      # strip the leading '--'
        values = [eval(v) for v in unparsed_args[1::2]]  # eval() parses numbers/bools; trusts CLI input
        unparsed_dict = dict(zip(keys, values))
        self.kwargs.update(**unparsed_dict)
        self.logger_kwargs = None  # defined by compile (a specific seed might be passed)
        self.exp_name = os.path.join(self.env_id, self.alg)

        # assigned by class methods
        self.model = None
        self.env = None
        self.scheduler = None
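The even/odd slicing above turns leftover command-line tokens into a kwargs dict: flags sit at even indices, values at odd ones. A standalone sketch of that step with made-up arguments:

# Hypothetical CLI leftovers; the flag names are illustrative.
unparsed_args = ['--epochs', '100', '--gamma', '0.99']
keys = [k[2:] for k in unparsed_args[0::2]]      # strip the leading '--'
values = [eval(v) for v in unparsed_args[1::2]]  # eval() parses '100' -> int, '0.99' -> float
print(dict(zip(keys, values)))                   # {'epochs': 100, 'gamma': 0.99}

Note that eval() executes arbitrary expressions, so this only makes sense for trusted command lines; ast.literal_eval would be a stricter drop-in for plain literals.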
Example #6
    def mpi_run(self):
        """Run the parameter grid across all MPI processes; no scheduling required."""
        init_seed = self.init_seed
        for env_id in self.env_ids:
            for alg_name, param_grid in self.alg_setup.items():
                param_grid = self._convert_to_dict(param_grid)
                exp_name = os.path.join(env_id, alg_name)

                for param_set in product(*param_grid.values()):
                    grid_kwargs = dict(zip(param_grid.keys(), param_set))

                    for i in range(self.num_runs):
                        if mpi_tools.is_root_process():
                            print(
                                f'Run #{i} (with seed={init_seed}) and kwargs:'
                            )
                            print(grid_kwargs)

                        kwargs = utils.get_defaults_kwargs(alg=alg_name,
                                                           env_id=env_id)
                        logger_kwargs = setup_logger_kwargs(
                            base_dir=self.log_dir,
                            exp_name=exp_name,
                            seed=init_seed,
                            level=0,
                            use_tensor_board=True,
                            verbose=False)
                        kwargs.update(logger_kwargs=logger_kwargs,
                                      seed=init_seed,
                                      alg=alg_name,
                                      env_id=env_id)
                        # first, apply environment-specific settings
                        kwargs.update(**self.env_specific_kwargs[env_id])
                        # second, apply the grid-search parameters
                        kwargs.update(**grid_kwargs)
                        run_training(**kwargs)
                        init_seed += 1
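mpi_run walks the same nested grid as _fill_scheduler but trains in-process, gating console output to the root rank. As a sketch of what mpi_tools.is_root_process might look like (an assumption about mpi_tools, not its actual source), using the real mpi4py API:

from mpi4py import MPI

def is_root_process() -> bool:
    # Assumed implementation: treat MPI rank 0 as the root.
    return MPI.COMM_WORLD.Get_rank() == 0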
Example #7
def get_alg(env_id, **kwargs) -> CPOAlgorithm:
    defaults = utils.get_defaults_kwargs(alg='cpo', env_id=env_id)
    defaults.update(**kwargs)
    return CPOAlgorithm(env_id=env_id, **defaults)
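Unlike the learn() helpers, get_alg only constructs the algorithm object and leaves training to the caller, which is handy for inspecting or patching it first. A minimal usage sketch, assuming a hypothetical environment id:

alg = get_alg('HalfCheetah-v2', epochs=10)  # env id and override are illustrative
ac, env = alg.learn()                       # same (actor-critic, env) tuple as learn()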
Example #8
def learn(env_id, **kwargs) -> tuple:
    defaults = utils.get_defaults_kwargs(alg='pdo', env_id=env_id)
    defaults.update(**kwargs)
    alg = PrimalDualOptimizationAlgorithm(env_id=env_id, **defaults)
    ac, env = alg.learn()
    return ac, env