Example #1
    class HParams(FCNet.HParams, OutputHead.HParams):
        """ Hyper-parameters of the OutputHead used for classification. """

        # NOTE: These hparams were basically copied over from FCNet.HParams, just so
        # it's a bit more visible.

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 3, default=0)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, then each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations,
                                                  default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)
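
A note on the priors above: the `uniform(...)` and `categorical(...)` calls attach both a default value and a search-space prior to each field, which HPO sweeps can later sample from. As a rough, hedged illustration only (plain `random`, not the library's own sampling machinery), drawing one configuration from priors like these amounts to something like:

    import random

    from torch import nn

    # Illustrative stand-ins for the priors declared above; the real fields come
    # from simple-parsing's hyper-parameter helpers, whose API is not shown here.
    def sample_uniform(low: float, high: float) -> float:
        return random.uniform(low, high)

    def sample_categorical(options: dict):
        return random.choice(list(options.values()))

    # One random configuration drawn from search spaces like those above.
    config = {
        "hidden_layers": random.randint(0, 3),
        "hidden_neurons": round(sample_uniform(16, 512)),
        "activation": sample_categorical({"relu": nn.ReLU, "tanh": nn.Tanh}),
        "dropout_prob": sample_uniform(0.0, 0.8),
    }
    print(config)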
Example #2
    class HParams(HyperParameters):
        """ Hyper-parameters of the Settings. """

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
        # Batch size
        batch_size: int = categorical(16, 32, 64, 128, default=128)
        # weight/importance of the task embedding to the gate function
        s_hat: float = uniform(1.0, 100.0, default=50.0)
        # Maximum number of training epochs per task
        max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)
Example #3
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the A2C Model.

        TODO: Set actual 'good' priors for these hyper-parameters, as these were set
        somewhat randomly.
        """

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = uniform(0.5, 1.0, default=1.0)
        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0.0, 1.0, default=0.0)
        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)
        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)
        # RMSProp epsilon. It stabilizes the square root computation in the
        # denominator of the RMSProp update.
        rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)
        # Whether to use RMSprop (default) or Adam as the optimizer
        use_rms_prop: bool = categorical(True, False, default=True)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Whether or not to normalize the advantage
        normalize_advantage: bool = categorical(True, False, default=False)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 0

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
Example #4
    class HParams(HyperParameters):
        """ Hyper-parameters of a fully-connected network. """

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 10, default=3)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, then each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations,
                                                  default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

        def __post_init__(self):
            super().__post_init__()
            if isinstance(self.activation, str):
                self.activation = self.available_activations[
                    self.activation.lower()]

            if isinstance(self.hidden_neurons, int):
                self.hidden_neurons = [self.hidden_neurons]

            # no value passed to --hidden_layers
            if self.hidden_layers == 0:
                if len(self.hidden_neurons) == 1:
                    # Default Setting: No hidden layers.
                    self.hidden_neurons = []
                elif len(self.hidden_neurons) > 1:
                    # Set the number of hidden layers to the number of passed values.
                    self.hidden_layers = len(self.hidden_neurons)
            elif self.hidden_layers > 0 and len(self.hidden_neurons) == 1:
                # Duplicate that value for each of the `hidden_layers` layers.
                self.hidden_neurons *= self.hidden_layers
            elif self.hidden_layers == 1 and not self.hidden_neurons:
                self.hidden_layers = 0

            if self.hidden_layers != len(self.hidden_neurons):
                raise RuntimeError(
                    f"Invalid values: hidden_layers ({self.hidden_layers}) != "
                    f"len(hidden_neurons) ({len(self.hidden_neurons)}).")
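
To make the `__post_init__` reconciliation above easier to follow, here is a small self-contained paraphrase of the same rules as a standalone function (an illustration of the validation logic, not the class's actual code path), with a few usage checks:

    from typing import List, Tuple, Union

    def reconcile(hidden_layers: int,
                  hidden_neurons: Union[int, List[int]]) -> Tuple[int, List[int]]:
        """Paraphrase of the __post_init__ rules above (illustration only)."""
        if isinstance(hidden_neurons, int):
            hidden_neurons = [hidden_neurons]
        if hidden_layers == 0:
            if len(hidden_neurons) == 1:
                hidden_neurons = []  # default: no hidden layers
            else:
                hidden_layers = len(hidden_neurons)
        elif hidden_layers > 0 and len(hidden_neurons) == 1:
            hidden_neurons = hidden_neurons * hidden_layers
        elif hidden_layers == 1 and not hidden_neurons:
            hidden_layers = 0
        if hidden_layers != len(hidden_neurons):
            raise RuntimeError(
                f"Invalid values: hidden_layers ({hidden_layers}) != "
                f"len(hidden_neurons) ({len(hidden_neurons)}).")
        return hidden_layers, hidden_neurons

    assert reconcile(0, 64) == (0, [])              # single value, no layers requested
    assert reconcile(3, 64) == (3, [64, 64, 64])    # one value copied to every layer
    assert reconcile(0, [32, 16]) == (2, [32, 16])  # layer count inferred from the list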
Example #5
 class HParams(HyperParameters):
     """ Hyper-parameters of the Pnn method. """
     # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
     learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
     num_steps: int = 200  # (only applicable in RL settings.)
     # Discount factor (Only used in RL settings).
     gamma: float = uniform(0.9, 0.999, default=0.99)
     # Number of hidden units (only used in RL settings.)
     hidden_size: int = categorical(64, 128, 256, default=256)
     # Batch size in SL, and number of parallel environments in RL.
     # Defaults to None in RL, and 32 when in SL.
     batch_size: Optional[int] = None
     # Maximum number of training epochs per task. (only used in SL Settings)
     max_epochs_per_task: int = uniform(1, 20, default=10)
Example #6
    class HParams(PolicyHead.HParams):
        """ Hyper-parameters of the episodic A2C output head. """
        # Whether to normalize the advantages for each episode.
        normalize_advantages: bool = categorical(True, False, default=False)

        actor_loss_coef: float = uniform(0.1, 1, default=0.5)
        critic_loss_coef: float = uniform(0.1, 1, default=0.5)
        entropy_loss_coef: float = uniform(0, 1, default=0.1)

        # Maximum norm of the policy gradient.
        max_policy_grad_norm: Optional[float] = None

        # The discount factor.
        gamma: float = uniform(0.9, 0.999, default=0.99)
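
For context on how the coefficients above are typically used: in the usual actor-critic formulation, the actor, critic and entropy terms are weighted and summed roughly as sketched below. This is a hedged illustration of a standard A2C-style loss, not this output head's actual forward pass; all tensor arguments are hypothetical.

    import torch

    def a2c_loss(log_probs: torch.Tensor,
                 values: torch.Tensor,
                 returns: torch.Tensor,
                 entropy: torch.Tensor,
                 actor_loss_coef: float = 0.5,
                 critic_loss_coef: float = 0.5,
                 entropy_loss_coef: float = 0.1,
                 normalize_advantages: bool = False) -> torch.Tensor:
        """Sketch of how the coefficients above usually weigh the loss terms."""
        advantages = returns - values.detach()
        if normalize_advantages:
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        actor_loss = -(log_probs * advantages).mean()   # policy-gradient term
        critic_loss = (returns - values).pow(2).mean()  # value-regression term
        # The entropy bonus is subtracted so that higher entropy lowers the loss.
        return (actor_loss_coef * actor_loss
                + critic_loss_coef * critic_loss
                - entropy_loss_coef * entropy.mean())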
Example #7
    class Options(AuxiliaryTask.Options):
        """ Options of the EWC auxiliary task. """

        # Coefficient of the EWC auxiliary task.
        # NOTE: It seems to be the case that, at least for EWC, the coefficient
        # can often be much greater than 1, which is why we overwrite the prior
        # over that hyper-parameter here.
        coefficient: float = uniform(0.0, 100.0, default=1.0)
        # Batch size to be used when computing the FIM (currently unused)
        batch_size_fim: int = 32
        # Number of observations to use for FIM calculation
        # Number of observations to use for FIM calculation
        sample_size_fim: int = categorical(2, 4, 8, 16, 32, 64, 128, 256, 512,
                                           default=8)
        # Fisher information representation type (diagonal or block diagonal).
        fim_representation: Type[PMatAbstract] = choice(
            {
                "diagonal": PMatDiag,
                "block_diagonal": PMatKFAC
            },
            default=PMatDiag,
        )
Example #8
    class HParams(MultiHeadClassifier.HParams):
        """ Hyperparameters of this improved method.

        Adds the hyper-parameters related to the 'ewc-like' regularization to
        those of the ExampleMethod.

        NOTE: The `uniform()` / `log_uniform()` priors and the `HyperParameters`
        base class are just there to make it easier to run HPO sweeps for your
        Method, which isn't required for the competition.
        """

        # Coefficient of the ewc-like loss.
        reg_coefficient: float = uniform(0.0, 10.0, default=1.0)
        # Distance norm used in the regularization loss.
        reg_p_norm: int = 2
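
The `reg_coefficient` and `reg_p_norm` fields above parameterize the 'ewc-like' penalty that gets added to the task loss. Below is a minimal sketch of such a term: a plain p-norm distance between the current weights and weights anchored after the previous task, ignoring the Fisher-information weighting configured in `Options` above; `model` and `anchor_weights` are hypothetical.

    from typing import Dict

    import torch
    from torch import nn

    def ewc_like_penalty(model: nn.Module,
                         anchor_weights: Dict[str, torch.Tensor],
                         reg_coefficient: float = 1.0,
                         reg_p_norm: int = 2) -> torch.Tensor:
        """Sketch: scaled p-norm distance between current and anchored weights."""
        distances = [
            torch.dist(param, anchor_weights[name], p=reg_p_norm)
            for name, param in model.named_parameters()
            if name in anchor_weights
        ]
        penalty = torch.stack(distances).sum() if distances else torch.zeros(())
        return reg_coefficient * penalty

    # Usage sketch: total_loss = task_loss + ewc_like_penalty(model, old_weights)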
Example #9
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the PPO Model. """

        # # The policy model to use (MlpPolicy, CnnPolicy, ...)
        # policy: Union[str, Type[ActorCriticPolicy]]

        # # The environment to learn from (if registered in Gym, can be str)
        # env: Union[GymEnv, str]

        # The learning rate; it can be a function of the current progress
        # remaining (from 1 to 0)
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # TODO: Limit this, as is done in A2C, based on the value of setting.max steps.
        n_steps: int = categorical(32, 128, 256, 1024, 2048, 4096, 8192,
                                   default=2048)

        # Minibatch size
        batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)

        # Number of epochs when optimizing the surrogate loss
        n_epochs: int = 10

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        gae_lambda: float = uniform(0.8, 1.0, default=0.95)

        # Clipping parameter; it can be a function of the current progress
        # remaining (from 1 to 0).
        clip_range: float = uniform(0.05, 0.4, default=0.2)

        # Clipping parameter for the value function, it can be a function of the current
        # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI
        # implementation. If None is passed (default), no clipping will be done on the
        # value function. IMPORTANT: this clipping depends on the reward scaling.
        clip_range_vf: Optional[float] = None

        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0., 1., default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE. Default: -1
        # (only sample at the beginning of the rollout)
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Limit the KL divergence between updates, because the clipping is not
        # enough to prevent large updates; see issue #213
        # (cf. https://github.com/hill-a/stable-baselines/issues/213).
        # By default, there is no limit on the KL divergence.
        target_kl: Optional[float] = None

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run. Setting it to auto,
        # the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
Example #10
    class HParams(SB3BaseHParams):
        """ Hyper-parameters common to all on-policy algos from SB3. """

        # Learning rate for the optimizer; it can be a function of the current
        # progress remaining (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3)
        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: The default value here is much lower than in PPO, which might
        # indicate that A2C is more "on-policy" (i.e. that it requires the data
        # to be very "fresh").
        n_steps: int = uniform(3, 64, default=5, discrete=True)
        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
Example #11
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the DQN model from `stable_baselines3`.

        The command-line arguments for these are created with simple-parsing.
        """
        # The learning rate; it can be a function of the current progress
        # (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2,
                                                            default=1e-4)
        # Size of the replay buffer
        buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = uniform(1_000, 100_000, default=50_000)
        # Minibatch size for each gradient update
        batch_size: Optional[int] = categorical(1, 2, 4, 8, 16, 32, 128,
                                                default=32)
        # The soft update coefficient ("Polyak update", between 0 and 1).
        # Default: 1 for a hard update.
        tau: float = uniform(0., 1., default=1.0)
        # The discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = uniform(1, 100, default=4)
        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``). Setting this to ``-1`` means doing as
        # many gradient steps as steps done in the environment during the rollout.
        gradient_steps: int = categorical(1, -1, default=1)
        # Enable a memory efficient variant of the replay buffer at a cost of
        # more complexity.
        # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        optimize_memory_usage: bool = False
        # Update the target network every ``target_update_interval`` environment
        # steps.
        target_update_interval: int = categorical(1_000, 5_000, 10_000, 50_000,
                                                  default=10_000)
        # Fraction of entire training period over which the exploration rate is
        # reduced.
        exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
        # Initial value of random action probability.
        exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
        # Final value of random action probability.
        exploration_final_eps: float = uniform(0, 0.1, default=0.05)
        # The maximum value for the gradient clipping.
        max_grad_norm: float = uniform(1, 100, default=10)
        # Whether to create a second environment that will be used for
        # evaluating the agent periodically. (Only available when passing string
        # for the environment)
        create_eval_env: bool = False
        # Whether or not to build the network at the creation
        # of the instance
        _init_setup_model: bool = True
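
The three `exploration_*` fields above describe a linear epsilon schedule: the exploration rate decays from `exploration_initial_eps` to `exploration_final_eps` over the first `exploration_fraction` of training, then stays constant. A small self-contained sketch of that schedule (an illustration of the semantics in the comments, not SB3's internal schedule object):

    def exploration_rate(progress: float,
                         exploration_fraction: float = 0.1,
                         exploration_initial_eps: float = 1.0,
                         exploration_final_eps: float = 0.05) -> float:
        """Linear decay over the first `exploration_fraction` of training.

        `progress` is the fraction of total training done so far, in [0, 1].
        """
        if progress >= exploration_fraction:
            return exploration_final_eps
        slope = (exploration_final_eps - exploration_initial_eps) / exploration_fraction
        return exploration_initial_eps + slope * progress

    assert exploration_rate(0.0) == 1.0   # start of training
    assert exploration_rate(0.5) == 0.05  # past the decay window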
Example #12
class TrainerConfig(HyperParameters):
    """ Configuration options for the pytorch-lightning Trainer.
    
    TODO: PyTorch Lightning already has a mechanism for adding argparse
    arguments for the Trainer. Would there be a better way of merging the
    simple-parsing and pytorch-lightning approaches?
    """

    gpus: int = torch.cuda.device_count()
    overfit_batches: float = 0.0
    fast_dev_run: bool = field(default=False, nargs=0, action="store_true")

    # Maximum number of epochs to train for.
    max_epochs: int = uniform(1, 100, default=10)

    # Number of nodes to use.
    num_nodes: int = 1
    distributed_backend: Optional[str] = "dp" if gpus != 0 else None
    log_gpu_memory: bool = False

    val_check_interval: Union[int, float] = 1.0

    auto_scale_batch_size: Optional[str] = None
    auto_lr_find: bool = False
    # Floating point precision to use in the model. (See pl.Trainer)
    precision: int = choice(16, 32, default=32)
    default_root_dir: Path = Path(os.getcwd()) / "results"

    # How much of training dataset to check (floats = percent, int = num_batches)
    limit_train_batches: Union[int, float] = 1.0
    # How much of validation dataset to check (floats = percent, int = num_batches)
    limit_val_batches: Union[int, float] = 1.0
    # How much of test dataset to check (floats = percent, int = num_batches)
    limit_test_batches: Union[int, float] = 1.0

    def make_trainer(
        self,
        config: Config,
        callbacks: Optional[List[Callback]] = None,
        loggers: Optional[Iterable[LightningLoggerBase]] = None,
    ) -> Trainer:
        """ Create a Trainer object from the command-line args.
        Adds the given loggers and callbacks as well.
        """
        return Trainer(
            logger=loggers,
            callbacks=callbacks,
            gpus=self.gpus,
            num_nodes=self.num_nodes,
            max_epochs=self.max_epochs,
            distributed_backend=self.distributed_backend,
            log_gpu_memory=self.log_gpu_memory,
            overfit_batches=self.overfit_batches,
            fast_dev_run=self.fast_dev_run,
            auto_scale_batch_size=self.auto_scale_batch_size,
            auto_lr_find=self.auto_lr_find,
            # TODO: Either move the log-dir-related stuff from Config to this
            # class, or figure out a way to pass the value from Config to this
            # function
            default_root_dir=self.default_root_dir,
            limit_train_batches=self.limit_train_batches,
            limit_val_batches=self.limit_val_batches,
            limit_test_batches=self.limit_test_batches,
        )
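
One typical way to drive a `TrainerConfig` like this from the command line is simple-parsing's `ArgumentParser`. The sketch below assumes the class above is importable and that the surrounding pieces (`Config`, callbacks, loggers) are provided elsewhere in the program:

    from simple_parsing import ArgumentParser

    # Hedged sketch: parse a TrainerConfig from the CLI, then build the Trainer.
    parser = ArgumentParser()
    parser.add_arguments(TrainerConfig, dest="trainer_config")
    args = parser.parse_args()

    trainer_config: TrainerConfig = args.trainer_config
    # `config`, `callbacks` and `loggers` come from the surrounding program:
    # trainer = trainer_config.make_trainer(config=config, callbacks=callbacks,
    #                                       loggers=loggers)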
Example #13
 class Options(HyperParameters):
     """Settings for this Auxiliary Task. """
     # Coefficient used to scale the task loss before adding it to the total.
     coefficient: float = uniform(0., 1., default=1.)
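
As the comment says, this coefficient scales the auxiliary task's loss before it is added to the main loss. A minimal hedged sketch of how such a coefficient is typically applied (`main_loss` and `aux_loss` are hypothetical tensors):

    import torch

    def combine_losses(main_loss: torch.Tensor,
                       aux_loss: torch.Tensor,
                       coefficient: float = 1.0) -> torch.Tensor:
        # Scale the auxiliary task's loss, then add it to the main (task) loss.
        return main_loss + coefficient * aux_loss

    total = combine_losses(torch.tensor(0.7), torch.tensor(0.2), coefficient=0.5)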