Example #1
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the A2C Model.

        TODO: Set actual 'good' priors for these hyper-parameters, as these were set
        somewhat randomly.
        """

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = uniform(0.5, 1.0, default=1.0)
        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0.0, 1.0, default=0.0)
        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)
        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)
        # RMSProp epsilon. It stabilizes square root computation in denominator of
        # RMSProp update.
        rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)
        # Whether to use RMSprop (default) or Adam as the optimizer
        use_rms_prop: bool = categorical(True, False, default=True)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Whether or not to normalize the advantage
        normalize_advantage: bool = categorical(True, False, default=False)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 0

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
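
The prior helpers used above (`uniform`, `log_uniform`, `categorical`) are declared as field defaults, so a class like this can be parsed straight from the command line with `simple_parsing`. A minimal sketch, assuming the nested class is reachable as `A2CModel.HParams` (the enclosing class name is not shown above) and that `default=` becomes the field's default value:

    from simple_parsing import ArgumentParser

    parser = ArgumentParser()
    # "hparams" is just the destination attribute chosen for this sketch.
    parser.add_arguments(A2CModel.HParams, dest="hparams")
    args = parser.parse_args(["--gamma", "0.95", "--ent_coef", "0.01"])
    hparams = args.hparams
    print(hparams.gamma)    # 0.95 (overrides the default of 0.99)
    print(hparams.vf_coef)  # 0.5  (falls back to the declared default)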
Example #2
    class HParams(FCNet.HParams, OutputHead.HParams):
        """ Hyper-parameters of the OutputHead used for classification. """

        # NOTE: These hparams were basically copied over from FCNet.HParams, just so
        # they're a bit more visible here.

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu":
            nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 3, default=0)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, then each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations,
                                                  default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)
Example #3
    class Options(AuxiliaryTask.Options):
        """ Options of the EWC auxiliary task. """

        # Coefficient of the EWC auxiliary task.
        # NOTE: It seems that, at least for EWC, the coefficient can often be much
        # greater than 1, which is why we overwrite the prior over that
        # hyper-parameter here.
        coefficient: float = uniform(0.0, 100.0, default=1.0)
        # Batch size to be used when computing the FIM (currently unused).
        batch_size_fim: int = 32
        # Number of observations to use for the FIM calculation.
        sample_size_fim: int = categorical(
            2, 4, 8, 16, 32, 64, 128, 256, 512, default=8
        )
        # Fisher information representation type (diagonal or block diagonal).
        fim_representation: Type[PMatAbstract] = choice(
            {"diagonal": PMatDiag, "block_diagonal": PMatKFAC},
            default=PMatDiag,
        )
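
As a rough usage sketch (the enclosing task class is assumed here to be importable as `EWCTask`, and `Options` is assumed to behave like a plain dataclass when constructed directly), the FIM representation can also be picked programmatically rather than by its string name on the command line:

    # Override the default diagonal representation with KFAC and raise the coefficient.
    options = EWCTask.Options(
        coefficient=10.0,
        fim_representation=PMatKFAC,
        sample_size_fim=32,
    )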
Example #4
    class HParams(HyperParameters):
        """ Hyper-parameters of the Settings. """

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
        # Batch size
        batch_size: int = categorical(16, 32, 64, 128, default=128)
        # Weight/importance of the task embedding to the gate function.
        s_hat: float = uniform(1.0, 100.0, default=50.0)
        # Maximum number of training epochs per task
        max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)
Example #5
    class HParams(HyperParameters):
        """ Hyper-parameters of a fully-connected network. """

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu":
            nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the network.
        hidden_layers: int = uniform(0, 10, default=3)
        # Number of neurons in each hidden layer of the network.
        # If a single value is given, then each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations,
                                                  default=nn.Tanh)
        # Dropout probability. Dropout is applied after each layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

        def __post_init__(self):
            super().__post_init__()
            if isinstance(self.activation, str):
                self.activation = self.available_activations[
                    self.activation.lower()]

            if isinstance(self.hidden_neurons, int):
                self.hidden_neurons = [self.hidden_neurons]

            # no value passed to --hidden_layers
            if self.hidden_layers == 0:
                if len(self.hidden_neurons) == 1:
                    # Default Setting: No hidden layers.
                    self.hidden_neurons = []
                elif len(self.hidden_neurons) > 1:
                    # Set the number of hidden layers to the number of passed values.
                    self.hidden_layers = len(self.hidden_neurons)
            elif self.hidden_layers > 0 and len(self.hidden_neurons) == 1:
                # Duplicate that value for each of the `hidden_layers` layers.
                self.hidden_neurons *= self.hidden_layers
            elif self.hidden_layers == 1 and not self.hidden_neurons:
                self.hidden_layers = 0

            if self.hidden_layers != len(self.hidden_neurons):
                raise RuntimeError(
                    f"Invalid values: hidden_layers ({self.hidden_layers}) != "
                    f"len(hidden_neurons) ({len(self.hidden_neurons)}).")
Example #6
    class HParams(HyperParameters):
        """ Hyper-parameters of the Pnn method. """
        # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
        num_steps: int = 200  # Only applicable in RL settings.
        # Discount factor (only used in RL settings).
        gamma: float = uniform(0.9, 0.999, default=0.99)
        # Number of hidden units (only used in RL settings).
        hidden_size: int = categorical(64, 128, 256, default=256)
        # Batch size in SL, and number of parallel environments in RL.
        # Defaults to None in RL, and 32 when in SL.
        batch_size: Optional[int] = None
        # Maximum number of training epochs per task (only used in SL settings).
        max_epochs_per_task: int = uniform(1, 20, default=10)
Example #7
    class HParams(PolicyHead.HParams):
        """ Hyper-parameters of the episodic A2C output head. """
        # Whether to normalize the advantages for each episode.
        normalize_advantages: bool = categorical(True, False, default=False)

        actor_loss_coef: float = uniform(0.1, 1, default=0.5)
        critic_loss_coef: float = uniform(0.1, 1, default=0.5)
        entropy_loss_coef: float = uniform(0, 1, default=0.1)

        # Maximum norm of the policy gradient.
        max_policy_grad_norm: Optional[float] = None

        # The discount factor.
        gamma: float = uniform(0.9, 0.999, default=0.99)
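
For context, the three coefficients typically weight the loss terms in the generic A2C-style combination sketched below; this is only the usual pattern, not necessarily the exact formula this head uses. The dummy tensor values exist only to make the snippet runnable:

    import torch

    # Illustrative per-episode terms (dummy values); only the weighting pattern matters.
    policy_loss = torch.tensor(0.8)
    value_loss = torch.tensor(1.2)
    entropy = torch.tensor(0.05)

    actor_loss_coef, critic_loss_coef, entropy_loss_coef = 0.5, 0.5, 0.1
    total_loss = (actor_loss_coef * policy_loss
                  + critic_loss_coef * value_loss
                  - entropy_loss_coef * entropy)  # entropy bonus encourages exploration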
Example #8
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the PPO Model. """

        # # The policy model to use (MlpPolicy, CnnPolicy, ...)
        # policy: Union[str, Type[ActorCriticPolicy]]

        # # The environment to learn from (if registered in Gym, can be str)
        # env: Union[GymEnv, str]

        # The learning rate, it can be a function of the current progress remaining
        # (from 1 to 0)
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # TODO: Limit this, as is done in A2C, based on the value of `setting.max_steps`.
        n_steps: int = categorical(
            32, 128, 256, 1024, 2048, 4096, 8192, default=2048
        )

        # Minibatch size
        batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)

        # Number of epochs when optimizing the surrogate loss
        n_epochs: int = 10

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        gae_lambda: float = uniform(0.8, 1.0, default=0.95)

        # Clipping parameter, it can be a function of the current progress remaining
        # (from 1 to 0).
        clip_range: float = uniform(0.05, 0.4, default=0.2)

        # Clipping parameter for the value function, it can be a function of the current
        # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI
        # implementation. If None is passed (default), no clipping will be done on the
        # value function. IMPORTANT: this clipping depends on the reward scaling.
        clip_range_vf: Optional[float] = None

        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0., 1., default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Limit the KL divergence between updates, because the clipping is not enough
        # to prevent large updates; see issue #213
        # (cf https://github.com/hill-a/stable-baselines/issues/213).
        # By default, there is no limit on the KL div.
        target_kl: Optional[float] = None

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run. Setting it to auto,
        # the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
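
To make the `n_steps` comment concrete, here is the book-keeping for a single PPO update using the defaults above and an illustrative 4 parallel environments (the 4 is not one of the hyper-parameters, it is just an example value):

    n_envs = 4                                    # example value, not an HParam above
    n_steps, batch_size, n_epochs = 2048, 64, 10  # defaults from the class above
    rollout_size = n_steps * n_envs               # 8192 transitions collected per update
    minibatches_per_epoch = rollout_size // batch_size       # 128 minibatches
    total_gradient_steps = minibatches_per_epoch * n_epochs  # 1280 gradient steps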
Example #9
class BaseHParams(HyperParameters):
    """ Set of 'base' Hyperparameters for the 'base' LightningModule. """
    # Class variable versions of the above dicts, for easier subclassing.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[Dict[
        str, Type[Optimizer]]] = available_optimizers.copy()
    available_encoders: ClassVar[Dict[
        str, Type[nn.Module]]] = available_encoders.copy()

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers,
                                             default=optim.Adam)
    # Use an encoder architecture from the torchvision.models package.
    encoder: Type[nn.Module] = categorical(
        available_encoders,
        default=tv_models.resnet18,
        # TODO: Only using these two by default when performing a sweep.
        probabilities={
            "resnet18": 0.5,
            "simple_convnet": 0.5
        },
    )

    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None

    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False

    # Settings for the output head.
    # TODO: This could be overwritten in a subclass to do classification or
    # regression or RL, etc.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

    # Whether the output head should be detached from the representations.
    # In other words, whether the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False

    def __post_init__(self):
        """Use this to initialize (or fix) any fields parsed from the
        command-line.
        """
        super().__post_init__()

    def make_optimizer(self, *args, **kwargs) -> Optimizer:
        """ Creates the Optimizer object from the options. """
        optimizer_class = self.optimizer
        options = {
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        }
        options.update(kwargs)
        return optimizer_class(*args, **options)

    @property
    def encoder_model(self) -> Type[nn.Module]:
        return self.encoder

    def make_encoder(self, encoder_name: Optional[str] = None) -> Tuple[nn.Module, int]:
        """Creates an Encoder model and returns the resulting hidden size.

        Returns:
            Tuple[nn.Module, int]: the encoder and the hidden size.
        """
        if encoder_name:
            if encoder_name not in self.available_encoders:
                raise KeyError(
                    f"No encoder with name {encoder_name} found! "
                    f"(available encoders: {list(self.available_encoders.keys())})."
                )
            encoder_model = self.available_encoders[encoder_name]
        else:
            encoder_model = self.encoder
        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_model,
            pretrained=not self.train_from_scratch,
            freeze_pretrained_weights=self.freeze_pretrained_encoder_weights,
            new_hidden_size=self.new_hidden_size,
        )
        return encoder, hidden_size
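
A small usage sketch for the two factory methods above, assuming `BaseHParams` can be instantiated directly with its defaults; the 10-way linear head is hypothetical and only gives the optimizer some extra parameters:

    import torch.nn as nn

    hparams = BaseHParams(learning_rate=3e-4, weight_decay=0.0)
    encoder, hidden_size = hparams.make_encoder()  # uses hparams.encoder by default
    output_head = nn.Linear(hidden_size, 10)       # hypothetical 10-class head
    optimizer = hparams.make_optimizer(
        list(encoder.parameters()) + list(output_head.parameters())
    )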
Example #10
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the DQN model from `stable_baselines3`.

        The command-line arguments for these are created with simple-parsing.
        """
        # The learning rate; it can be a function of the current progress (from
        # 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(
            1e-6, 1e-2, default=1e-4
        )
        # Size of the replay buffer
        buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = uniform(1_000, 100_000, default=50_000)
        # Minibatch size for each gradient update
        batch_size: Optional[int] = categorical(
            1, 2, 4, 8, 16, 32, 128, default=32
        )
        # The soft update coefficient ("Polyak update", between 0 and 1); defaults
        # to 1 for a hard update.
        tau: float = uniform(0., 1., default=1.0)
        # The discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = uniform(1, 100, default=4)
        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
        # steps as steps done in the environment during the rollout.
        gradient_steps: int = categorical(1, -1, default=1)
        # Enable a memory efficient variant of the replay buffer at a cost of
        # more complexity.
        # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        optimize_memory_usage: bool = False
        # Update the target network every ``target_update_interval`` environment
        # steps.
        target_update_interval: int = categorical(
            1_000, 5_000, 10_000, 50_000, default=10_000
        )
        # Fraction of entire training period over which the exploration rate is
        # reduced.
        exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
        # Initial value of random action probability.
        exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
        # Final value of random action probability.
        exploration_final_eps: float = uniform(0, 0.1, default=0.05)
        # The maximum value for the gradient clipping.
        max_grad_norm: float = uniform(1, 100, default=10)
        # Whether to create a second environment that will be used for
        # evaluating the agent periodically. (Only available when passing string
        # for the environment)
        create_eval_env: bool = False
        # Whether or not to build the network at the creation
        # of the instance
        _init_setup_model: bool = True
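
Since the docstring says these hyper-parameters map onto the DQN model from `stable_baselines3`, here is a rough sketch of handing a few of them over; the enclosing class name (`DQNMethod`), the CartPole environment, and the assumption that these field names match keyword arguments of the installed SB3 version are all ours:

    import gym
    from stable_baselines3 import DQN

    hparams = DQNMethod.HParams()  # enclosing class name assumed for this sketch
    env = gym.make("CartPole-v1")
    model = DQN(
        "MlpPolicy",
        env,
        learning_rate=hparams.learning_rate,
        buffer_size=hparams.buffer_size,
        batch_size=hparams.batch_size,
        gamma=hparams.gamma,
        exploration_fraction=hparams.exploration_fraction,
    )
    model.learn(total_timesteps=10_000)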
Example #11
    class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams,
                  MultiHeadModel.HParams):
        """ HParams of the Model. """
        # NOTE: All the fields below were copied over from the BaseHParams class, just
        # to improve visibility a bit.

        # Class variables that hold the available optimizers and encoders.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
            "sgd": optim.SGD,
            "adam": optim.Adam,
            "rmsprop": optim.RMSprop,
        }

        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers,
                                                 default=optim.Adam)

        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
            "vgg16": tv_models.vgg16,
            "resnet18": tv_models.resnet18,
            "resnet34": tv_models.resnet34,
            "resnet50": tv_models.resnet50,
            "resnet101": tv_models.resnet101,
            "resnet152": tv_models.resnet152,
            "alexnet": tv_models.alexnet,
            "densenet": tv_models.densenet161,
            # TODO: Add the self-supervised pl modules here!
            "simple_convnet": SimpleConvNet,
        }
        # Which encoder to use.
        encoder: Type[nn.Module] = choice(
            available_encoders,
            default=SimpleConvNet,
            # # TODO: Only considering these two for now when performing an HPO sweep.
            # probabilities={"resnet18": 0., "simple_convnet": 1.0},
        )

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the pretrained encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch.
        train_from_scratch: bool = False
        # Whether we should keep the weights of the pretrained encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Hyper-parameters of the output head.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Whether the output head should be detached from the representations.
        # In other words, whether the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False