Example #1
class HParams(HyperParameters):
    """ Hyper-parameters of the demo model. """

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # L2 regularization coefficient.
    weight_decay: float = log_uniform(1e-9, 1e-3, default=1e-6)

    # Maximum number of training epochs per task.
    max_epochs_per_task: int = 10
    # Number of epochs with increasing validation loss after which we stop training.
    early_stop_patience: int = 2
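A minimal usage sketch (an assumption based on simple-parsing's documented API, not code taken from this project): since `HParams` is a dataclass built on `HyperParameters`, its fields can be exposed as command-line arguments with simple-parsing's `ArgumentParser`.

from simple_parsing import ArgumentParser

parser = ArgumentParser()
# Each field of the dataclass becomes a command-line option (e.g. --learning_rate).
parser.add_arguments(HParams, dest="hparams")
args = parser.parse_args()
hparams: HParams = args.hparams
print(hparams.learning_rate, hparams.max_epochs_per_task)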
Example #2
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the DQN model from `stable_baselines3`.

        The command-line arguments for these are created with simple-parsing.
        """

        # The learning rate. It can be a function of the current progress
        # remaining (from 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
        # size of the replay buffer
        buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = 50_000
        # learning_starts: int = uniform(1_000, 100_000, default=50_000)
        # Minibatch size for each gradient update
        batch_size: int = 32
        # batch_size: Optional[int] = categorical(1, 2, 4, 8, 16, 32, 128, default=32)
        # The soft update coefficient ("Polyak update", between 0 and 1).
        # Default: 1 for a hard update.
        tau: float = 1.0
        # tau: float = uniform(0., 1., default=1.0)
        # The discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)
        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
        # steps as there are steps in the environment during the rollout.
        gradient_steps: int = 1
        # gradient_steps: int = categorical(1, -1, default=1)
        # Enable a memory efficient variant of the replay buffer at a cost of
        # more complexity.
        # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        optimize_memory_usage: bool = False
        # Update the target network every ``target_update_interval`` environment
        # steps.
        target_update_interval: int = categorical(1, 10, 100, 1_000, 10_000, default=10_000)
        # Fraction of entire training period over which the exploration rate is
        # reduced.
        exploration_fraction: float = 0.1
        # exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
        # Initial value of random action probability.
        exploration_initial_eps: float = 1.0
        # exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
        # Final value of random action probability.
        exploration_final_eps: float = 0.05
        # exploration_final_eps: float = uniform(0, 0.1, default=0.05)
        # The maximum value for the gradient clipping.
        max_grad_norm: float = 10
        # max_grad_norm: float = uniform(1, 100, default=10)
        # Whether to create a second environment that will be used for
        # evaluating the agent periodically. (Only available when passing string
        # for the environment)
        create_eval_env: bool = False
        # Whether or not to build the network at the creation
        # of the instance
        _init_setup_model: bool = True
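A hedged sketch of how these values might be forwarded to the actual `stable_baselines3` DQN constructor; the environment name and the wiring below are illustrative assumptions, not this project's own code.

from stable_baselines3 import DQN

hparams = HParams()  # uses the defaults declared above
model = DQN(
    "MlpPolicy",
    "CartPole-v1",                         # illustrative environment
    learning_rate=hparams.learning_rate,
    buffer_size=hparams.buffer_size,
    learning_starts=hparams.learning_starts,
    batch_size=hparams.batch_size,
    gamma=hparams.gamma,
    train_freq=hparams.train_freq,
)
model.learn(total_timesteps=10_000)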
Example #3
    class HParams(HyperParameters):
        """ Hyper-parameters of the Settings. """

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
        # Batch size
        batch_size: int = categorical(16, 32, 64, 128, default=128)
        # weight/importance of the task embedding to the gate function
        s_hat: float = uniform(1.0, 100.0, default=50.0)
        # Maximum number of training epochs per task
        max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)
Example #4
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the A2C Model.

        TODO: Set actual 'good' priors for these hyper-parameters, as these were set
        somewhat randomly.
        """

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = uniform(0.5, 1.0, default=1.0)
        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0.0, 1.0, default=0.0)
        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)
        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)
        # RMSProp epsilon. It stabilizes square root computation in denominator of
        # RMSProp update.
        rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)
        # Whether to use RMSprop (default) or Adam as the optimizer.
        use_rms_prop: bool = categorical(True, False, default=True)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Whether or not to normalize the advantage
        normalize_advantage: bool = categorical(True, False, default=False)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 0

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # When set to "auto", the code will run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
Example #5
 class HParams(HyperParameters):
     """ Hyper-parameters of the Pnn method. """
     # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
     learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
     num_steps: int = 200  # (only applicable in RL settings.)
     # Discount factor (Only used in RL settings).
     gamma: float = uniform(0.9, 0.999, default=0.99)
     # Number of hidden units (only used in RL settings.)
     hidden_size: int = categorical(64, 128, 256, default=256)
     # Batch size in SL, and number of parallel environments in RL.
     # Defaults to None in RL, and 32 when in SL.
     batch_size: Optional[int] = None
     # Maximum number of training epochs per task. (only used in SL Settings)
     max_epochs_per_task: int = uniform(1, 20, default=10)
Example #6
    class HParams(SB3BaseHParams):
        """ Hyper-parameters common to all off-policy algos from SB3. """

        # The learning rate. It can be a function of the current progress
        # remaining (from 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(1e-6,
                                                            1e-2,
                                                            default=1e-4)
        # size of the replay buffer
        buffer_size: int = uniform(100, 10_000_000, default=1_000_000)

        # How many steps of the model to collect transitions for before learning
        # starts.
        learning_starts: int = 100

        # Minibatch size for each gradient update
        batch_size: int = 256
        # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)

        # The soft update coefficient ("Polyak update", between 0 and 1).
        # Default: 1 for a hard update.
        tau: float = 0.005
        # tau: float = uniform(0., 1., default=1.0)

        # The discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Update the model every ``train_freq`` steps. Set to `-1` to disable.
        train_freq: int = 1
        # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
        # steps as there are steps in the environment during the rollout.
        gradient_steps: int = 1
        # gradient_steps: int = categorical(1, -1, default=1)

        # Enable a memory efficient variant of the replay buffer at a cost of
        # more complexity.
        # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        optimize_memory_usage: bool = False

        # Whether to create a second environment that will be used for
        # evaluating the agent periodically. (Only available when passing string
        # for the environment)
        create_eval_env: bool = False

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1
Example #7
    class HParams(HyperParameters):
        """ Hyper-Parameters of the model, as a dataclass.

        Fields get command-line arguments with simple-parsing.
        """

        # Hidden size (representation size).
        hidden_size: int = 256
        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)
        # Discount factor
        gamma: float = 0.99
        # Coefficient for the entropy term in the loss formula.
        entropy_term_coefficient: float = 0.001
        # Maximum length of an episode, when desired. (Generally not needed).
        max_episode_steps: Optional[int] = None
Example #8
    class HParams(OffPolicyModel.HParams):
        """ Hyper-parameters of the TD3 model. """
        # TODO: Add HParams specific to TD3 here, if any, and also check that the
        # default values are correct.

        # The learning rate. It can be a function of the current progress
        # remaining (from 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)

        # Minibatch size for each gradient update
        batch_size: int = 100
        # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)

        train_freq: TrainFreq = (1, "episode")

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
        # steps as there are steps in the environment during the rollout.
        gradient_steps: int = -1
Example #9
class SB3BaseHParams(HyperParameters):
    """ Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...)
    policy: Optional[Union[str, Type[BasePolicy]]] = choice("MlpPolicy",
                                                            "CnnPolicy",
                                                            default=None)
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]

    # Learning rate for the optimizer. It can be a function of the current
    # progress remaining (from 1 to 0).
    learning_rate: Union[float, Callable] = log_uniform(1e-7,
                                                        1e-2,
                                                        default=1e-4)
    # Additional arguments to be passed to the policy on creation
    policy_kwargs: Optional[Dict[str, Any]] = None
    # the log location for tensorboard (if None, no logging)
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a
    # CUDA-compatible device and fall back to cpu if that is not possible.
    device: Union[torch.device, str] = "auto"

    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False

    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing string for the
    # environment)
    create_eval_env: bool = False

    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True

    # Seed for the pseudo random generators
    seed: Optional[int] = None
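A minimal sketch of the subclassing pattern that the per-algorithm classes in the other examples follow. The class name and the narrowed prior are hypothetical; `SB3BaseHParams` and `log_uniform` are the names defined and imported in the code above.

from dataclasses import dataclass
from typing import Callable, Union

@dataclass
class MyAlgoHParams(SB3BaseHParams):
    """ Hyper-parameters for a hypothetical SB3 algorithm (illustration only). """
    # Narrow the learning-rate prior and lower its default for this algorithm.
    learning_rate: Union[float, Callable] = log_uniform(1e-5, 1e-3, default=3e-4)
    # Quieter logging by default.
    verbose: int = 0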
Example #10
 class HParams(OffPolicyModel.HParams):
     """ Hyper-parameters of the SAC Model. """
     # The learning rate. It can be a function of the current progress
     # remaining (from 1 to 0).
     learning_rate: Union[float, Callable] = log_uniform(1e-6,
                                                         1e-2,
                                                         default=3e-4)
     buffer_size: int = 1_000_000
     learning_starts: int = 100
     batch_size: int = 256
     tau: float = 0.005
     gamma: float = 0.99
     train_freq: int = 1
     gradient_steps: int = 1
     # action_noise: Optional[ActionNoise] = None
     optimize_memory_usage: bool = False
     ent_coef: Union[str, float] = "auto"
     target_update_interval: int = 1
     target_entropy: Union[str, float] = "auto"
     use_sde: bool = False
     sde_sample_freq: int = -1
Example #11
    class HParams(OffPolicyModel.HParams):
        """ Hyper-parameters of the DDPG Model. """
        # TODO: Add hparams specific to DDPG here.
        # The learning rate. It can be a function of the current progress
        # remaining (from 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(1e-6,
                                                            1e-2,
                                                            default=1e-3)

        # The verbosity level: 0 none, 1 training information, 2 debug
        verbose: int = 0

        train_freq: TrainFreq = TrainFreq(frequency=1, unit="episode")

        # Minibatch size for each gradient update
        batch_size: int = 100

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``). Set to ``-1`` to do as many gradient
        # steps as there are steps in the environment during the rollout.
        gradient_steps: int = -1
Example #12
    class HParams(SB3BaseHParams):
        """ Hyper-parameters of the PPO Model. """

        # # The policy model to use (MlpPolicy, CnnPolicy, ...)
        # policy: Union[str, Type[ActorCriticPolicy]]

        # # The environment to learn from (if registered in Gym, can be str)
        # env: Union[GymEnv, str]

        # The learning rate. It can be a function of the current progress remaining
        # (from 1 to 0).
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # TODO: Limit this, as is done in A2C, based on the value of setting.max_steps.
        n_steps: int = categorical(32,
                                   128,
                                   256,
                                   1024,
                                   2048,
                                   4096,
                                   8192,
                                   default=2048)

        # Minibatch size
        batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)

        # Number of epochs when optimizing the surrogate loss
        n_epochs: int = 10

        # Discount factor
        gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        gae_lambda: float = uniform(0.8, 1.0, default=0.95)

        # Clipping parameter. It can be a function of the current progress remaining
        # (from 1 to 0).
        clip_range: float = uniform(0.05, 0.4, default=0.2)

        # Clipping parameter for the value function. It can be a function of the
        # current progress remaining (from 1 to 0). This is a parameter specific to the
        # OpenAI implementation. If None is passed (default), no clipping will be done
        # on the value function. IMPORTANT: this clipping depends on the reward scaling.
        clip_range_vf: Optional[float] = None

        # Entropy coefficient for the loss calculation
        ent_coef: float = uniform(0., 1., default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout).
        sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Limit the KL divergence between updates, because the clipping is not enough
        # to prevent large updates; see issue #213
        # (cf https://github.com/hill-a/stable-baselines/issues/213).
        # By default, there is no limit on the KL divergence.
        target_kl: Optional[float] = None

        # the log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run. When set to "auto",
        # the code will run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
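The `uniform` / `log_uniform` / `categorical` calls above declare the search range for each field; for a quick manual sweep, variants of the dataclass can be built with `dataclasses.replace`. This is a hedged sketch with illustrative values picked from inside the declared ranges, not the project's own sweep code.

from dataclasses import replace

base = HParams()  # defaults declared above
# A small hand-picked grid inside the declared priors (illustrative values only).
candidates = [
    replace(base, learning_rate=lr, clip_range=cr)
    for lr in (1e-4, 3e-4, 1e-3)
    for cr in (0.1, 0.2, 0.3)
]
for hp in candidates:
    print(hp.learning_rate, hp.clip_range, hp.n_steps)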
Example #13
    class HParams(SB3BaseHParams):
        """ Hyper-parameters common to all on-policy algos from SB3. """

        # Learning rate for the optimizer. It can be a function of the current
        # progress remaining (from 1 to 0).
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3)
        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: The default value here is much lower than in PPO, which might indicate
        # that A2C is more "on-policy" (i.e. that it requires the data to be very
        # "fresh").
        n_steps: int = uniform(3, 64, default=5, discrete=True)
        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # When set to "auto", the code will run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
Example #14
class BaseHParams(HyperParameters):
    """ Set of 'base' Hyperparameters for the 'base' LightningModule. """
    # Class variable versions of the above dicts, for easier subclassing.
    # NOTE: These don't get parsed from the command-line.
    available_optimizers: ClassVar[Dict[
        str, Type[Optimizer]]] = available_optimizers.copy()
    available_encoders: ClassVar[Dict[
        str, Type[nn.Module]]] = available_encoders.copy()

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Which optimizer to use.
    optimizer: Type[Optimizer] = categorical(available_optimizers,
                                             default=optim.Adam)
    # Use an encoder architecture from the torchvision.models package.
    encoder: Type[nn.Module] = categorical(
        available_encoders,
        default=tv_models.resnet18,
        # TODO: Only using these two by default when performing a sweep.
        probabilities={
            "resnet18": 0.5,
            "simple_convnet": 0.5
        },
    )

    # Batch size to use during training and evaluation.
    batch_size: Optional[int] = None

    # Number of hidden units (before the output head).
    # When left to None (default), the hidden size from the pretrained
    # encoder model will be used. When set to an integer value, an
    # additional Linear layer will be placed between the outputs of the
    # encoder in order to map from the pretrained encoder's output size H_e
    # to this new hidden size `new_hidden_size`.
    new_hidden_size: Optional[int] = None
    # Retrain the encoder from scratch.
    train_from_scratch: bool = False
    # Whether we should keep the weights of the pretrained encoder frozen.
    freeze_pretrained_encoder_weights: bool = False

    # Settings for the output head.
    # TODO: This could be overwritten in a subclass to do classification or
    # regression or RL, etc.
    output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

    # Whether the output head should be detached from the representations.
    # In other words, whether the gradients from the downstream task should be
    # allowed to affect the representations.
    detach_output_head: bool = False

    def __post_init__(self):
        """Use this to initialize (or fix) any fields parsed from the
        command-line.
        """
        super().__post_init__()

    def make_optimizer(self, *args, **kwargs) -> Optimizer:
        """ Creates the Optimizer object from the options. """
        optimizer_class = self.optimizer
        options = {
            "lr": self.learning_rate,
            "weight_decay": self.weight_decay,
        }
        options.update(kwargs)
        return optimizer_class(*args, **options)

    @property
    def encoder_model(self) -> Type[nn.Module]:
        return self.encoder

    def make_encoder(self, encoder_name: Optional[str] = None) -> Tuple[nn.Module, int]:
        """Creates an encoder model and returns it along with its hidden size.

        Returns:
            Tuple[nn.Module, int]: the encoder and the hidden size.
        """
        if encoder_name and encoder_name not in self.available_encoders:
            raise KeyError(
                f"No encoder with name {encoder_name} found! "
                f"(available encoders: {list(self.available_encoders.keys())})."
            )
        if encoder_name:
            encoder_model = self.available_encoders[encoder_name]
        else:
            encoder_model = self.encoder
        encoder, hidden_size = get_pretrained_encoder(
            encoder_model=encoder_model,
            pretrained=not self.train_from_scratch,
            freeze_pretrained_weights=self.freeze_pretrained_encoder_weights,
            new_hidden_size=self.new_hidden_size,
        )
        return encoder, hidden_size
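A hedged usage sketch of `make_optimizer`; the model is a stand-in, and instantiating `BaseHParams` directly with keyword overrides is assumed to work since every field above has a default.

import torch.nn as nn

hparams = BaseHParams(learning_rate=1e-3, weight_decay=1e-6)
net = nn.Linear(128, 10)  # stand-in model
# Positional args are forwarded to the chosen optimizer class (Adam by default),
# with lr and weight_decay filled in from the hyper-parameters.
optimizer = hparams.make_optimizer(net.parameters())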
Example #15
    class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams,
                  MultiHeadModel.HParams):
        """ HParams of the Model. """
        # NOTE: All the fields below were copied from the BaseHParams class, just to
        # improve visibility a bit.

        # Class variables that hold the available optimizers and encoders.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
            "sgd": optim.SGD,
            "adam": optim.Adam,
            "rmsprop": optim.RMSprop,
        }

        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers,
                                                 default=optim.Adam)

        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
            "vgg16": tv_models.vgg16,
            "resnet18": tv_models.resnet18,
            "resnet34": tv_models.resnet34,
            "resnet50": tv_models.resnet50,
            "resnet101": tv_models.resnet101,
            "resnet152": tv_models.resnet152,
            "alexnet": tv_models.alexnet,
            "densenet": tv_models.densenet161,
            # TODO: Add the self-supervised pl modules here!
            "simple_convnet": SimpleConvNet,
        }
        # Which encoder to use.
        encoder: Type[nn.Module] = choice(
            available_encoders,
            default=SimpleConvNet,
            # # TODO: Only considering these two for now when performing an HPO sweep.
            # probabilities={"resnet18": 0., "simple_convnet": 1.0},
        )

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the pretrained encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch.
        train_from_scratch: bool = False
        # Whether we should keep the weights of the pretrained encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Hyper-parameters of the output head.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Whether the output head should be detached from the representations.
        # In other words, whether the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False
Example #16
    class HParams(HyperParameters):
        """ Hyper-parameters of the demo model. """

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)