Example #1
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):
        super().__init__(hparams=hparams)
        self.load_pretrained_config(pretrained_model_name, cache_dir)

        num_layers = self._hparams.num_layers
        num_heads = self._hparams.num_heads
        head_dim = self._hparams.head_dim

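        # Token embeddings, relative positional encodings, and input dropout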
        self.word_embed = nn.Embedding(self._hparams.vocab_size,
                                       self._hparams.hidden_dim)
        self.pos_embed = RelativePositionalEncoding(
            hparams={
                "dim": self._hparams.hidden_dim,
                "max_seq_len": self._hparams.max_seq_length,
            })
        self.dropout = nn.Dropout(self._hparams.dropout)

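        # Relative-attention bias terms; created below only when they are
        # tied (shared) across layers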
        self.r_r_bias = None
        self.r_w_bias = None
        self.r_s_bias = None

        if not self._hparams.untie_r:
            self.r_r_bias = nn.Parameter(torch.Tensor(num_heads, head_dim))
            self.r_w_bias = nn.Parameter(torch.Tensor(num_heads, head_dim))
            self.r_s_bias = (nn.Parameter(torch.Tensor(num_heads, head_dim))
                             if self._hparams.use_segments else None)

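        # Stack of relative multi-head attention and position-wise
        # feed-forward layers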
        self.attn_layers = nn.ModuleList()
        self.ff_layers = nn.ModuleList()
        rel_attn_hparams = dict_fetch(
            self._hparams, RelativeMultiheadAttention.default_hparams())
        ff_hparams = dict_fetch(self._hparams,
                                PositionWiseFF.default_hparams())
        for _ in range(num_layers):
            self.attn_layers.append(
                RelativeMultiheadAttention(self.r_r_bias,
                                           self.r_w_bias,
                                           self.r_s_bias,
                                           hparams=rel_attn_hparams))
            self.ff_layers.append(PositionWiseFF(hparams=ff_hparams))

        self.mask_emb = nn.Parameter(
            torch.Tensor(1, 1, self._hparams.hidden_dim))

        if self._IS_DECODE:
            self.lm_bias = nn.Parameter(torch.zeros(self._hparams.vocab_size))

        self.init_pretrained_weights()
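A minimal usage sketch for a constructor like this one. The class appears to follow the Texar-PyTorch hparams pattern; the class name XLNetEncoder and the concrete hparams values below are assumptions, and any key left out would fall back to default_hparams().

    # Hypothetical instantiation; unspecified hparams keep their defaults.
    encoder = XLNetEncoder(
        pretrained_model_name=None,   # skip loading a pre-trained checkpoint
        hparams={
            "num_layers": 6,
            "num_heads": 8,
            "head_dim": 64,
            "hidden_dim": 512,
            "vocab_size": 32000,
            "untie_r": False,         # create the shared r_*_bias parameters
            "dropout": 0.1,
        })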
Example #2
    def __init__(self,
                 in_channels: int,
                 in_features: Optional[int] = None,
                 hparams: Optional[Union[HParams, Dict[str, Any]]] = None):
        super().__init__(hparams=hparams)

        encoder_hparams = utils.dict_fetch(hparams,
                                           Conv1DEncoder.default_hparams())
        self._encoder = Conv1DEncoder(in_channels=in_channels,
                                      in_features=in_features,
                                      hparams=encoder_hparams)

        # Add an additional dense layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes > 0:
            if self._hparams.num_dense_layers <= 0:
                self._encoder.append_layer({"type": "Flatten"})

            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"out_features": self._num_classes})

            self._encoder.append_layer({
                "type": "Linear",
                "kwargs": logit_kwargs
            })
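A sketch of how a classifier built this way might be used, assuming the class is a Conv1DClassifier-style wrapper whose forward pass returns (logits, predictions); the names and shapes below are illustrative, not taken from the snippet.

    import torch

    clf = Conv1DClassifier(in_channels=128, in_features=50,
                           hparams={"num_classes": 10, "num_dense_layers": 1})
    inputs = torch.randn(32, 128, 50)     # (batch, channels, length)
    logits, preds = clf(inputs)           # assumed (logits, predictions) output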
Example #3
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):

        super().__init__(hparams=hparams)

        # Create the underlying encoder
        encoder_hparams = dict_fetch(hparams, XLNetEncoder.default_hparams())

        self._encoder = XLNetEncoder(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # TODO: The logic here is very similar to that in XLNetClassifier.
        #  We need to reduce the code redundancy.
        if self._hparams.use_projection:
            if self._hparams.regr_strategy == 'all_time':
                self.projection = nn.Linear(
                    self._encoder.output_size * self._hparams.max_seq_length,
                    self._encoder.output_size * self._hparams.max_seq_length)
            else:
                self.projection = nn.Linear(self._encoder.output_size,
                                            self._encoder.output_size)
        self.dropout = nn.Dropout(self._hparams.dropout)

        logit_kwargs = self._hparams.logit_layer_kwargs
        if logit_kwargs is None:
            logit_kwargs = {}
        elif not isinstance(logit_kwargs, HParams):
            raise ValueError("hparams['logit_layer_kwargs'] "
                             "must be a dict.")
        else:
            logit_kwargs = logit_kwargs.todict()

        if self._hparams.regr_strategy == 'all_time':
            self.hidden_to_logits = nn.Linear(
                self._encoder.output_size * self._hparams.max_seq_length,
                1, **logit_kwargs)
        else:
            self.hidden_to_logits = nn.Linear(
                self._encoder.output_size, 1, **logit_kwargs)

        if self._hparams.initializer:
            initialize = get_initializer(self._hparams.initializer)
            assert initialize is not None
            if self._hparams.use_projection:
                initialize(self.projection.weight)
                initialize(self.projection.bias)
            initialize(self.hidden_to_logits.weight)
            if self.hidden_to_logits.bias is not None:
                initialize(self.hidden_to_logits.bias)
        else:
            if self._hparams.use_projection:
                self.projection.apply(init_weights)
            self.hidden_to_logits.apply(init_weights)
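For reference, a short sketch of what the two regr_strategy branches imply for the final projection, assuming an encoder output size D and maximum sequence length T (values illustrative):

    import torch.nn as nn

    D, T = 768, 128
    # 'all_time': all T hidden states are flattened into one vector of size
    # D * T before the single-output regression layer.
    hidden_to_logits_all_time = nn.Linear(D * T, 1)
    # any other strategy: a single hidden state of size D is projected.
    hidden_to_logits_default = nn.Linear(D, 1)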
Example #4
    def __init__(self,
                 pretrained_model_name: Optional[str] = None,
                 cache_dir: Optional[str] = None,
                 hparams=None):

        super().__init__(hparams=hparams)

        # Create the underlying encoder
        encoder_hparams = dict_fetch(hparams,
                                     self._ENCODER_CLASS.default_hparams())

        self._encoder = self._ENCODER_CLASS(
            pretrained_model_name=pretrained_model_name,
            cache_dir=cache_dir,
            hparams=encoder_hparams)

        # Create a dropout layer
        self._dropout_layer = nn.Dropout(self._hparams.dropout)

        # Create an additional classification layer if needed
        self.num_classes = self._hparams.num_classes
        if self.num_classes <= 0:
            self._logits_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError("hparams['logit_layer_kwargs'] "
                                 "must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()

            if self._hparams.clas_strategy == 'all_time':
                self._logits_layer = nn.Linear(
                    self._encoder.output_size *
                    self._hparams.max_seq_length,
                    self.num_classes,
                    **logit_kwargs)
            else:
                self._logits_layer = nn.Linear(
                    self._encoder.output_size, self.num_classes,
                    **logit_kwargs)

        if self._hparams.initializer:
            initialize = get_initializer(self._hparams.initializer)
            assert initialize is not None
            if self._logits_layer is not None:
                initialize(self._logits_layer.weight)
                if self._logits_layer.bias is not None:
                    initialize(self._logits_layer.bias)

        self.is_binary = (self.num_classes == 1) or \
                         (self.num_classes <= 0 and
                          self._hparams.encoder.dim == 1)
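Because the wrapped encoder is taken from the _ENCODER_CLASS attribute, a concrete classifier only needs to set that attribute; the base-class and encoder names in this sketch are placeholders, not part of the snippet.

    class MyClassifier(PretrainedClassifierBase):   # placeholder base class
        _ENCODER_CLASS = MyEncoder                  # any encoder exposing .output_size

    # 'all_time' flattens all hidden states, so max_seq_length must be set.
    clf = MyClassifier(hparams={"num_classes": 3,
                                "clas_strategy": "all_time",
                                "max_seq_length": 128})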
Example #5
    def __init__(self,
                 input_size: int,
                 cell: Optional[RNNCellBase[State]] = None,
                 output_layer: Optional[nn.Module] = None,
                 hparams=None):

        super().__init__(hparams=hparams)

        # Create the underlying encoder
        encoder_hparams = dict_fetch(
            hparams, UnidirectionalRNNEncoder.default_hparams())

        self._encoder = UnidirectionalRNNEncoder(input_size=input_size,
                                                 cell=cell,
                                                 output_layer=output_layer,
                                                 hparams=encoder_hparams)

        # Create an additional classification layer if needed
        self.num_classes = self._hparams.num_classes
        if self.num_classes <= 0:
            self._logits_layer = None
        else:
            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError("hparams['logit_layer_kwargs'] "
                                 "must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()

            if self._hparams.clas_strategy == 'all_time':
                self._logits_layer = nn.Linear(
                    self._encoder.output_size * self._hparams.max_seq_length,
                    self.num_classes, **logit_kwargs)
            else:
                self._logits_layer = nn.Linear(self._encoder.output_size,
                                               self.num_classes,
                                               **logit_kwargs)

        self.is_binary = (self.num_classes == 1) or \
                         (self.num_classes <= 0 and
                          self._encoder.output_size == 1)
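A hedged usage sketch for the RNN-based classifier above; the class name is assumed from the encoder it wraps, and the shapes and values are illustrative.

    import torch

    clf = UnidirectionalRNNClassifier(input_size=100,
                                      hparams={"num_classes": 5})
    inputs = torch.randn(32, 20, 100)     # (batch, time, input_size)
    logits, preds = clf(inputs)           # assumed (logits, predictions) output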
Example #6
    def __init__(self,
                 in_channels: int,
                 in_features: Optional[int] = None,
                 hparams: Optional[Union[HParams, Dict[str, Any]]] = None):
        super().__init__(hparams=hparams)

        encoder_hparams = utils.dict_fetch(hparams,
                                           Conv1DEncoder.default_hparams())
        self._encoder = Conv1DEncoder(in_channels=in_channels,
                                      in_features=in_features,
                                      hparams=encoder_hparams)

        # Add an additional dense layer if needed
        self._num_classes = self._hparams.num_classes
        if self._num_classes > 0:
            if self._hparams.num_dense_layers <= 0:
                if in_features is None:
                    raise ValueError("'in_features' is required for logits "
                                     "layer when 'num_dense_layers' <= 0")
                self._encoder.append_layer({"type": "Flatten"})
                ones = torch.ones(1, in_channels, in_features)
                input_size = self._encoder._infer_dense_layer_input_size(ones)  # pylint: disable=protected-access
                self.hparams.logit_layer_kwargs.in_features = input_size[1]

            logit_kwargs = self._hparams.logit_layer_kwargs
            if logit_kwargs is None:
                logit_kwargs = {}
            elif not isinstance(logit_kwargs, HParams):
                raise ValueError(
                    "hparams['logit_layer_kwargs'] must be a dict.")
            else:
                logit_kwargs = logit_kwargs.todict()
            logit_kwargs.update({"out_features": self._num_classes})

            self._encoder.append_layer({
                "type": "Linear",
                "kwargs": logit_kwargs
            })
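A sketch of the case this constructor guards against: with num_dense_layers <= 0 the logits layer follows the Flatten directly, so its input size must be inferred from a dummy input, which requires in_features to be known (class name and values are illustrative).

    # Works: in_features is given, so the flattened size can be probed.
    clf = Conv1DClassifier(in_channels=64, in_features=40,
                           hparams={"num_classes": 4, "num_dense_layers": 0})

    # Raises ValueError: no in_features, but num_dense_layers <= 0.
    clf = Conv1DClassifier(in_channels=64,
                           hparams={"num_classes": 4, "num_dense_layers": 0})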