Example #1
 def __init__(
     self,
     language_model,
     prediction_heads,
     embeds_dropout_prob,
     lm_output_types,
     device,
     loss_aggregation_fn=None,
     head_feats=False,
     freeze_model=False,
     custom_pooling_strategy=None,
     prediction_layer=-1,
 ):
     self.head_feats = head_feats
     self.pooler = None
     super(CustomAdaptiveModel,
           self).__init__(language_model, prediction_heads,
                          embeds_dropout_prob, lm_output_types, device,
                          loss_aggregation_fn)
     if freeze_model:
         for p in self.language_model.parameters():
             p.requires_grad = False
     if custom_pooling_strategy is not None:
         config = self.language_model.model.config
         config.summary_type = custom_pooling_strategy
         self.pooler = SequenceSummary(config)
         self.pooler.apply(self.language_model.model._init_weights)
         logger.info(
             f"Using custom pooling strategy: {custom_pooling_strategy}")
     self.prediction_layer = prediction_layer
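Purely as an illustration (not part of the original FARM code), a forward pass of such an adaptive model might apply the optional pooler like this; the method name forward_lm and the batch unpacking are assumptions:

 def forward_lm(self, **batch):
     # Token-level hidden states from the wrapped language model
     sequence_output = self.language_model(**batch)[0]
     if self.pooler is not None:
         # Apply the custom pooling strategy configured in __init__
         pooled_output = self.pooler(sequence_output)
     else:
         # Fall back to the first token's representation
         pooled_output = sequence_output[:, 0]
     return sequence_output, pooled_output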
Example #2
    def __init__(self, hparams):
        super().__init__()
        # Store hyperparameters; they become accessible as self.hparams
        self.save_hyperparameters(hparams)
        config = AutoConfig.from_pretrained(self.hparams.model_name)
        self.model = AutoModel.from_pretrained(self.hparams.model_name)
        self.pooler = SequenceSummary(config)

        self.classifier = nn.Linear(config.d_model, self.hparams.num_classes)

        self.concept_store = torch.load(self.hparams.concept_store)

        self.phrase_logits = TimeDistributed(
            nn.Linear(config.d_model, self.hparams.num_classes))
        self.sequence_summary = SequenceSummary(config)

        self.topk = self.hparams.topk
        # self.topk_gil_mlp = TimeDistributed(nn.Linear(config.d_model,
        #                                               self.hparams.num_classes))

        self.topk_gil_mlp = nn.Linear(config.d_model, self.hparams.num_classes)

        self.multihead_attention = torch.nn.MultiheadAttention(config.d_model,
                                                               dropout=0.2,
                                                               num_heads=8)

        self.activation = nn.ReLU()

        self.lamda = self.hparams.lamda
        self.gamma = self.hparams.gamma

        self.dropout = nn.Dropout(config.dropout)
        self.loss = nn.CrossEntropyLoss()
Example #3
    def __init__(self, config):
        super().__init__(config)

        config.num_labels = 1
        self.transformer = OpenAIGPTModel(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.multiple_choice_head = SequenceSummary(config)
        self.persona_head = SequenceSummary(config)

        self.init_weights()
Example #4
 def __init__(self, transformer_model: PreTrainedModel,
              config: PretrainedConfig, pad_idx: int, cls_idx: int):
     super(GPT2Classifier, self).__init__()
     self.transformer = transformer_model
     self.head = SequenceSummary(config)
     self.pad_idx = pad_idx
     self.cls_idx = cls_idx
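As a hedged sketch of how this head could be used (the forward signature below is an assumption, not taken from the source): with summary_type='cls_index', SequenceSummary gathers the hidden state at the position of the classification token, which is why pad_idx and cls_idx are stored.

 def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
     # Mask out padding for the transformer
     attention_mask = (input_ids != self.pad_idx).long()
     hidden_states = self.transformer(input_ids, attention_mask=attention_mask)[0]
     # Position of the classification token in each sequence
     cls_positions = (input_ids == self.cls_idx).long().argmax(dim=-1)
     # SequenceSummary in 'cls_index' mode summarizes at that position
     return self.head(hidden_states, cls_index=cls_positions)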
Example #5
 def __init__(self, config: transformers.PretrainedConfig):
     super().__init__(config)
     self.config = config
     assert hasattr(config, NUM_SEQUENCE_LABELS)
     self.num_labels = getattr(config, NUM_SEQUENCE_LABELS)
     self.sequence_summary = SequenceSummary(config)
     self.classifier = nn.Linear(config.hidden_size, self.num_labels)
Example #6
 def __init__(self, config):
     super(XLNetForXMLC, self).__init__(config)
     self.num_labels = config.num_labels
     self.transformer = XLNetModel(config)
     self.sequence_summary = SequenceSummary(config)
     self.logits_proj = nn.Linear(config.d_model, config.num_labels)
     self.init_weights()
Example #7
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 3
        # RTE Task
        self.num_labels_3way = 3
        # RTE SPs multi-label task
        self.num_labels_multi = 5
        # RTE span detection task
        self.start_n_top = config.start_n_top
        self.end_n_top = config.end_n_top

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj_3way = nn.Linear(config.d_model, self.num_labels_3way)
        self.logits_proj_multi = nn.Linear(config.d_model,
                                           self.num_labels_multi)
        self.weights_3way = [1, 1.3, 3.3]
        self.weights_multi = [15, 10, 15, 5, 5]
        # `device` is assumed to be defined at module level (e.g. torch.device("cuda"))
        self.class_weights_3way = torch.FloatTensor(
            self.weights_3way).to(device)
        self.class_weights_multi = torch.FloatTensor(
            self.weights_multi).to(device)

        # RTE span detection task
        self.start_logits = PoolerStartLogits(config)
        self.end_logits = PoolerEndLogits(config)
        self.answer_class = PoolerAnswerClass(config)

        self.init_weights()
Example #8
    def __init__(self, config):
        super(XLMForMultiLabelSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLMModel(config)
        self.sequence_summary = SequenceSummary(config)

        self.init_weights()
Example #9
    def __init__(self, config):
        print("************ THIS MODEL COMES FROM CS224N PROJECT ************")
        super().__init__(config)
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.multiple_choice_head = SequenceSummary(config)

        self.init_weights()
Example #10
    def __init__(self, config):
        super(GPT2DoubleHeadsModel, self).__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.multiple_choice_head = SequenceSummary(config)

        self.init_weights()
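For context (a simplified sketch in the spirit of the transformers double-heads models, not code from this example): the multiple-choice head is a SequenceSummary that scores each candidate at the position given by mc_token_ids, while the LM head predicts the next token.

    def forward(self, input_ids, mc_token_ids=None):
        # input_ids: (batch, num_choices, seq_len)
        hidden_states = self.transformer(input_ids)[0]
        lm_logits = self.lm_head(hidden_states)  # next-token logits
        # One score per answer choice, taken at the position given by mc_token_ids
        mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids).squeeze(-1)
        return lm_logits, mc_logits  # mc_logits: (batch, num_choices)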
Example #11
    def __init__(self, config):
        super(XLNetForXMC, self).__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)

        self.init_weights()
Example #12
    def __init__(self, config):
        super(XLNetForSequenceClassificationGivenEmbedding, self).__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModelWithoutEmbedding(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, 1)
        self.init_weights()
Example #13
    def __init__(self, config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.cls_head = SequenceSummary(config)

        self.init_weights()
Example #14
 def __init__(self, model, device, pretrained_config):
     super(modified_XLNet, self).__init__()
     self.model = model
     self.cls_linear_1 = nn.Linear(768, 300)
     self.cls_linear_2 = nn.Linear(300, 2)
     self.device = device
     self.dropout_1 = nn.Dropout(0.5)
     self.dropout_2 = nn.Dropout(0.5)
     self.sequence_summary = SequenceSummary(pretrained_config)
Example #15
    def __init__(self, config, weight=None):
        super(FlaubertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.weight = weight

        self.transformer = FlaubertModel(config)
        self.sequence_summary = SequenceSummary(config)

        self.init_weights()
Example #16
 def __init__(self, dropout_rate=0.3, n_outputs=2):
     super(XLNETClassifier, self).__init__()
     self.pretrained_model = XLNetModel.from_pretrained("xlnet-base-cased")
     self.sequence_summary = SequenceSummary(self.pretrained_model.config)
     self.d1 = torch.nn.Dropout(dropout_rate)
     self.l1 = torch.nn.Linear(768, 64)
     self.bn1 = torch.nn.LayerNorm(64)
     self.d2 = torch.nn.Dropout(dropout_rate)
     self.l2 = torch.nn.Linear(64, n_outputs)
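A possible forward pass for this classifier (an illustrative sketch, assuming the 768-dim XLNet hidden states are first summarized by SequenceSummary and then passed through the small MLP defined above):

 def forward(self, input_ids, attention_mask=None):
     hidden_states = self.pretrained_model(input_ids,
                                           attention_mask=attention_mask)[0]
     x = self.sequence_summary(hidden_states)   # (batch, 768)
     x = self.d1(x)
     x = torch.nn.functional.relu(self.bn1(self.l1(x)))
     x = self.d2(x)
     return self.l2(x)                          # (batch, n_outputs)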
Example #17
    def __init__(self, config):
        super().__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.debias_head = nn.functional.linear
        self.multiple_choice_head = SequenceSummary(config)

        self.init_weights()
Example #18
 def __init__(self, config):
     super(XLNet_Reader, self).__init__(config)
     self.config = config
     self.xlnet = XLNetModel(config)
     self.sentence_summary = SequenceSummary(config)
     # self.dropout =nn.Dropout(0.3)
     # self.aggregation = nn.LSTM(config.hidden_size,config.hidden_size,batch_first=True,num_layers=1,bidirectional=True)
     # self.batch_norm_for_rnn = nn.BatchNorm1d(config.hidden_size)
     self.dropout = nn.Dropout(0.1)
Example #19
    def __init__(self,
                 pretrained_model_type,
                 pretrained_model,
                 tagset_size,
                 class_size,
                 dropout=0.,
                 device=None,
                 extFeats_dim=None,
                 multi_class=False,
                 task_st='NN',
                 task_sc='CLS'):
        """Initialize model."""
        super(Transformers_joint_slot_and_intent, self).__init__()
        self.tagset_size = tagset_size
        self.class_size = class_size
        self.dropout = dropout
        self.device = device
        self.extFeats_dim = extFeats_dim
        self.multi_class = multi_class
        self.task_st = task_st  # 'NN', 'NN_crf'
        self.task_sc = task_sc  # None, 'CLS', 'max', 'CLS_max'

        self.dropout_layer = nn.Dropout(p=self.dropout)

        self.pretrained_model_type = pretrained_model_type
        self.pretrained_model = pretrained_model
        if self.pretrained_model_type == 'xlnet':
            self.sequence_summary = SequenceSummary(
                self.pretrained_model.config)
        self.embedding_dim = self.pretrained_model.config.hidden_size

        # The pretrained transformer provides word representations; optional external features are appended to them
        self.append_feature_dim = 0
        if self.extFeats_dim:
            self.append_feature_dim += self.extFeats_dim
            self.extFeats_linear = nn.Linear(self.append_feature_dim,
                                             self.append_feature_dim)
        else:
            self.extFeats_linear = None

        # The linear layer that maps from hidden state space to tag space
        if self.task_st == 'NN':
            self.hidden2tag = nn.Linear(
                self.embedding_dim + self.append_feature_dim, self.tagset_size)
        else:
            self.hidden2tag = nn.Linear(
                self.embedding_dim + self.append_feature_dim,
                self.tagset_size + 2)
            self.crf_layer = crf.CRF(self.tagset_size, self.device)
        if self.task_sc == 'CLS' or self.task_sc == 'max':
            self.hidden2class = nn.Linear(self.embedding_dim, self.class_size)
        elif self.task_sc == 'CLS_max':
            self.hidden2class = nn.Linear(self.embedding_dim * 2,
                                          self.class_size)
        else:
            pass
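As an illustrative helper (not in the original source), the sentence-level vector implied by task_sc could be computed along these lines, assuming an 'xlnet' backbone (sequence_summary is only created in that branch): 'CLS' uses the SequenceSummary, 'max' max-pools over tokens, and 'CLS_max' concatenates both.

    def _sentence_vector(self, hidden_states):
        # Hypothetical pooling helper for the task_sc modes above
        if self.task_sc == 'CLS':
            return self.sequence_summary(hidden_states)
        if self.task_sc == 'max':
            return hidden_states.max(dim=1).values
        if self.task_sc == 'CLS_max':
            return torch.cat([self.sequence_summary(hidden_states),
                              hidden_states.max(dim=1).values], dim=-1)
        return None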
Example #20
    def __init__(self, config, *args, **kwargs):
        super().__init__(config)
        self.logits_device = torch.device(kwargs.pop("logits_device"))
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config, **kwargs)
        self.sequence_summary = SequenceSummary(config).to(self.logits_device)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels).to(self.logits_device)

        self.init_weights()
Example #21
    def __init__(self, config, pos_weight=None):
        super(XLNetForMultiLabelSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.pos_weight = pos_weight

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)

        self.init_weights()
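A minimal sketch of the multi-label forward pass such a head typically pairs with (assumed, not copied from the source): the summary vector is projected to num_labels logits and scored with BCEWithLogitsLoss, which is where pos_weight comes in.

    def forward(self, input_ids, attention_mask=None, labels=None):
        hidden_states = self.transformer(input_ids, attention_mask=attention_mask)[0]
        logits = self.logits_proj(self.sequence_summary(hidden_states))
        if labels is None:
            return logits
        # pos_weight re-balances positives per label in the multi-label loss
        loss_fct = nn.BCEWithLogitsLoss(pos_weight=self.pos_weight)
        loss = loss_fct(logits, labels.float())
        return loss, logits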
Example #22
    def __init__(self, config):
        super(XLNetForXMLC, self).__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)
        self.loss_fct = HingeLoss(margin=1.0, squared=True)

        self.init_weights()
Example #23
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        # NOTE: `self.num_size` (width of extra features concatenated to the summary) is assumed to be set elsewhere
        hidden_size = config.d_model + self.num_size
        self.logits_proj = nn.Linear(hidden_size, config.num_labels)

        self.init_weights()
Example #24
    def __init__(self, model_name, num_fine_labels, num_coarse_labels):
        super().__init__()

        self.transformer = XLNetModel.from_pretrained(
            model_name, num_labels=num_fine_labels)
        self.sequence_summary = SequenceSummary(self.transformer.config)
        self.classifier_coarse = nn.Linear(self.transformer.config.d_model,
                                           num_coarse_labels)
        self.classifier_fine = nn.Linear(self.transformer.config.d_model,
                                         num_fine_labels)
Example #25
    def __init__(self, config):
        super(GPT2ForSequenceRanking, self).__init__(config)
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        config.summary_type = 'mean'
        self.good_head = SequenceSummary(config)

        self.size = config.n_embd

        self.init_weights()
Example #26
    def __init__(self, config):
        super(GPT2MultiHeadsAdversarialClModel, self).__init__(config)
        config.num_labels = 1
        self.transformer = GPT2Model(config)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
        self.multiple_choice_head = SequenceSummary(config)

        if not hasattr(config, 'cls'):
            config.cls = {}

        # set default values
        if 'default' not in config.cls:
            config.cls['default'] = {
                "summary_first_dropout": 0.1,
                "summary_proj_to_labels": True,
                "summary_type": 'cls_index',
                "summary_use_proj": True,
                "is_adversarial": False,
            }

        self.cl_heads = {}
        for cl_name, cl_config in config.cls.items():
            if cl_name != 'default':
                assert 'labels' in cl_config, f'no labels set in config for classifier {cl_name}'
                _cl_config = copy.deepcopy(config.cls['default'])
                _cl_config.update(cl_config)
                _cl_config['num_labels'] = len(_cl_config['labels'])
                _cl_config['hidden_size'] = config.hidden_size
                self.cl_heads[cl_name] = SequenceSummary(
                    PretrainedConfig(**_cl_config))
                setattr(self.cl_heads[cl_name], 'is_adversarial',
                        _cl_config.get('is_adversarial', False))
                self.add_module(name=f'cl_head_{cl_name}',
                                module=self.cl_heads[cl_name])

        config.num_labels = 1

        self.init_weights()
Example #27
    def __init__(self, config, lambd, mean_pool=False):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.transformer = XLNetModel(config)
        self.sequence_summary = SequenceSummary(config)
        self.logits_proj = nn.Linear(config.d_model, config.num_labels)

        self.lambd = lambd
        print("Gradient reversal Prarameter is: {}".format(self.lambd))
        self.grl = GradientReversal(self.lambd)
        self.domain_classifier = nn.Linear(config.hidden_size, 2)

        self.init_weights()
Example #28
 def __init__(self,
              config,
              lossfct=None,
              CEL_type='mean',
              quick_return=False):
     super().__init__(config)
     self.num_labels = config.num_labels
     self.lossfct = lossfct
     self.transformer = XLNetModel(config)
     self.sequence_summary = SequenceSummary(config)
     #self.dropout = nn.Dropout(0.1)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.CEL_type = CEL_type
     self.quick_return = quick_return
     self.init_weights()
Example #29
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a pretrained model by supplying

        * the name of a remote model on s3 ("distilbert-base-german-cased" ...)
        * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
        * OR a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
        :type pretrained_model_name_or_path: str

        """

        distilbert = cls()
        if "farm_lm_name" in kwargs:
            distilbert.name = kwargs["farm_lm_name"]
        else:
            distilbert.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and in Pytorch-Transformers format
        farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                      "language_model_config.json")
        if os.path.exists(farm_lm_config):
            # FARM style
            distilbert_config = DistilBertConfig.from_pretrained(
                farm_lm_config)
            farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                         "language_model.bin")
            distilbert.model = DistilBertModel.from_pretrained(
                farm_lm_model, config=distilbert_config, **kwargs)
            distilbert.language = distilbert.model.config.language
        else:
            # Pytorch-transformer Style
            distilbert.model = DistilBertModel.from_pretrained(
                pretrained_model_name_or_path, **kwargs)
            distilbert.language = cls._infer_language_from_name(
                pretrained_model_name_or_path)
        config = distilbert.model.config

        # DistilBERT does not provide a pooled_output by default, so we need to initialize an extra pooler.
        # The pooler takes the first hidden representation and feeds it to a dense layer of shape (hidden_dim x hidden_dim).
        # We don't want dropout at the end of the pooler, since the adaptive model already applies dropout before
        # everything is fed to the prediction head.
        config.summary_last_dropout = 0
        config.summary_type = 'first'
        config.summary_activation = 'tanh'
        distilbert.pooler = SequenceSummary(config)
        distilbert.pooler.apply(distilbert.model._init_weights)
        return distilbert
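For illustration (variable names and shapes below are examples only, not part of the source), the pooler created here turns DistilBERT's token-level output into a single sentence vector, mirroring BERT's pooled_output:

        # hidden_states: (batch, seq_len, hidden) from the DistilBERT encoder
        hidden_states = distilbert.model(input_ids)[0]
        # 'first' token summary + tanh-activated dense layer -> (batch, hidden)
        pooled_output = distilbert.pooler(hidden_states)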
Example #30
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a language model either by supplying

        * the name of a remote model on s3 ("xlnet-base-cased" ...)
        * or a local path of a model trained via transformers ("some_dir/huggingface_model")
        * or a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: name or path of a model
        :param language: (Optional) Name of language the model was trained for (e.g. "german").
                         If not supplied, FARM will try to infer it from the model name.
        :return: Language Model

        """
        xlnet = cls()
        if "farm_lm_name" in kwargs:
            xlnet.name = kwargs["farm_lm_name"]
        else:
            xlnet.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and in Pytorch-Transformers format
        farm_lm_config = os.path.join(pretrained_model_name_or_path,
                                      "language_model_config.json")
        if os.path.exists(farm_lm_config):
            # FARM style
            config = XLNetConfig.from_pretrained(farm_lm_config)
            farm_lm_model = os.path.join(pretrained_model_name_or_path,
                                         "language_model.bin")
            xlnet.model = XLNetModel.from_pretrained(farm_lm_model,
                                                     config=config,
                                                     **kwargs)
            xlnet.language = xlnet.model.config.language
        else:
            # Pytorch-transformer Style
            xlnet.model = XLNetModel.from_pretrained(
                pretrained_model_name_or_path, **kwargs)
            xlnet.language = cls._infer_language_from_name(
                pretrained_model_name_or_path)
            config = xlnet.model.config
        # XLNet does not provide a pooled_output by default, so we need to initialize an extra pooler.
        # The pooler takes the last hidden representation and feeds it to a dense layer of shape (hidden_dim x hidden_dim).
        # We don't want dropout at the end of the pooler, since the adaptive model already applies dropout before
        # everything is fed to the prediction head.
        config.summary_last_dropout = 0
        xlnet.pooler = SequenceSummary(config)
        xlnet.pooler.apply(xlnet.model._init_weights)
        return xlnet