def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config, add_pooling_layer=False)
    self.cls = BertOnlyMLMHead(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.activation = nn.Tanh()
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
    # MLM head is not trained
    for param in self.cls.parameters():
        param.requires_grad = False

def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.loss_fct = CrossEntropyLoss()  # -100 index = padding token; initialize once to speed up.
    self.init_weights()

class LOTClassModel(BertPreTrainedModel):

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
        # MLM head is not trained
        for param in self.cls.parameters():
            param.requires_grad = False

    def forward(self, input_ids, pred_mode, attention_mask=None, token_type_ids=None,
                position_ids=None, head_mask=None, inputs_embeds=None):
        bert_outputs = self.bert(input_ids,
                                 attention_mask=attention_mask,
                                 token_type_ids=token_type_ids,
                                 position_ids=position_ids,
                                 head_mask=head_mask,
                                 inputs_embeds=inputs_embeds)
        last_hidden_states = bert_outputs[0]
        if pred_mode == "classification":
            trans_states = self.dense(last_hidden_states)
            trans_states = self.activation(trans_states)
            trans_states = self.dropout(trans_states)
            logits = self.classifier(trans_states)
        elif pred_mode == "mlm":
            logits = self.cls(last_hidden_states)
        else:
            sys.exit("Wrong pred_mode!")
        return logits

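A minimal usage sketch for LOTClassModel above (hypothetical, not part of the original repository), assuming the usual Hugging Face imports are in scope; the toy config and shapes are illustrative only:

import torch
from transformers import BertConfig

config = BertConfig(num_labels=4)  # toy config; real usage loads a pretrained checkpoint
model = LOTClassModel(config)
input_ids = torch.randint(0, config.vocab_size, (2, 16))
attention_mask = torch.ones_like(input_ids)

# per-token label logits: (2, 16, num_labels)
cls_logits = model(input_ids, pred_mode="classification", attention_mask=attention_mask)
# MLM vocabulary logits: (2, 16, vocab_size)
mlm_logits = model(input_ids, pred_mode="mlm", attention_mask=attention_mask)
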
def __init__(self, config):
    super(GlyceBertForMaskedLM, self).__init__(config)
    self.bert = GlyceBertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()

def __init__(self, config, tokenized_slot_meta, pad_idx=0):
    super(TRADE, self).__init__()
    self.model_type = config.model_type
    if self.model_type == "BERT":
        # use the encoder that matches model_type
        self.encoder = BERTEncoder(config.model_name_or_path)
    elif self.model_type == "GRU":
        self.encoder = GRUEncoder(
            config.vocab_size,
            config.hidden_size,
            1,
            config.hidden_dropout_prob,
            config.proj_dim,
            pad_idx,
        )
    self.decoder = SlotGenerator(
        config.vocab_size,
        config.hidden_size,
        config.hidden_dropout_prob,
        config.n_gate,
        config.proj_dim,
        pad_idx,
    )
    self.decoder.set_slot_idx(tokenized_slot_meta)
    self.mlm_head = BertOnlyMLMHead(config)
    self.tie_weight()

def __init__(self, config: Config, *args, **kwargs):
    super().__init__(config, *args, **kwargs)
    self.cls = BertOnlyMLMHead(self.config)
    loss_dict = dict(
        mse=torch.nn.MSELoss(),
        cosine=torch.nn.CosineSimilarity(dim=1),
        contrastive=RefinerContrastiveLoss(),
        ms=RefinerMSLoss(),
    )
    self.refiner_loss = loss_dict.get(self.config.loss_type)
    self.refiner_decoder = {}
    self.weights = {}
    for i, modality in enumerate(self.config.modalities):
        self.refiner_decoder[modality] = MLP(
            input_dim=self.config.hidden_size,
            mlp_dims=[self.config.hidden_size],
            dropout=self.config.hidden_dropout_prob,
            nonlinearity=torch.nn.ReLU,
            normalization=torch.nn.LayerNorm,
        )
        self.weights[modality] = self.config.weights[i]
    self.modalities = self.config.modalities
    self.tol = self.config.tol
    self.refiner_target_pooler = self.config.refiner_target_pooler
    self.refiner_target_layer_depth = self.config.refiner_target_layer_depth
    self.loss_name = self.config.loss_name
    pool_class = registry.get_pool_class(self.refiner_target_pooler)
    if pool_class is None:
        raise ValueError(
            f"No pooler {self.refiner_target_pooler} is registered to registry"
        )
    self.pooler = pool_class(self.refiner_target_layer_depth)

def __init__(self, config, mask_word_id=0, search_beam_size=1, length_penalty=1.0,
             eos_id=0, sos_id=0, forbid_duplicate_ngrams=False, forbid_ignore_set=None,
             ngram_size=3, min_len=0):
    super(UnilmForSeq2SeqDecode, self).__init__(config)
    self.bert = UnilmModelIncr(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    self.mask_word_id = mask_word_id
    self.search_beam_size = search_beam_size
    self.length_penalty = length_penalty
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.forbid_duplicate_ngrams = forbid_duplicate_ngrams
    self.forbid_ignore_set = forbid_ignore_set
    self.ngram_size = ngram_size
    self.min_len = min_len
    self.init_weights()
    self.tie_weights()

def __init__(self, config, args, tokenizer):
    super(DecoderWithLoss, self).__init__()
    # model components
    print("initializing decoder with params {}".format(args))
    self.bert = BertModel(config)
    self.lm_head = BertOnlyMLMHead(config)
    self.span_b_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    self.span_e_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    # predict text span beginning and end
    self.text_span_start_head = nn.Linear(config.hidden_size, config.hidden_size)
    self.text_span_end_head = nn.Linear(config.hidden_size, config.hidden_size)
    # loss functions
    if args.node_label_smoothing > 0:
        self.lm_ce_loss = LabelSmoothingLoss(
            args.node_label_smoothing,
            config.vocab_size,
            ignore_index=tokenizer.pad_token_id)
    else:
        self.lm_ce_loss = torch.nn.CrossEntropyLoss(
            ignore_index=tokenizer.pad_token_id, reduction="none")
    self.span_ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.span_loss_lb = args.lambda_span_loss
    self.text_span_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.tree_to_text = args.tree_to_text

def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()

def __init__(self, config: BertConfig, **kwargs: Any):
    """The classification init is a superset of the LM init."""
    super().__init__(config, **kwargs)
    self.config = config
    self.bert = BertModel(config=self.config)
    self.lm_head = BertOnlyMLMHead(self.config)
    self.lm_head.apply(self._init_weights)
    self.qa_head = BertOnlyMLMHead(self.config)
    self.qa_head.apply(self._init_weights)
    self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
    self.classifier = nn.Linear(self.config.hidden_size, self.config.num_labels)
    self.classifier.apply(self._init_weights)

def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.use_crf = False
    self.predict_masked = False
    if hasattr(config, "task_specific_params"):
        other_config = config.task_specific_params
        self.num_labels_boundaries = len(other_config["labels_boundaries"]) + 1
        self.classifier_boundaries = nn.Linear(config.hidden_size,
                                               self.num_labels_boundaries)
        if other_config["crf"]:
            self.use_crf = True
            crf_constraints = allowed_transitions(
                other_config["type_crf_constraints"],
                dict(map(reversed, config.label2id.items())))
            self.crf = ConditionalRandomField(config.num_labels,
                                              constraints=crf_constraints)
            crf_constraints = allowed_transitions(
                other_config["type_crf_constraints"],
                dict(map(reversed, other_config["labels_boundaries"].items())))
            self.crf_boundaries = ConditionalRandomField(
                self.num_labels_boundaries, constraints=crf_constraints)
        if other_config["predict_masked"]:
            self.predict_masked = True
            self.cls = BertOnlyMLMHead(config)
    self.init_weights()

def __init__(self, config):
    super().__init__(config)
    self.bert = MMBertModel(config)
    self.videomlp = VideoTokenMLP(config)
    # we do not use `BertGenerationOnlyLMHead`
    # because we can reuse pretraining.
    self.cls = BertOnlyMLMHead(config)
    self.hidden_size = config.hidden_size
    self.init_weights()

def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.domain_cls = DomBertDomainHead(config)
    self.loss_fct = nn.CrossEntropyLoss()
    self.sim_fn = nn.CosineSimilarity(-1)
    self.eye = torch.eye(4680, device=0)
    self.init_weights()

def __init__(self, config: Config, *args, **kwargs):
    super().__init__(config, *args, **kwargs)
    # Head modules
    self.cls = BertOnlyMLMHead(self.config)
    self.vocab_size = self.config.vocab_size
    # Loss
    self.ce_loss = torch.nn.CrossEntropyLoss(ignore_index=self.config.ignore_index)

def __init__(self, config):
    super(UnilmForSeq2Seq, self).__init__(config)
    self.bert = UnilmModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    if hasattr(config, 'label_smoothing') and config.label_smoothing:
        self.crit_mask_lm_smoothed = LabelSmoothingLoss(
            config.label_smoothing, config.vocab_size,
            ignore_index=0, reduction='none')
    else:
        self.crit_mask_lm_smoothed = None
    self.init_weights()
    self.tie_weights()

def __init__(self, config):
    super(TestModel, self).__init__(config)
    self.bert = BertModel(config)
    self.input_size = config.hidden_size
    self.GRU_Layer = nn.GRU(input_size=self.input_size,
                            hidden_size=self.input_size // 2,
                            num_layers=2,
                            bias=True,
                            batch_first=True,
                            dropout=config.hidden_dropout_prob,
                            bidirectional=True)
    self.cls = BertOnlyMLMHead(config)

def __init__(self, config):
    super(SpanBertForPreTraining, self).__init__(config)
    self.bert = BertModel(config)
    # self.mlm = BertLMPredictionHead(config)
    self.cls = BertOnlyMLMHead(config)
    self.sbo = SpanBertSboHead(config)
    self.apply(self.init_weights)
    # tie the weights of input and output
    self.tie_weights()

def __init__(self, num_classes=1, freeze_bert=False):
    super().__init__()
    self.config = BertConfig()
    self.bert_layer = BertModel.from_pretrained('bert-base-uncased')
    self.mlm = BertOnlyMLMHead(self.config)
    self.cls = nn.Linear(768, num_classes)
    self._init_weights_bert(self.mlm)
    self._init_weights_bert(self.cls)
    # Freeze bert layers
    if freeze_bert:
        for p in self.bert_layer.parameters():
            p.requires_grad = False

def __init__(self, config):
    super().__init__(config)
    if config.is_decoder:
        logger.warning(
            "If you want to use `TrelmBertForMaskedLM` make sure `config.is_decoder=False` for "
            "bi-directional self-attention."
        )
    self.trelm_bert = TrelmBertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()

def __init__(self, config):
    super(BertPreTrainedModel, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.bilinear = nn.Bilinear(config.hidden_size, config.hidden_size, 1)
    self.loss_lambda = getattr(config, "loss_lambda", 1.)
    self.disable_rev_pos = getattr(config, "disable_rev_pos", False)
    self.padding_idx = 0  # 0 for bert models
    self.apply(self.init_weights)
    self.tie_weights()

def __init__(self, config, tokenizer):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.mlm_probability = 0.15
    self.bert = BertModel(config)
    self.tokenizer = tokenizer
    self.cls = BertOnlyMLMHead(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.topic_cls = nn.Linear(config.hidden_size, config.num_labels)
    self.dense = nn.Linear(config.hidden_size, config.hidden_size)
    self.context_emb = nn.Parameter(
        torch.Tensor(config.hidden_size).normal_(
            mean=0.0, std=config.initializer_range))
    self.activation = nn.Tanh()
    self.init_weights()

def __init__(self, config, bert_model=None):
    super(BertForQuestionAnsweringWithMaskedLM, self).__init__(config)
    self.num_labels = config.num_labels
    # self.loss_beta = args.loss_beta
    self.bert = BertModel(config)
    # qa
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
    # mlm
    self.cls = BertOnlyMLMHead(config)
    # answer content
    self.answer_content_classifier = nn.Sequential(
        nn.Linear(config.hidden_size, config.hidden_size),
        nn.ReLU(),
        nn.Linear(config.hidden_size, 2))
    self.init_weights()

def __init__(self, config, model_size, task=None, n_classes=None):
    """
    The bare BERT Model transformer outputting raw hidden-states without any
    specific head on top.

    The model can behave as an encoder (with only self-attention) as well as a
    decoder, in which case a layer of cross-attention is added between the
    self-attention layers, following the architecture described in
    `Attention is all you need`_ by Ashish Vaswani, Noam Shazeer, Niki Parmar,
    Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and
    Illia Polosukhin.

    This model is a PyTorch `torch.nn.Module
    <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`_ sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for
    all matters related to general usage and behavior.

    Args:
        config (:class:`~transformers.BertConfig`): Model configuration class
            with all the parameters of the model. Initializing with a config
            file does not load the weights associated with the model, only the
            configuration. Check out the
            :meth:`~transformers.PreTrainedModel.from_pretrained` method to
            load the model weights.
        model_size: Size of the model.
        task: MTB task.
        n_classes: Number of classes.

    References:
        Attention is all you need (https://arxiv.org/abs/1706.03762)
    """
    super(BertModel, self).__init__(config)
    self.config = config
    self.task = task
    self.model_size = model_size
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.init_weights()
    logger.info("Model config: %s", self.config)
    if self.task is None:
        self.lm_head = BertOnlyMLMHead(config)
    elif self.task == "classification":
        self.n_classes = n_classes
        if self.model_size == "bert-base-uncased":
            self.classification_layer = nn.Linear(1536, n_classes)
        elif self.model_size == "bert-large-uncased":
            self.classification_layer = nn.Linear(2048, n_classes)

def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()

    # These attributes should be assigned once the model is initialized
    self.model_args = None
    self.data_args = None
    self.label_word_list = None

    # For regression
    self.lb = None
    self.ub = None

    # For label search.
    self.return_full_softmax = None

def build_heads(self):
    """Initialize the classifier head. It takes the output of the transformer
    encoder and passes it through a pooler (we use the pooler from the BERT
    model), then dropout, BertPredictionHeadTransform (which is a linear
    layer, followed by activation and layer norm) and lastly a linear layer
    projecting the hidden output to the classification labels.
    """
    transformer_config = self.backend.get_config()
    if self.config.training_head_type == "classification":
        self.pooler = BertPooler(transformer_config)
        self.classifier = nn.Sequential(
            nn.Dropout(transformer_config.hidden_dropout_prob),
            BertPredictionHeadTransform(transformer_config),
            nn.Linear(transformer_config.hidden_size, self.config.num_labels),
        )
    elif self.config.training_head_type == "pretraining":
        self.cls = BertOnlyMLMHead(transformer_config)
        self.vocab_size = transformer_config.vocab_size

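A standalone sketch (hypothetical, with illustrative names) of the same head stack built above, assuming a recent Hugging Face transformers layout; it is not this model's actual forward pass:

import torch
from torch import nn
from transformers import BertConfig
from transformers.models.bert.modeling_bert import (
    BertOnlyMLMHead,
    BertPooler,
    BertPredictionHeadTransform,
)

config = BertConfig(num_labels=3)
pooler = BertPooler(config)
classifier = nn.Sequential(
    nn.Dropout(config.hidden_dropout_prob),
    BertPredictionHeadTransform(config),
    nn.Linear(config.hidden_size, config.num_labels),
)
mlm_head = BertOnlyMLMHead(config)

sequence_output = torch.randn(2, 16, config.hidden_size)  # stand-in for encoder output
class_logits = classifier(pooler(sequence_output))        # (2, num_labels)
mlm_logits = mlm_head(sequence_output)                    # (2, 16, vocab_size)
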
def __init__(self, config):
    super().__init__(config)
    self.tokenizer = BertTokenizerFast("../Bert/assets/vocab.txt")
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    # projected_emb = tf.layers.dense(output_layer, params["projection_size"])
    # projected_emb = tf.keras.layers.LayerNormalization(axis=-1)(projected_emb)
    # if is_training:
    #     projected_emb = tf.nn.dropout(projected_emb, rate=0.1)
    self.dense = nn.Linear(config.hidden_size, 128)
    self.LayerNorm = nn.LayerNorm(128)
    self.projected_emb = nn.Dropout(0.1)

def __init__(self, config):
    super(VaeBertMatchModelClean, self).__init__(config)
    self.bert = BertModel(config)
    # the CVAE returns (latent_z, output); output is the reconstructed x: [batch, seq, 768]
    # latent_z = [batch, seq*hidden]
    self.input_size = config.hidden_size
    self.dropout = config.hidden_dropout_prob
    self.num_layers = args.num_layers
    self.decoder_type = args.decoder_type
    self.vae_module = VaeModel(input_size=self.input_size,
                               num_layers=self.num_layers,
                               dropout=self.dropout,
                               decoder_type=self.decoder_type)
    self.cls = BertOnlyMLMHead(config)
    # add an FFN
    # self.linear1 = nn.Linear(seq_len*hidden_size, seq_len*hidden_size*2)
    # self.linear2 = nn.Linear(seq_len*hidden_size*2, seq_len*hidden_size)
    self.linear3 = nn.Linear(self.input_size, 1)
    self.reconstruction_loss_func = nn.MSELoss()
    self.task_loss_func = nn.BCEWithLogitsLoss()

def __init__(self, config, args, tokenizer):
    super(DecoderWithLoss, self).__init__()
    # model components
    self.bert = BertModel(config)
    self.lm_head = BertOnlyMLMHead(config)
    self.span_b_proj = nn.ModuleList(
        [HighwayLayer(768) for _ in range(args.num_highway)])
    self.span_e_proj = nn.ModuleList(
        [HighwayLayer(768) for _ in range(args.num_highway)])
    # loss functions
    if args.node_label_smoothing > 0:
        self.lm_ce_loss = LabelSmoothingLoss(
            args.node_label_smoothing,
            config.vocab_size,
            ignore_index=tokenizer.pad_token_id)
    else:
        self.lm_ce_loss = torch.nn.CrossEntropyLoss(
            ignore_index=tokenizer.pad_token_id, reduction="none")
    self.span_ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.span_loss_lb = args.lambda_span_loss

def __init__(self, name: str, labels, config, inputs=None):
    kwargs = {
        'name': name,
        'labels': labels,
        'loss': LanguageModelCrossEntropyLoss(),
        'per_sample_loss': ReducedPerSample(
            LanguageModelCrossEntropyLoss(reduction='none'), reduction=torch.mean),
        'available_func': all_correct,
        'inputs': inputs,
        'activation': None,
        'decoder': None,
        'module': BertOnlyMLMHead(config),
        'metrics': ()
    }
    super().__init__(**kwargs)

def __init__(self, config):
    super().__init__(config)
    self.cls = BertOnlyMLMHead(config)

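Common to all of the snippets above: BertOnlyMLMHead projects final hidden states to vocabulary logits (dense, activation, LayerNorm, then a decoder that tie_weights() usually shares with the input embeddings). A minimal self-contained sketch, assuming a recent Hugging Face transformers layout; the masking and loss code is illustrative and not taken from any of the repositories above:

import torch
from transformers import BertConfig, BertModel
from transformers.models.bert.modeling_bert import BertOnlyMLMHead

config = BertConfig()
bert = BertModel(config, add_pooling_layer=False)
mlm_head = BertOnlyMLMHead(config)

# roughly what PreTrainedModel.tie_weights() does: the MLM decoder
# shares its weight matrix with the input word embeddings
mlm_head.predictions.decoder.weight = bert.embeddings.word_embeddings.weight

input_ids = torch.randint(0, config.vocab_size, (2, 8))
hidden_states = bert(input_ids).last_hidden_state    # (2, 8, hidden_size)
logits = mlm_head(hidden_states)                     # (2, 8, vocab_size)

# typical MLM loss: CrossEntropyLoss with -100 marking positions that are not masked
labels = torch.full((2, 8), -100, dtype=torch.long)
labels[:, 3] = input_ids[:, 3]   # pretend position 3 was masked
loss = torch.nn.CrossEntropyLoss()(logits.view(-1, config.vocab_size), labels.view(-1))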