Example #1
 def __init__(self, config, num_labels):
     super(BertForNER, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)
     self.dropout = torch.nn.Dropout(0.2)
     self.hidden2label = torch.nn.Linear(config.hidden_size, num_labels)
     self.apply(self.init_bert_weights)
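For orientation only, here is a minimal forward-pass sketch for a token-level head like this one, assuming the old pytorch_pretrained_bert BertModel API (which returns sequence and pooled outputs); it is not part of the original example:

 def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
     # last-layer token representations from BERT (old pytorch_pretrained_bert API)
     sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                    output_all_encoded_layers=False)
     logits = self.hidden2label(self.dropout(sequence_output))  # [batch, seq_len, num_labels]
     if labels is not None:
         loss_fct = torch.nn.CrossEntropyLoss()
         return loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
     return logits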
Example #2
 def __init__(self, config):
     super(BertForMTPostTraining, self).__init__(config)
     self.bert = BertModel(config)
     self.cls = BertPreTrainingHeads(
         config, self.bert.embeddings.word_embeddings.weight)
     self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)
     self.apply(self.init_bert_weights)
Example #3
 def __init__(self, config, num_labels):
     super(MyBertBasedModel, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)  # basic BERT model
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
     self.apply(self.init_bert_weights)
Example #4
    def __init__(self, config, num_labels, no_masking, lambda_scale=1.0):
        super(BertForQuestionAnsweringConfidence, self).__init__(config)
        self.bert = BertModel(config)
        self.num_labels = num_labels
        self.no_masking = no_masking
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.qa_outputs = nn.Linear(config.hidden_size,
                                    2)  # [N, L, H] => [N, L, 2]
        self.qa_classifier = nn.Linear(
            config.hidden_size, self.num_labels)  # [N, H] => [N, n_class]
        self.lambda_scale = lambda_scale

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
Example #5
 def __init__(self, config, num_labels):
     super(MyBertForTokenClassification, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, num_labels)
     self.apply(self.init_bert_weights)
Example #6
    def __init__(self, config):
        super(ObjectModel, self).__init__(config)
        self.bert = BertModel(config)

        self.linear = nn.Linear(in_features=config.hidden_size, out_features=1)  # bare `hidden_size` was undefined in the snippet

        self.apply(self.init_bert_weights)
Example #7
 def __init__(self, config):
     super(Bert2Gru, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     # `args` is assumed to be a module-level hyperparameter namespace in the source repo
     self.decoder = Decoder(args, num_classes=args.label_size, dropout=0.2)
     self.clsdense = nn.Linear(config.hidden_size, args.decoder_hidden_size)
     self.apply(self.init_bert_weights)
Example #8
 def __init__(self, config):
     super(BertForSiameseClassification, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     self.classifier = torch.nn.Linear(config.hidden_size, 2)
     self.apply(self.init_bert_weights)
     self.avg_vec = AvgVec()
Example #9
 def __init__(self, config):
     super(SquadModel, self).__init__(config)
     self.bert = BertModel(config)
     # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
     # self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.qa_outputs = nn.Linear(config.hidden_size, 2)
     self.apply(self.init_bert_weights)
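The 2-unit qa_outputs head is normally split into start and end logits; a hedged sketch of that step (the forward pass is not shown in the original snippet):

 def forward(self, input_ids, token_type_ids=None, attention_mask=None):
     sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                    output_all_encoded_layers=False)
     logits = self.qa_outputs(sequence_output)            # [batch, seq_len, 2]
     start_logits, end_logits = logits.split(1, dim=-1)   # one logit per token for start/end
     return start_logits.squeeze(-1), end_logits.squeeze(-1)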
Example #10
 def __init__(self, config, num_ents=10, num_lfs=10):
     super(BertForQA_lf, self).__init__(config)
     self.bert = BertModel(config)
     self.qa_outputs = nn.Linear(config.hidden_size, 2)
     self.ent_outputs = nn.Linear(config.hidden_size, num_ents)
     self.lf_outputs = nn.Linear(config.hidden_size, num_lfs)
     self.apply(self.init_bert_weights)
Example #11
    def __init__(self, config, num_labels=2, max_offset=10, offset_emb=30):
        """

        :param config:
        :param num_labels:
        :param max_offset:
        :param offset_emb: size of pos embedding, 0 to disable
        """
        print('model_post attention')
        print('max_offset:', max_offset)
        print('offset_emb:', offset_emb)

        super(BertPosattnForSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        if offset_emb > 0:
            self.offset1_emb = nn.Embedding(2 * max_offset + 1, offset_emb)
            self.offset2_emb = nn.Embedding(2 * max_offset + 1, offset_emb)

        self.attn_layer_1 = nn.Linear((config.hidden_size + offset_emb) * 2,
                                      config.hidden_size)
        self.attn_tanh = nn.Tanh()
        self.attn_layer_2 = nn.Linear(config.hidden_size, 1)
        self.attn_softmax = nn.Softmax(dim=1)

        self.classifier = nn.Linear(config.hidden_size, num_labels)
        self.apply(self.init_bert_weights)
Example #12
    def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
        super(BertQAYesnoHierarchicalTopKfp32, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda

        if freeze_predictor:
            for param in self.yesno_predictor.parameters():
                param.requires_grad = False
        self.freeze_predictor = freeze_predictor

        self.apply(self.init_bert_weights)
Example #13
    def __init__(self, config):
        super(BertQAModel, self).__init__(config)
        self.bert = BertModel(config)

        self.sigmoid = nn.Sigmoid()
        self.qa_outputs = nn.Linear(config.hidden_size, 2)
        self.apply(self.init_bert_weights)
Example #14
    def __init__(self,
                 config,
                 evidence_lambda: float = 0.8,
                 my_dropout_p: float = 0.2):
        super(BertHierarchicalRNN2, self).__init__(config)
        logger.info(f'Model {__class__.__name__} is loading...')
        logger.info(f'Model parameters:')
        logger.info(f'Evidence lambda: {evidence_lambda}')
        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(my_dropout_p)
        self.bert = BertModel(config)
        self.query_self_attn = layers.MultiHeadPooling2(config.hidden_size, 6)
        self.value_self_attn = layers.MultiHeadPooling2(config.hidden_size, 6)
        self.sentence_encoder = layers.ConcatRNN(config.hidden_size,
                                                 config.hidden_size // 2,
                                                 num_layers=1,
                                                 bidirectional=True,
                                                 rnn_type='lstm')
        self.attention_score = layers.AttentionScore(config.hidden_size, 256)

        # Output layer
        self.evidence_lambda = evidence_lambda
        self.predictor = nn.Linear(config.hidden_size * 2, 3)

        self.apply(self.init_bert_weights)
Example #15
    def __init__(self, config, evidence_lambda=0.8, num_choices=4, view_id=1):
        super(BertRACEHierarchicalTwoViewTopK, self).__init__(config)
        logger.info(f'The model {self.__class__.__name__} is loading...')
        logger.info(f'Currently the number of choices is {num_choices}')
        logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
        logger.info(f'The view id of current model is {view_id}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)
        rep_layers.set_seq_dropout(True)
        rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        config.hidden_size = int(config.hidden_size / 2)
        self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
        self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.classifier = nn.Linear(config.hidden_size * 2, 1)
        self.evidence_lam = evidence_lambda
        self.num_choices = num_choices
        self.view_id = view_id

        self.apply(self.init_bert_weights)
Example #16
 def __init__(self, config, num_choices=2):
     super(BertForMultipleChoiceFeatures, self).__init__(config)
     self.num_choices = num_choices
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, 1)
     self.apply(self.init_bert_weights)
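A sketch of how a multiple-choice classifier like this is typically applied (shapes and argument handling are assumptions, not taken from the source repository):

 def forward(self, input_ids, token_type_ids, attention_mask):
     # input_ids: [batch, num_choices, seq_len] -> fold the choice dimension into the batch
     flat_input_ids = input_ids.view(-1, input_ids.size(-1))
     flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
     flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1))
     _, pooled_output = self.bert(flat_input_ids, flat_token_type_ids, flat_attention_mask,
                                  output_all_encoded_layers=False)
     logits = self.classifier(self.dropout(pooled_output))  # [batch * num_choices, 1]
     return logits.view(-1, self.num_choices)                # [batch, num_choices]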
Example #17
    def __init__(self,
                 config,
                 model=None,
                 num_sequences=5,
                 num_labels=3,
                 pooling=('concat', ),
                 return_reps=True):
        super(BertForMultipleSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.num_sequences = num_sequences

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        num_inputs = len(set(pooling) - {'concat'})
        if 'concat' in pooling:
            num_inputs += num_sequences

        self.hidden_layer = nn.Linear(num_inputs * config.hidden_size,
                                      config.hidden_size)
        self.classifier = nn.Linear(config.hidden_size, num_labels)

        #self.classifier.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
        #self.classifier.bias.data.zero_()
        #self.apply(self.init_bert_weights)
        if model is not None:
            self.bert = model.bert
        else:
            self.bert = BertModel(config)

        assert (set(pooling).issubset({'concat', 'mean', 'max', 'min'}))
        self.pooling = pooling

        self.return_reps = return_reps
Example #18
 def __init__(self, config):
     super(BertScoring, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, 2)
     self.softmax = nn.Softmax(dim=-1)  # explicit dim avoids the implicit-dimension deprecation warning
     self.apply(self.init_bert_weights)
Example #19
 def __init__(self, config):
     super(Bert2Crf, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     # `args` (providing label_size) is assumed to be defined at module level in the source repo
     self.classifier = nn.Linear(config.hidden_size, args.label_size)
     self.crf = CRF(num_tags=args.label_size, batch_first=True)
     self.apply(self.init_bert_weights)
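Assuming the CRF layer here is the pytorch-crf implementation (its constructor matches the call above), a typical forward pass would look roughly like this; treat it as a sketch, not the source repository's code:

 def forward(self, input_ids, token_type_ids=None, attention_mask=None, tags=None):
     sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                    output_all_encoded_layers=False)
     emissions = self.classifier(self.dropout(sequence_output))  # [batch, seq_len, label_size]
     mask = attention_mask.bool() if attention_mask is not None else None
     if tags is not None:
         # pytorch-crf returns the log-likelihood; negate it to get a loss
         return -self.crf(emissions, tags, mask=mask, reduction='mean')
     return self.crf.decode(emissions, mask=mask)  # Viterbi decoding -> list of tag sequences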
Example #20
    def __init__(self, config):
        super(BertForMaskedLM, self).__init__(config)

        self.bert = BertModel(config)
        self.cls = BertOnlyMLMHead(config,
                                   self.bert.embeddings.word_embeddings.weight)
        self.apply(self.init_bert_weights)
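For context, the standard masked-LM training step pairs this head with a cross-entropy loss that ignores unmasked positions (label -1 in the old pytorch_pretrained_bert convention); a sketch, assuming `torch.nn` is imported as `nn`:

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None):
        sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                       output_all_encoded_layers=False)
        prediction_scores = self.cls(sequence_output)  # [batch, seq_len, vocab_size]
        if masked_lm_labels is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=-1)  # -1 marks tokens that were not masked
            return loss_fct(prediction_scores.view(-1, prediction_scores.size(-1)),
                            masked_lm_labels.view(-1))
        return prediction_scores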
Example #21
    def test_sliding_window_with_batch(self):
        tokenizer = WordTokenizer(word_splitter=BertBasicWordSplitter())

        sentence = "the quickest quick brown fox jumped over the lazy dog"
        tokens = tokenizer.tokenize(sentence)

        vocab = Vocabulary()

        vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
        token_indexer = PretrainedBertIndexer(str(vocab_path), truncate_long_sequences=False, max_pieces=8)

        config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
        config = BertConfig(str(config_path))
        bert_model = BertModel(config)
        token_embedder = BertEmbedder(bert_model, max_pieces=8)

        instance = Instance({"tokens": TextField(tokens, {"bert": token_indexer})})
        instance2 = Instance({"tokens": TextField(tokens + tokens + tokens, {"bert": token_indexer})})

        batch = Batch([instance, instance2])
        batch.index_instances(vocab)

        padding_lengths = batch.get_padding_lengths()
        tensor_dict = batch.as_tensor_dict(padding_lengths)
        tokens = tensor_dict["tokens"]
        bert_vectors = token_embedder(tokens["bert"], offsets=tokens["bert-offsets"])
        assert bert_vectors is not None
Example #22
    def __init__(self, config, sequence_len, input_len, num_labels=2):
        super(AttentionLongBert, self).__init__(config)
        self.num_labels = num_labels
        self.mem_size = config.hidden_size
        self.sequence_len = sequence_len
        self.total_input_len = sequence_len * input_len
        
        self.bert = BertModel(config)
        self.bert_layers = 12
        
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.dropout25 = torch.nn.Dropout(0.25)
        #self.rnn = nn.GRU( config.hidden_size,  config.hidden_size * 2, bidirectional = True)
        
        self.lstm = nn.LSTM(config.hidden_size, config.hidden_size * 2, bidirectional=True)
        self.classifier1 = torch.nn.Linear(config.hidden_size * 9, num_labels)
        self.classifier8 = torch.nn.Linear(config.hidden_size * 8, num_labels)
        self.attention1 = torch.nn.Linear(self.sequence_len, 64)
        self.attention2 = torch.nn.Linear(64, 128)
        self.attention3 = torch.nn.Linear(128 + config.hidden_size, 2*config.hidden_size)
        #self.classifier10 = torch.nn.Linear(config.hidden_size * 10, num_labels)
        #self.classifier12_1 = torch.nn.Linear(config.hidden_size * 12, config.hidden_size * 4)
        #self.classifier12_2 = torch.nn.Linear(config.hidden_size * 4, num_labels)
        #self.bn = nn.BatchNorm1d(config.hidden_size * 9)

        self.apply(self.init_bert_weights)
        self.leaky = nn.LeakyReLU(0.2)

        #self.att = DocAttNet(sent_hidden_size=config.hidden_size, doc_hidden_size = self.mem_size, num_classes = num_labels)
        self.att = NewAttention(config.hidden_size)
Example #23
    def __init__(self, opt, bert_config=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        if opt.get('dump_feature', False):
            self.opt = opt
            return
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        mem_size = self.bert_config.hidden_size
        self.decoder_opt = opt['answer_opt']
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']
        self.bert_pooler = None

        for task, lab in enumerate(labels):
            decoder_opt = self.decoder_opt[task]
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            if decoder_opt == 1:
                out_proj = SANClassifier(mem_size, mem_size, lab, opt, prefix='answer', dropout=dropout)
                self.scoring_list.append(out_proj)
            else:
                out_proj = nn.Linear(self.bert_config.hidden_size, lab)
                self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
Example #24
    def __init__(self, config, graph_retriever_config):
        super(BertForGraphRetriever, self).__init__(config)

        self.graph_retriever_config = graph_retriever_config

        self.bert = BertModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # Initial state
        self.s = Parameter(
            torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))

        # Scaling factor for weight norm
        self.g = Parameter(torch.FloatTensor(1).fill_(1.0))

        # RNN weight
        self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)

        # EOE and output bias
        self.eos = Parameter(
            torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
        self.bias = Parameter(torch.FloatTensor(1).zero_())

        self.apply(self.init_bert_weights)
        self.cpu = torch.device('cpu')
Example #25
 def __init__(self, config):
     super(FusionBertModule, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     #self.pooler_mode = config.pooler_mode
     #self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.apply(self.init_bert_weights)
Example #26
 def __init__(self, config):
     super(BertQAYesNoMLP, self).__init__(config)
     print(f'The model {self.__class__.__name__} is loading...')
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.answer_choice = nn.Linear(config.hidden_size, 3)
     self.apply(self.init_bert_weights)
Example #27
    def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device,
                 write_log, summary_writer):
        self.job_config = job_config

        if not use_pretrain:
            model_config = self.job_config.get_model_config()
            bert_config = BertConfig(**model_config)
            bert_config.vocab_size = len(tokenizer.vocab)

            self.bert_encoder = BertModel(bert_config)
        # Use pretrained bert weights
        else:
            self.bert_encoder = BertModel.from_pretrained(
                self.job_config.get_model_file_type(), cache_dir=cache_dir)
            bert_config = self.bert_encoder.config

        self.network = MTLRouting(self.bert_encoder,
                                  write_log=write_log,
                                  summary_writer=summary_writer)

        #config_data=self.config['data']

        # Pretrain Dataset
        self.network.register_batch(BatchType.PRETRAIN_BATCH,
                                    "pretrain_dataset",
                                    loss_calculation=BertPretrainingLoss(
                                        self.bert_encoder, bert_config))

        self.device = device
Example #28
 def __init__(self, config, num_choices=4):
     super(BertRACEPool, self).__init__(config)
     self.num_choices = num_choices
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, 1)
     self.apply(self.init_bert_weights)
Example #29
 def __init__(self, config, num_labels=20):
     super(BertForMultiLabelSequenceClassification, self).__init__(config)
     self.num_labels = num_labels
     self.bert = BertModel(config)
     self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
     self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
     self.apply(self.init_bert_weights)
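A plausible forward pass for the multi-label head above, assuming one-vs-rest training with BCEWithLogitsLoss (a common choice; the loss used in the source repository may differ):

 def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
     _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                  output_all_encoded_layers=False)
     logits = self.classifier(self.dropout(pooled_output))  # [batch, num_labels]
     if labels is not None:
         # multi-label targets: float tensor of independent 0/1 indicators per label
         loss_fct = torch.nn.BCEWithLogitsLoss()
         return loss_fct(logits, labels.float())
     return logits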
Example #30
    def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0):
        super(BertQAYesnoHierarchicalNeg, self).__init__(config)
        print(f'The model {self.__class__.__name__} is loading...')
        print(f'The coefficient of evidence loss is {evidence_lambda}')

        layers.set_seq_dropout(True)
        layers.set_my_dropout_prob(config.hidden_dropout_prob)

        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.answer_choice = nn.Linear(config.hidden_size, 2)

        self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(
            config.hidden_size)
        self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)

        self.word_similarity = layers.AttentionScore(config.hidden_size,
                                                     250,
                                                     do_similarity=False)
        self.vector_similarity = layers.AttentionScore(config.hidden_size,
                                                       250,
                                                       do_similarity=False)

        # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
        self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
        self.evidence_lam = evidence_lambda
        self.negative_lam = negative_lambda

        self.apply(self.init_bert_weights)