def __init__(self, config, num_labels):
    super(BertForNER, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(0.2)
    self.hidden2label = torch.nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(BertForMTPostTraining, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertPreTrainingHeads(config, self.bert.embeddings.word_embeddings.weight)
    self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)
    self.apply(self.init_bert_weights)
def __init__(self, config, num_labels):
    super(MyBertBasedModel, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)  # basic BERT model
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
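For reference, a minimal forward pass that would pair with a sequence-classification head like this, assuming the pytorch-pretrained-bert `BertModel` call signature (it returns `(encoded_layers, pooled_output)`); the method and its `labels` handling are an illustrative sketch, not part of the original snippet:

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    # Pooled [CLS] representation: [batch, hidden_size].
    _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                 output_all_encoded_layers=False)
    logits = self.classifier(self.dropout(pooled_output))  # [batch, num_labels]
    if labels is not None:
        loss_fct = torch.nn.CrossEntropyLoss()
        return loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    return logits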
def __init__(self, config, num_labels, no_masking, lambda_scale=1.0):
    super(BertForQuestionAnsweringConfidence, self).__init__(config)
    self.bert = BertModel(config)
    self.num_labels = num_labels
    self.no_masking = no_masking
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)  # [N, L, H] => [N, L, 2]
    self.qa_classifier = nn.Linear(config.hidden_size, self.num_labels)  # [N, H] => [N, n_class]
    self.lambda_scale = lambda_scale

    def init_weights(module):
        # Closure over `config`; applied recursively to every submodule.
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            # Standard LayerNorm initialization: zero shift, unit scale.
            module.beta.data.zero_()
            module.gamma.data.fill_(1.0)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
def __init__(self, config, num_labels):
    super(MyBertForTokenClassification, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
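The token-level variant differs from sequence classification in using the per-token `sequence_output` rather than the pooled `[CLS]` vector. A hedged sketch of a matching forward pass (the padding-mask logic is illustrative, not from the original code):

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    # Per-token representations: [batch, seq_len, hidden_size].
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                   output_all_encoded_layers=False)
    logits = self.classifier(self.dropout(sequence_output))  # [batch, seq_len, num_labels]
    if labels is not None:
        loss_fct = nn.CrossEntropyLoss()
        if attention_mask is not None:
            # Score only real tokens, not padding positions.
            active = attention_mask.view(-1) == 1
            return loss_fct(logits.view(-1, self.num_labels)[active],
                            labels.view(-1)[active])
        return loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    return logits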
def __init__(self, config):
    super(ObjectModel, self).__init__(config)
    self.bert = BertModel(config)
    self.linear = nn.Linear(in_features=config.hidden_size, out_features=1)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(Bert2Gru, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # `args` is expected to be a module-level namespace of hyperparameters
    # (e.g. from argparse) carrying label_size and decoder_hidden_size.
    self.decoder = Decoder(args, num_classes=args.label_size, dropout=0.2)
    self.clsdense = nn.Linear(config.hidden_size, args.decoder_hidden_size)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(BertForSiameseClassification, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, 2)
    self.apply(self.init_bert_weights)
    self.avg_vec = AvgVec()
def __init__(self, config):
    super(SquadModel, self).__init__(config)
    self.bert = BertModel(config)
    # TODO check with Google if it's normal there is no dropout on the token classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    self.apply(self.init_bert_weights)
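A sketch of the span-prediction forward pass this head implies, following the standard pytorch-pretrained-bert pattern of splitting the two output channels into start and end logits (illustrative; the original forward is not shown):

def forward(self, input_ids, token_type_ids=None, attention_mask=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                   output_all_encoded_layers=False)
    logits = self.qa_outputs(sequence_output)          # [batch, seq_len, 2]
    start_logits, end_logits = logits.split(1, dim=-1)
    # Each output is [batch, seq_len]: a score per token for span start/end.
    return start_logits.squeeze(-1), end_logits.squeeze(-1)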
def __init__(self, config, num_ents=10, num_lfs=10):
    super(BertForQA_lf, self).__init__(config)
    self.bert = BertModel(config)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    self.ent_outputs = nn.Linear(config.hidden_size, num_ents)
    self.lf_outputs = nn.Linear(config.hidden_size, num_lfs)
    self.apply(self.init_bert_weights)
def __init__(self, config, num_labels=2, max_offset=10, offset_emb=30):
    """
    :param config: BERT configuration
    :param num_labels: number of output classes
    :param max_offset: relative offsets span [-max_offset, max_offset]
    :param offset_emb: size of the position embedding, 0 to disable
    """
    print('model_post attention')
    print('max_offset:', max_offset)
    print('offset_emb:', offset_emb)
    super(BertPosattnForSequenceClassification, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    if offset_emb > 0:
        self.offset1_emb = nn.Embedding(2 * max_offset + 1, offset_emb)
        self.offset2_emb = nn.Embedding(2 * max_offset + 1, offset_emb)
    self.attn_layer_1 = nn.Linear((config.hidden_size + offset_emb) * 2, config.hidden_size)
    self.attn_tanh = nn.Tanh()
    self.attn_layer_2 = nn.Linear(config.hidden_size, 1)
    self.attn_softmax = nn.Softmax(dim=1)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, freeze_predictor: bool = False):
    super(BertQAYesnoHierarchicalTopKfp32, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)
    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    if freeze_predictor:
        for param in self.yesno_predictor.parameters():
            param.requires_grad = False
    self.freeze_predictor = freeze_predictor
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(BertQAModel, self).__init__(config)
    self.bert = BertModel(config)
    self.sigmoid = nn.Sigmoid()
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda: float = 0.8, my_dropout_p: float = 0.2):
    super(BertHierarchicalRNN2, self).__init__(config)
    logger.info(f'Model {__class__.__name__} is loading...')
    logger.info('Model parameters:')
    logger.info(f'Evidence lambda: {evidence_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(my_dropout_p)
    self.bert = BertModel(config)
    self.query_self_attn = layers.MultiHeadPooling2(config.hidden_size, 6)
    self.value_self_attn = layers.MultiHeadPooling2(config.hidden_size, 6)
    self.sentence_encoder = layers.ConcatRNN(config.hidden_size, config.hidden_size // 2,
                                             num_layers=1, bidirectional=True, rnn_type='lstm')
    self.attention_score = layers.AttentionScore(config.hidden_size, 256)
    # Output layer
    self.evidence_lambda = evidence_lambda
    self.predictor = nn.Linear(config.hidden_size * 2, 3)
    self.apply(self.init_bert_weights)
def __init__(self, config, evidence_lambda=0.8, num_choices=4, view_id=1):
    super(BertRACEHierarchicalTwoViewTopK, self).__init__(config)
    logger.info(f'The model {self.__class__.__name__} is loading...')
    logger.info(f'Currently the number of choices is {num_choices}')
    logger.info(f'The coefficient of evidence loss is {evidence_lambda}')
    logger.info(f'The view id of current model is {view_id}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    rep_layers.set_seq_dropout(True)
    rep_layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # The two-view model builds its downstream layers on half of the BERT hidden size.
    config.hidden_size = int(config.hidden_size / 2)
    self.doc_sen_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.que_self_attn = rep_layers.LinearSelfAttention(config.hidden_size)
    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.classifier = nn.Linear(config.hidden_size * 2, 1)
    self.evidence_lam = evidence_lambda
    self.num_choices = num_choices
    self.view_id = view_id
    self.apply(self.init_bert_weights)
def __init__(self, config, num_choices=2):
    super(BertForMultipleChoiceFeatures, self).__init__(config)
    self.num_choices = num_choices
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_bert_weights)
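Multiple-choice heads score each (context, choice) pair independently and compare the scores across choices. A minimal forward sketch, assuming inputs shaped `[batch, num_choices, seq_len]` (not part of the original snippet):

def forward(self, input_ids, token_type_ids, attention_mask):
    # Flatten so BERT sees one (context, choice) pair per row.
    flat_input_ids = input_ids.view(-1, input_ids.size(-1))
    flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
    flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1))
    _, pooled_output = self.bert(flat_input_ids, flat_token_type_ids, flat_attention_mask,
                                 output_all_encoded_layers=False)
    logits = self.classifier(self.dropout(pooled_output))  # [batch * num_choices, 1]
    return logits.view(-1, self.num_choices)               # [batch, num_choices]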
def __init__(self, config, model=None, num_sequences=5, num_labels=3, pooling=('concat',), return_reps=True):
    super(BertForMultipleSequenceClassification, self).__init__(config)
    self.num_labels = num_labels
    self.num_sequences = num_sequences
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    num_inputs = len(set(pooling) - {'concat'})
    if 'concat' in pooling:
        num_inputs += num_sequences
    self.hidden_layer = nn.Linear(num_inputs * config.hidden_size, config.hidden_size)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    # self.classifier.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
    # self.classifier.bias.data.zero_()
    # self.apply(self.init_bert_weights)
    if model is not None:
        self.bert = model.bert
    else:
        self.bert = BertModel(config)
    assert set(pooling).issubset({'concat', 'mean', 'max', 'min'})
    self.pooling = pooling
    self.return_reps = return_reps
def __init__(self, config):
    super(BertScoring, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 2)
    # Explicit dim avoids the deprecated implicit-dim Softmax behaviour.
    self.softmax = nn.Softmax(dim=-1)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(Bert2Crf, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # `args` is expected to be a module-level namespace carrying label_size.
    self.classifier = nn.Linear(config.hidden_size, args.label_size)
    self.crf = CRF(num_tags=args.label_size, batch_first=True)
    self.apply(self.init_bert_weights)
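Assuming `CRF` here is the pytorch-crf implementation (its constructor signature matches), a forward pass would use the CRF's log likelihood as the training loss and Viterbi decoding at inference. The following is a sketch under that assumption, not the original code:

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                   output_all_encoded_layers=False)
    emissions = self.classifier(self.dropout(sequence_output))  # [batch, seq_len, label_size]
    mask = attention_mask.bool() if attention_mask is not None else None
    if labels is not None:
        # pytorch-crf returns the log likelihood; negate it for a loss.
        return -self.crf(emissions, labels, mask=mask, reduction='mean')
    return self.crf.decode(emissions, mask=mask)  # list of best tag sequences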
def __init__(self, config):
    super(BertForMaskedLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config, self.bert.embeddings.word_embeddings.weight)
    self.apply(self.init_bert_weights)
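For context, the forward pass that conventionally accompanies this head, mirroring the reference pytorch-pretrained-bert `BertForMaskedLM` (where positions not selected for masking carry the label -1 and are ignored by the loss):

def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                   output_all_encoded_layers=False)
    prediction_scores = self.cls(sequence_output)  # [batch, seq_len, vocab_size]
    if masked_lm_labels is not None:
        loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
        return loss_fct(prediction_scores.view(-1, self.config.vocab_size),
                        masked_lm_labels.view(-1))
    return prediction_scores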
def test_sliding_window_with_batch(self):
    tokenizer = WordTokenizer(word_splitter=BertBasicWordSplitter())
    sentence = "the quickest quick brown fox jumped over the lazy dog"
    tokens = tokenizer.tokenize(sentence)

    vocab = Vocabulary()
    vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
    token_indexer = PretrainedBertIndexer(str(vocab_path), truncate_long_sequences=False, max_pieces=8)

    config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
    config = BertConfig(str(config_path))
    bert_model = BertModel(config)
    token_embedder = BertEmbedder(bert_model, max_pieces=8)

    instance = Instance({"tokens": TextField(tokens, {"bert": token_indexer})})
    instance2 = Instance({"tokens": TextField(tokens + tokens + tokens, {"bert": token_indexer})})

    batch = Batch([instance, instance2])
    batch.index_instances(vocab)

    padding_lengths = batch.get_padding_lengths()
    tensor_dict = batch.as_tensor_dict(padding_lengths)
    tokens = tensor_dict["tokens"]
    bert_vectors = token_embedder(tokens["bert"], offsets=tokens["bert-offsets"])
    assert bert_vectors is not None
def __init__(self, config, sequence_len, input_len, num_labels=2):
    super(AttentionLongBert, self).__init__(config)
    self.num_labels = num_labels
    self.mem_size = config.hidden_size
    self.sequence_len = sequence_len
    self.total_input_len = sequence_len * input_len
    self.bert = BertModel(config)
    self.bert_layers = 12
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.dropout25 = torch.nn.Dropout(0.25)
    # self.rnn = nn.GRU(config.hidden_size, config.hidden_size * 2, bidirectional=True)
    self.lstm = nn.LSTM(config.hidden_size, config.hidden_size * 2, bidirectional=True)
    self.classifier1 = torch.nn.Linear(config.hidden_size * 9, num_labels)
    self.classifier8 = torch.nn.Linear(config.hidden_size * 8, num_labels)
    self.attention1 = torch.nn.Linear(self.sequence_len, 64)
    self.attention2 = torch.nn.Linear(64, 128)
    self.attention3 = torch.nn.Linear(128 + config.hidden_size, 2 * config.hidden_size)
    # self.classifier10 = torch.nn.Linear(config.hidden_size * 10, num_labels)
    # self.classifier12_1 = torch.nn.Linear(config.hidden_size * 12, config.hidden_size * 4)
    # self.classifier12_2 = torch.nn.Linear(config.hidden_size * 4, num_labels)
    # self.bn = nn.BatchNorm1d(config.hidden_size * 9)
    self.apply(self.init_bert_weights)
    self.leaky = nn.LeakyReLU(0.2)
    # self.att = DocAttNet(sent_hidden_size=config.hidden_size, doc_hidden_size=self.mem_size, num_classes=num_labels)
    self.att = NewAttention(config.hidden_size)
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.bert_config = BertConfig.from_dict(opt)
    self.bert = BertModel(self.bert_config)
    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False
    mem_size = self.bert_config.hidden_size
    self.decoder_opt = opt['answer_opt']
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']
    self.bert_pooler = None
    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if decoder_opt == 1:
            out_proj = SANClassifier(mem_size, mem_size, lab, opt, prefix='answer', dropout=dropout)
        else:
            out_proj = nn.Linear(self.bert_config.hidden_size, lab)
        self.scoring_list.append(out_proj)
    self.opt = opt
    self._my_init()
    self.set_embed(opt)
def __init__(self, config, graph_retriever_config):
    super(BertForGraphRetriever, self).__init__(config)
    self.graph_retriever_config = graph_retriever_config
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

    # Initial state
    self.s = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    # Scaling factor for weight norm
    self.g = Parameter(torch.FloatTensor(1).fill_(1.0))
    # RNN weight
    self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)
    # EOE and output bias
    self.eos = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    self.bias = Parameter(torch.FloatTensor(1).zero_())

    self.apply(self.init_bert_weights)
    self.cpu = torch.device('cpu')
def __init__(self, config):
    super(FusionBertModule, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.pooler_mode = config.pooler_mode
    # self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.apply(self.init_bert_weights)
def __init__(self, config):
    super(BertQAYesNoMLP, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.answer_choice = nn.Linear(config.hidden_size, 3)
    self.apply(self.init_bert_weights)
def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device, write_log, summary_writer):
    self.job_config = job_config

    if not use_pretrain:
        model_config = self.job_config.get_model_config()
        bert_config = BertConfig(**model_config)
        bert_config.vocab_size = len(tokenizer.vocab)
        self.bert_encoder = BertModel(bert_config)
    else:
        # Use pretrained bert weights
        self.bert_encoder = BertModel.from_pretrained(self.job_config.get_model_file_type(),
                                                      cache_dir=cache_dir)
        bert_config = self.bert_encoder.config

    self.network = MTLRouting(self.bert_encoder, write_log=write_log, summary_writer=summary_writer)
    # config_data = self.config['data']

    # Pretrain Dataset
    self.network.register_batch(BatchType.PRETRAIN_BATCH, "pretrain_dataset",
                                loss_calculation=BertPretrainingLoss(self.bert_encoder, bert_config))

    self.device = device
def __init__(self, config, num_choices=4):
    super(BertRACEPool, self).__init__(config)
    self.num_choices = num_choices
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_bert_weights)
def __init__(self, config, num_labels=20):
    super(BertForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = num_labels
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
    self.apply(self.init_bert_weights)
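Multi-label classification scores each label independently, so the conventional loss is `BCEWithLogitsLoss` rather than softmax cross entropy. A hedged forward sketch, assuming `labels` is a multi-hot float matrix of shape `[batch, num_labels]` (not from the original snippet):

def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                 output_all_encoded_layers=False)
    logits = self.classifier(self.dropout(pooled_output))  # [batch, num_labels]
    if labels is not None:
        # One independent sigmoid per label, not a softmax over labels.
        loss_fct = torch.nn.BCEWithLogitsLoss()
        return loss_fct(logits, labels.float())
    return logits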
def __init__(self, config, evidence_lambda=0.8, negative_lambda=1.0):
    super(BertQAYesnoHierarchicalNeg, self).__init__(config)
    print(f'The model {self.__class__.__name__} is loading...')
    print(f'The coefficient of evidence loss is {evidence_lambda}')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(config.hidden_dropout_prob)
    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.answer_choice = nn.Linear(config.hidden_size, 2)
    self.doc_sen_self_attn = layers.LinearSelfAttnAllennlp(config.hidden_size)
    self.que_self_attn = layers.LinearSelfAttn(config.hidden_size)
    self.word_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    self.vector_similarity = layers.AttentionScore(config.hidden_size, 250, do_similarity=False)
    # self.yesno_predictor = nn.Linear(config.hidden_size, 2)
    self.yesno_predictor = nn.Linear(config.hidden_size * 2, 3)
    self.evidence_lam = evidence_lambda
    self.negative_lam = negative_lambda
    self.apply(self.init_bert_weights)