def test_inference_no_head(self):
    model = RobertaModel.from_pretrained('roberta-base')
    input_ids = torch.tensor(
        [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
    output = model(input_ids)[0]
    # compare the actual values for a slice.
    expected_slice = torch.Tensor([[[-0.0231, 0.0782, 0.0074],
                                    [-0.1854, 0.0539, -0.0174],
                                    [0.0548, 0.0799, 0.1687]]])
    self.assertTrue(
        torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
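# A minimal sketch (not part of the original test) of how an input_ids tensor like the
# one above is typically produced; the exact ids depend on the sentence and tokenizer,
# and "Hello world!" is only an illustrative example.
import torch
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
input_ids = torch.tensor([tokenizer.encode("Hello world!", add_special_tokens=True)])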
def create_and_check_model_as_decoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    encoder_hidden_states,
    encoder_attention_mask,
):
    config.add_cross_attention = True
    model = RobertaModel(config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
    )
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
    )
    result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    self.parent.assertEqual(result.last_hidden_state.shape,
                            (self.batch_size, self.seq_length, self.hidden_size))
    self.parent.assertEqual(result.pooler_output.shape,
                            (self.batch_size, self.hidden_size))
def __init__(self, args):
    super(ClassifyModel, self).__init__()
    args.out_size = len(args.dense_features)
    self.dropout = nn.Dropout(args.hidden_dropout_prob)
    self.args = args

    # Create the BERT text model and load the pretrained weights
    config = RobertaConfig.from_pretrained(args.pretrained_model_path)
    config.output_hidden_states = True
    args.hidden_size = config.hidden_size
    args.num_hidden_layers = config.num_hidden_layers
    self.bert_text_layer = RobertaModel.from_pretrained(args.pretrained_model_path, config=config)
    self.text_linear = nn.Linear(in_features=args.text_dim + args.vocab_dim_v1 * len(args.text_features),
                                 out_features=args.hidden_size)
    logger.info("Load linear from %s", os.path.join(args.pretrained_model_path, "linear.bin"))
    self.text_linear.load_state_dict(torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
    logger.info("Load embeddings from %s", os.path.join(args.pretrained_model_path, "embeddings.bin"))
    self.text_embeddings = nn.Embedding.from_pretrained(
        torch.load(os.path.join(args.pretrained_model_path, "embeddings.bin"))['weight'], freeze=True)
    args.out_size += args.hidden_size * 2

    # Create the fusion-layer model with randomly initialized weights
    config = RobertaConfig()
    config.num_hidden_layers = 4
    config.intermediate_size = 2048
    config.hidden_size = 512
    config.num_attention_heads = 16
    config.vocab_size = 5
    self.fusion_text_layer = RobertaModel(config=config)
    self.fusion_text_layer.apply(self._init_weights)
    self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
    self.text_linear_1.apply(self._init_weights)
    self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
    args.out_size += 1024

    # Create the classification head with randomly initialized weights
    self.classifierHead = ClassificationHead(args)
    self.classifierHead.apply(self._init_weights)
def DistilBert():
    import torch
    import numpy as np
    import pandas as pd
    from transformers import RobertaTokenizer, RobertaModel

    # Transformers has a unified API
    # for 8 transformer architectures and 30 pretrained weights.
    # Model          | Tokenizer          | Pretrained weights shortcut
    # MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),
    #           (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
    #           (GPT2Model, GPT2Tokenizer, 'gpt2'),
    #           (CTRLModel, CTRLTokenizer, 'ctrl'),
    #           (TransfoXLModel, TransfoXLTokenizer, 'transfo-xl-wt103'),
    #           (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),
    #           (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'),
    #           (DistilBertModel, DistilBertTokenizer, 'distilbert-base-uncased'),
    #           (RobertaModel, RobertaTokenizer, 'roberta-base')]
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    encoded_text = pd.Series(dataset).apply(
        lambda x: tokenizer.encode(x, add_special_tokens=True))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model.resize_token_embeddings(len(encoded_text))
    print(encoded_text[0])
    print(len(encoded_text))
    # print(dataset.map(len).max())
    # model = RobertaModel.from_pretrained('roberta-base').to(device)
    parallel_model = torch.nn.DataParallel(
        RobertaModel.from_pretrained('roberta-base').to(device))

    # Pad every sequence to the length of the longest one.
    # max_len = 512
    max_len = 0
    for i in encoded_text.values:
        if len(i) > max_len:
            max_len = len(i)
    # Pad with the tokenizer's pad id (1 for RoBERTa); padding with 0 would collide
    # with the <s> token id and corrupt the attention mask.
    pad_id = tokenizer.pad_token_id
    padded = np.array(
        [i + [pad_id] * (max_len - len(i)) for i in encoded_text.values])
    attention_mask = np.where(padded != pad_id, 1, 0)
    print(attention_mask.shape)

    input_ids = torch.tensor(padded).to(device)
    attention_mask = torch.tensor(attention_mask).to(device)
    with torch.no_grad():
        last_hidden_states = parallel_model(input_ids,
                                            attention_mask=attention_mask)
    # Use the hidden state of the first (<s>) token as the sentence embedding.
    embeddings = last_hidden_states[0][:, 0, :].cpu().numpy()
    print(embeddings[0])
    print(embeddings.shape)
    return embeddings
def get_model_and_tokenizer(model_name, device="cpu", random_weights=False, model_path=None):
    """
    model_path: if given, initialize from path instead of official repo
    """

    init_model = model_name
    if model_path:
        print("Initializing model from local path:", model_path)
        init_model = model_path

    if model_name.startswith("xlnet"):
        model = XLNetModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(init_model)
        sep = u"▁"
    elif model_name.startswith("gpt2"):
        model = GPT2Model.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(init_model)
        sep = "Ġ"
    elif model_name.startswith("xlm"):
        model = XLMModel.from_pretrained(init_model,
                                         output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(init_model)
        sep = "</w>"
    elif model_name.startswith("bert"):
        model = BertModel.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("distilbert"):
        model = DistilBertModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("roberta"):
        # Use init_model here as well so a local model_path is honored for RoBERTa too.
        model = RobertaModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(init_model)
        sep = "Ġ"
    else:
        print("Unrecognized model name:", model_name)
        sys.exit()

    if random_weights:
        print("Randomizing weights")
        model.init_weights()

    return model, tokenizer, sep
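# A minimal usage sketch (not part of the original helper), assuming the transformers
# classes referenced above are imported at module level. The sentence is illustrative.
import torch

model, tokenizer, sep = get_model_and_tokenizer("roberta-base", device="cpu")
input_ids = tokenizer.encode("Hello world!", add_special_tokens=True, return_tensors="pt")
with torch.no_grad():
    outputs = model(input_ids)
hidden_states = outputs[-1]  # one tensor per layer, since output_hidden_states=True
print(len(hidden_states), hidden_states[-1].shape)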
def train_model(self):
    # graph_emb = load_pkl_emb(self.graph_emb_path) if self.graph_emb_path is not None else None

    typed_nodes = load_typed_nodes(self.type_ann_edges)

    decoder_mapping = RobertaTokenizer.from_pretrained("microsoft/codebert-base").decoder
    tok_ids, words = zip(*decoder_mapping.items())
    vocab_mapping = dict(zip(words, tok_ids))

    batcher = self.get_batcher(
        self.train_data + self.test_data, self.batch_size, seq_len=self.seq_len,
        graphmap=None, wordmap=vocab_mapping, tagmap=None,
        class_weights=False, element_hash_size=1
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = RobertaModel.from_pretrained("microsoft/codebert-base")
    model.to(device)

    node_ids = []
    embeddings = []

    for ind, batch in enumerate(tqdm(batcher)):
        # token_ids, graph_ids, labels, class_weights, lengths = b
        # Move the inputs to the same device as the model.
        token_ids = torch.LongTensor(batch["tok_ids"]).to(device)
        lens = torch.LongTensor(batch["lens"]).to(device)

        # Map out-of-vocabulary ids to <unk>.
        token_ids[token_ids == len(vocab_mapping)] = vocab_mapping["<unk>"]

        def get_length_mask(target, lens):
            mask = torch.arange(target.size(1)).to(target.device)[None, :] < lens[:, None]
            return mask

        mask = get_length_mask(token_ids, lens)
        with torch.no_grad():
            embs = model(input_ids=token_ids, attention_mask=mask)

        for s_emb, s_repl in zip(embs.last_hidden_state, batch["replacements"]):
            unique_repls = set(list(s_repl))
            repls_for_ann = [r for r in unique_repls if r in typed_nodes]

            for r in repls_for_ann:
                position = s_repl.index(r)
                if position > 512:
                    continue
                node_ids.append(r)
                embeddings.append(s_emb[position])

    all_embs = torch.stack(embeddings, dim=0).cpu().numpy()
    embedder = Embedder(dict(zip(node_ids, range(len(node_ids)))), all_embs)

    pickle.dump(embedder, open("codebert_embeddings.pkl", "wb"), fix_imports=False)
    print(node_ids)
def __init__(self, config):
    super(BertForQuestionAnswering, self).__init__(config)
    # self.bert = RobertaModel(config, add_pooling_layer=False)
    self.roberta = RobertaModel(config, add_pooling_layer=False)
    if config.multi_layer_classifier:
        self.qa_classifier = BertMLP(config)
    else:
        self.qa_classifier = nn.Linear(config.hidden_size, 2)
        self.qa_classifier.weight = truncated_normal_(
            self.qa_classifier.weight, mean=0, std=0.02)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.init_weights()
def __init__(self, path, embedding_dim=768, feat_dim=64, num_class=20):
    super(SupConRobertaNet, self).__init__()
    self.embedding_dim = embedding_dim
    self.feat_dim = feat_dim
    self.num_class = num_class
    self.path = path
    self.encoder = RobertaModel.from_pretrained(self.path)
    # self.encoder = model_fun()
    self.projection = nn.Sequential(
        nn.Linear(self.embedding_dim, self.embedding_dim),
        nn.ReLU(inplace=True),
        nn.Linear(self.embedding_dim, self.feat_dim))
    self.fc = nn.Linear(self.embedding_dim, self.num_class)
def __init__(self, n_classes, unfreeze):
    super(RobertaForClaimDetection, self).__init__()
    self.num_labels = n_classes
    config = RobertaConfig.from_pretrained('roberta-base', output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained('roberta-base',
                                                add_pooling_layer=True,
                                                config=config)
    self.drop = nn.Dropout(p=0.1)
    self.out = nn.Linear(self.roberta.config.hidden_size, n_classes)
    for param in self.roberta.base_model.parameters():
        param.requires_grad = unfreeze
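# A minimal forward sketch for the classifier above; it is not part of the original
# snippet and only assumes the attributes defined in __init__ (self.roberta, self.drop,
# self.out) plus the standard pooler output of RobertaModel.
def forward(self, input_ids, attention_mask=None):
    outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
    pooled = outputs.pooler_output      # (batch, hidden_size), since add_pooling_layer=True
    return self.out(self.drop(pooled))  # (batch, n_classes) logits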
def __init__(self, config):
    """
    :param config: model configuration
    """
    super().__init__(config)
    self.num_labels = config.num_labels
    # Load the model configuration and initialize a RoBERTa model
    self.roberta = RobertaModel(config)
    # Initialize dropout and a custom classification layer
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    # Call transformers' own weight-initialization routine
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.config = config
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = GlobalAttentionHeadReluMean(config)
    self.log_softmax = nn.LogSoftmax(dim=1)
    self.classifier.apply(init_weights_relu)
    self.frame_classification_loss_sent = nn.NLLLoss()
def __init__(self, config):
    super(RobertaForQuestionAnswering, self).__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
    print('Hidden Size: %s' % config.hidden_size)
    self.version_2_with_negative = True
    if self.version_2_with_negative:
        self.clf_output = PoolerAnswerClass(config.hidden_size)
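# A minimal forward sketch for the QA head above (not part of the original snippet);
# it assumes num_labels == 2 and follows the usual start/end-logit split used by
# transformers question-answering heads. The version_2_with_negative branch is omitted.
def forward(self, input_ids, attention_mask=None):
    sequence_output = self.roberta(input_ids, attention_mask=attention_mask)[0]
    logits = self.qa_outputs(sequence_output)            # (batch, seq_len, 2)
    start_logits, end_logits = logits.split(1, dim=-1)
    return start_logits.squeeze(-1), end_logits.squeeze(-1)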
def __init__(self, MODEL_PATH='roberta-base'):
    super(TweetModel, self).__init__()
    config = RobertaConfig.from_pretrained(MODEL_PATH + '/config.json',
                                           output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(MODEL_PATH + '/pytorch_model.bin',
                                                config=config)
    self.dropout = nn.Dropout(0.5)
    self.fc = nn.Linear(config.hidden_size, 2)
    # self.fc_len = nn.Linear(config.hidden_size, 96)
    nn.init.normal_(self.fc.weight, std=0.02)
    nn.init.normal_(self.fc.bias, 0)
def __init__(self):
    super().__init__()
    roberta_config = RobertaConfig.from_pretrained(
        os.path.join(config_path, 'config.json'), output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(
        os.path.join(model_path, 'pytorch_model.bin'), config=roberta_config)
    self.dropout = nn.Dropout(0.5)
    self.fc = nn.Linear(roberta_config.hidden_size, 2)
    nn.init.normal_(self.fc.weight, std=0.02)
    nn.init.normal_(self.fc.bias, 0)
def rbt3():
    """
    RBT3: 3-layer RoBERTa-wwm-ext-base.
    RBTL3: 3-layer RoBERTa-wwm-ext-base/large.
    Returns:
    """
    pretrained = "hfl/rbt3"
    tokenizer = BertTokenizer.from_pretrained(pretrained)
    model = RobertaModel.from_pretrained(pretrained)
    model.save_pretrained('rbt3')
    tokenizer.save_pretrained('rbt3')
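# A minimal sketch (not part of the original snippet) of reloading the weights saved
# above from the local 'rbt3' directory.
from transformers import BertTokenizer, RobertaModel

tokenizer = BertTokenizer.from_pretrained('rbt3')
model = RobertaModel.from_pretrained('rbt3')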
def __init__(self, config: RobertaForSequenceClassificationConfig, roberta_share=None):
    super(RobertaForSequenceClassification, self).__init__(config)
    if roberta_share is not None:
        self.roberta = roberta_share
    else:
        self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.cls_classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.cls_classifier.weight = truncated_normal_(
        self.cls_classifier.weight, mean=0, std=0.02)
    self.init_weights()
def __init__(self, cuda, model, epochs, learning_rate, train_dataloader,
             valid_dataloader_MATRES, test_dataloader_MATRES,
             valid_dataloader_HIEVE, test_dataloader_HIEVE, finetune, dataset,
             MATRES_best_PATH, HiEve_best_PATH, load_model_path,
             model_name=None, roberta_size="roberta-base"):
    self.cuda = cuda
    self.model = model
    self.dataset = dataset
    self.epochs = epochs
    self.learning_rate = learning_rate
    self.finetune = finetune
    self.train_dataloader = train_dataloader
    self.valid_dataloader_MATRES = valid_dataloader_MATRES
    self.test_dataloader_MATRES = test_dataloader_MATRES
    self.valid_dataloader_HIEVE = valid_dataloader_HIEVE
    self.test_dataloader_HIEVE = test_dataloader_HIEVE
    ### fine-tune roberta or not ###
    # if finetune is False, we use fixed roberta embeddings before bilstm and mlp
    self.roberta_size = roberta_size
    if not self.finetune:
        self.RoBERTaModel = RobertaModel.from_pretrained(
            self.roberta_size).to(self.cuda)
    if self.roberta_size == 'roberta-base':
        self.roberta_dim = 768
    else:
        self.roberta_dim = 1024
    self.MATRES_best_micro_F1 = -0.000001
    self.MATRES_best_cm = []
    self.MATRES_best_PATH = MATRES_best_PATH
    self.HiEve_best_F1 = -0.000001
    self.HiEve_best_prfs = []
    self.HiEve_best_PATH = HiEve_best_PATH
    self.load_model_path = load_model_path
    self.model_name = model_name
    self.best_epoch = 0
    self.file = open("./rst_file/" + model_name + ".rst", "w")
def __init__(self, num_labels, config, dropout, hidden_size=None):
    super(CustomRobertatModel, self).__init__()
    self.num_labels = num_labels
    self.roberta = RobertaModel.from_pretrained("roberta-large")
    self.hidden_size = config.hidden_size
    print(config.hidden_size, self.hidden_size)
    self.lstm = nn.LSTM(config.hidden_size, self.hidden_size,
                        bidirectional=True, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.classifier = nn.Linear(self.hidden_size * 2, 2)
def create_and_check_roberta_model(self, config, input_ids, token_type_ids,
                                   input_mask, sequence_labels, token_labels,
                                   choice_labels):
    model = RobertaModel(config=config)
    model.eval()
    sequence_output, pooled_output = model(
        input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    sequence_output, pooled_output = model(
        input_ids, token_type_ids=token_type_ids)
    sequence_output, pooled_output = model(input_ids)

    result = {
        "sequence_output": sequence_output,
        "pooled_output": pooled_output,
    }
    self.parent.assertListEqual(
        list(result["sequence_output"].size()),
        [self.batch_size, self.seq_length, self.hidden_size])
    self.parent.assertListEqual(list(result["pooled_output"].size()),
                                [self.batch_size, self.hidden_size])
class RobertaForPIQA(BertPreTrainedModel):
    config_class = RobertaConfig
    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
    base_model_prefix = "roberta"

    def __init__(self, config):
        super(RobertaForPIQA, self).__init__(config)
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)
        self.init_weights()

    def _resize_type_embeddings(self, new_num_types):
        old_embeddings = self.roberta.embeddings.token_type_embeddings
        new_embeddings = self.roberta._get_resized_embeddings(
            old_embeddings, new_num_types)
        self.roberta.embeddings.token_type_embeddings = new_embeddings
        return self.roberta.embeddings.token_type_embeddings

    def forward(self, task=None, input_ids=None, attention_mask=None,
                token_type_ids=None, position_ids=None, head_mask=None,
                inputs_embeds=None, labels=None):
        seq_length = input_ids.size(2)
        outputs = self.roberta(
            input_ids=input_ids.view(-1, seq_length),
            attention_mask=attention_mask.view(-1, seq_length),
            token_type_ids=token_type_ids.view(-1, seq_length),
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds)
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)
        # import pdb; pdb.set_trace()
        logits = logits.view(-1, self.num_labels)
        outputs = (logits, ) + outputs[2:]

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels.view(-1))
            outputs = (loss, ) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)
def get_model_and_tokenizer(model_name='gpt2', device=CPU_DEVICE):
    if 'gpt3' in model_name:
        # For GPT-3 evals, use GPT-2 large
        model_name = 'gpt2-large'

    if 'gpt2' in model_name:
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        model = GPT2LMHeadModel.from_pretrained(
            model_name, pad_token_id=tokenizer.eos_token_id).to(device)
        model = model.eval()
    elif 'roberta' in model_name:
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        model = RobertaModel.from_pretrained(model_name)
    else:
        raise ValueError(f'Unknown model: {model_name}')
    return model, tokenizer
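# A minimal usage sketch (not part of the original helper); CPU_DEVICE is defined
# elsewhere in the original module, so a plain torch device is passed here instead.
import torch

model, tokenizer = get_model_and_tokenizer('roberta-base', device=torch.device('cpu'))
input_ids = tokenizer.encode("Hello world!", return_tensors="pt")
with torch.no_grad():
    last_hidden_state = model(input_ids)[0]  # (batch, seq_len, hidden_size)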
def __init__(self):
    super(PhoBERTCLREncoder, self).__init__()
    self.config = RobertaConfig.from_pretrained(
        os.path.join(os.getcwd(), '../pretrained', 'PhoBERT_base_transformers', 'config.json'))
    self.phobert = RobertaModel.from_pretrained(
        os.path.join(os.getcwd(), '../pretrained', 'PhoBERT_base_transformers', 'model.bin'),
        config=self.config)
    self.linear_1 = torch.nn.Linear(4 * 768, 4 * 768)
    self.linear_2 = torch.nn.Linear(4 * 768, 512)
def __create_model(self):
    # roBERTa
    roberta_config = RobertaConfig.from_pretrained(
        self.config['roberta_config_file_path'], output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(
        self.config['roberta_model_file_path'], config=roberta_config)
    # Dropout
    self.dropout = nn.Dropout(0.5)
    # Linear
    self.fc = nn.Linear(roberta_config.hidden_size, 2)
    nn.init.normal_(self.fc.weight, std=0.02)
    nn.init.normal_(self.fc.bias, 0)
def __init__(self, threshold=0.1, modelname="roberta-large"):
    super(MLMSim, self).__init__()
    if modelname.startswith("roberta-"):
        self.model = RobertaModel.from_pretrained(modelname)
        self.TOKENIZER = RobertaTokenizer.from_pretrained(modelname)
    elif modelname.startswith("bert-base-uncased"):
        self.model = BertModel.from_pretrained(modelname)
        self.TOKENIZER = BertTokenizer.from_pretrained(modelname)
    # Freeze the encoder: requires_grad must be set on the parameters,
    # not on the module itself.
    for p in self.model.parameters():
        p.requires_grad = False
    self.pad_token = self.TOKENIZER.pad_token_id
    self.threshold = threshold
    self.mapping_log = open("log.txt", "w")
def __init__(self, config):
    super(RobertaForGLM, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.lm_head = RobertaLMHead(config)
    self.bilinear = nn.Bilinear(config.hidden_size, config.hidden_size, 1)
    self.loss_lambda = getattr(config, "loss_lambda", 1.)
    self.disable_rev_pos = getattr(config, "disable_rev_pos", False)
    self.padding_idx = self.roberta.embeddings.padding_idx
    self.apply(self.init_weights)
    self.tie_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config, add_pooling_layer=False)
    self.classifier_copa = RobertaClassificationHead(config)
    self.classifier_anli = RobertaClassificationHead(config)
    self.classifier_snli = RobertaClassificationHead(config)
    self.classifier_atomic = RobertaClassificationHead(config)
    self.classifier_social = RobertaClassificationHead(config)
    self.classifier_hella = RobertaClassificationHead(config)
    self.classifier_joci = RobertaClassificationHead(config)
    self.init_weights()
def __init__(self, embed_size=IN_FEATURES):
    super(Net, self).__init__()
    self.embed_size = embed_size
    if USE_EMPATH:
        self.embed_size += 194
    self.num_classes = 11
    print(f"Embeddings length: {self.embed_size}")
    if MODEL_NAME == 'BERT':
        self.bert = BertModel.from_pretrained("bert-base-cased")
    else:
        self.bert = RobertaModel.from_pretrained("roberta-base")
    self.fc = nn.Linear(self.embed_size, self.num_classes)
    self.dropout = nn.Dropout(BERT_DROPOUT)
def __init__(
    self,
    d_model=512,
    nhead=8,
    num_encoder_layers=6,
    num_decoder_layers=6,
    dim_feedforward=2048,
    dropout=0.1,
    activation="relu",
    normalize_before=False,
    return_intermediate_dec=False,
    pass_pos_and_query=True,
    text_encoder_type="roberta-base",
    freeze_text_encoder=False,
    contrastive_loss=False,
):
    super().__init__()

    self.pass_pos_and_query = pass_pos_and_query
    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation, normalize_before)
    encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

    decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation, normalize_before)
    decoder_norm = nn.LayerNorm(d_model)
    self.decoder = TransformerDecoder(
        decoder_layer, num_decoder_layers, decoder_norm,
        return_intermediate=return_intermediate_dec
    )

    self.CLS = nn.Embedding(1, d_model) if contrastive_loss else None

    self._reset_parameters()

    self.tokenizer = RobertaTokenizerFast.from_pretrained(text_encoder_type)
    self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)

    if freeze_text_encoder:
        for p in self.text_encoder.parameters():
            p.requires_grad_(False)

    self.expander_dropout = 0.1
    config = self.text_encoder.config
    self.resizer = FeatureResizer(
        input_feat_size=config.hidden_size,
        output_feat_size=d_model,
        dropout=self.expander_dropout,
    )

    self.d_model = d_model
    self.nhead = nhead
def __init__(self, config):
    super(RobertaForTokenClassification, self).__init__(config)
    self.num_labels = config.num_labels
    config.max_seq_length = 128
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.conv1d = nn.Conv1d(config.max_seq_length * config.hidden_size,
                            config.max_seq_length * config.hidden_size, 1,
                            stride=config.hidden_size * 8)
    self.init_weights()
def __init__(self):
    super(ResnetRobertaBU, self).__init__()
    self.roberta = RobertaModel.from_pretrained("roberta-large")
    self.resnet = models.resnet101(pretrained=True)
    self.feats = torch.nn.Sequential(torch.nn.Linear(1000, 1024))
    self.feats2 = torch.nn.Sequential(torch.nn.LayerNorm(1024, eps=1e-12))
    self.boxes = torch.nn.Sequential(torch.nn.Linear(4, 1024),
                                     torch.nn.LayerNorm(1024, eps=1e-12))
    self.dropout = torch.nn.Dropout(0.1)
    self.classifier = torch.nn.Linear(1024, 1)