def test_model_from_pretrained(self):
    cache_dir = "/tmp/transformers_test/"
    for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = RobertaModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
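# --- Hedged usage sketch (not from the snippets in this section) ---
# A minimal, self-contained example of the pattern every snippet below shares:
# load a pretrained RoBERTa encoder with RobertaModel.from_pretrained and run
# a forward pass. "roberta-base" and the sample sentence are illustrative
# choices, not taken from the original code.
import torch
from transformers import RobertaModel, RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")
model.eval()

encoded = tokenizer("Hello world!", return_tensors="pt")
with torch.no_grad():
    hidden = model(**encoded)[0]  # last hidden states: (batch, seq_len, 768)
print(hidden.shape)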
def Attentiontokens(datatype, keyword_size, sample_classes, tokenizer):
    dataname = 'attentiontokens_' + datatype + '_'
    cache_file = dataname + str(len(sample_classes)) + '.npy'
    if isfile(cache_file):
        # Load the cached token list from disk (np.load, not torch.from_numpy,
        # which expects an array rather than a filename).
        attentiontokens = np.load(cache_file).tolist()
    else:
        total_model = torch.load('../models/vanilla_softmax_' + datatype + '_' + str(len(sample_classes)))
        model = total_model.module.bert
        pretrained_weight = model.state_dict()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model_version = 'roberta-base'
        model = RobertaModel.from_pretrained(model_version, output_attentions=True).to(device)
        model.load_state_dict(pretrained_weight)
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model)
        # Note: this overwrites the `tokenizer` argument passed to the function.
        tokenizer = RobertaTokenizer.from_pretrained(model_version, do_lower_case=True)
        train_data, _ = VanillaDataload(datatype, tokenizer, sample_classes)
        traindataset = Dataset(filename=train_data)
        train_loader = DataLoader(dataset=traindataset, batch_size=16,
                                  shuffle=False, num_workers=2)
        # The model may or may not be wrapped in DataParallel, so resolve the
        # config through .module only when the wrapper is present.
        config = (model.module.config if isinstance(model, torch.nn.DataParallel)
                  else model.config)
        attention_scores = np.zeros(config.vocab_size)
        attention_freq = np.zeros(config.vocab_size)
        attention_average = np.zeros(config.vocab_size)
        with torch.no_grad():
            for i, data in enumerate(train_loader):
                inputs, labels = data[0].to(device), data[1].to(device)
                attention = model(inputs)[-1]
                # attention[11]: last-layer attention, shape (batch, heads, seq, seq).
                # Use a separate batch index so the enumerate counter is not shadowed.
                for b in range(len(attention[11])):
                    for j in range(512):  # assumes a fixed sequence length of 512
                        # Attention paid by head 0's <s> token (position 0) to position j.
                        score = attention[11][b][0][0][j]
                        token = inputs[b][j].item()
                        attention_scores[token] += score.item()
                        attention_freq[token] += 1
        for i in range(config.vocab_size):
            score = attention_scores[i]
            freq = attention_freq[i]
            average = 0 if freq == 0 else score / freq
            attention_average[i] = average
        key_num = keyword_size * len(sample_classes)
        attentiontokens = attention_average.argsort()[-key_num:][::-1].tolist()
        np.save(dataname + str(len(sample_classes)), attentiontokens)
    return attentiontokens
def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()
    # Check that exactly one of pretrained_model_name, config_filename, and
    # vocab_size was passed in.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1
    if total != 1:
        raise ValueError(
            "Exactly one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "ROBERTA constructor.")
    if vocab_size is not None:
        config = RobertaConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = RobertaModel(config)
    elif pretrained_model_name is not None:
        model = RobertaModel.from_pretrained(pretrained_model_name)
    else:
        # config_filename must be set: exactly one option was validated above,
        # so the former trailing ValueError branch was unreachable.
        config = RobertaConfig.from_json_file(config_filename)
        model = RobertaModel(config)
    model.to(self._device)
    self.add_module("roberta", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
def build_network(args):
    bert_model = RobertaModel.from_pretrained(args.roberta_model)
    network = QDGATNet(bert_model,
                       hidden_size=bert_model.config.hidden_size,
                       dropout_prob=args.dropout)
    if torch.cuda.is_available():
        network.cuda()
    return network
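# Hedged usage sketch for build_network (illustrative, not from the original
# code): it only needs an object exposing `roberta_model` and `dropout`, so an
# argparse.Namespace stands in for parsed CLI arguments. The values shown are
# assumptions.
from argparse import Namespace

args = Namespace(roberta_model="roberta-base", dropout=0.1)
network = build_network(args)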
def __init__(self, PATH="data/roberta/"):
    super(EmotionModel, self).__init__()
    config = RobertaConfig.from_pretrained(PATH, return_dict=False)
    self.bert_model = RobertaModel.from_pretrained(PATH + "pytorch_model.bin",
                                                   config=config)
    self.dropout = nn.Dropout(0.1)
    self.linear1 = nn.Linear(768, 1)
    self.linear2 = nn.Linear(768 + 1, 1)
def getModel(version):
    model = None
    if version.split('-')[0] == 'bert':
        model = BertModel.from_pretrained(version,
                                          output_hidden_states=True)  # .get_input_embeddings()
    elif version.split('-')[0] == 'roberta':
        model = RobertaModel.from_pretrained(version, output_hidden_states=True)
    return model
def __init__(self):
    super(TokenModel, self).__init__()
    self.config = RobertaConfig.from_pretrained(config.roberta_config,
                                                output_hidden_states=True)
    self.roberta = RobertaModel.from_pretrained(config.roberta_model,
                                                config=self.config)
    self.dropout = nn.Dropout(p=0.5)
    self.fc = nn.Linear(self.config.hidden_size, 2)
    nn.init.normal_(self.fc.weight, std=0.02)
    # The original nn.init.normal_(self.fc.bias, 0) drew the bias from N(0, 1)
    # (0 is the mean, not the value); a constant zero bias was almost
    # certainly intended.
    nn.init.zeros_(self.fc.bias)
def __init__(self, dropout):
    super(ROBERTAModel, self).__init__()
    self.roberta = RobertaModel.from_pretrained(config.PATHS['roberta'],
                                                config=RobertaConfig())
    self.fc = nn.Linear(768, 2)
    self.dropout = nn.Dropout(dropout)
def __init__(self, dropout_rate=0.3, n_outputs=2):
    super(RobertaTokenClassifier, self).__init__()
    self.pretrained_model = RobertaModel.from_pretrained('roberta-base')
    self.d1 = torch.nn.Dropout(dropout_rate)
    self.l1 = torch.nn.Linear(768, 64)
    self.bn1 = torch.nn.LayerNorm(64)
    self.d2 = torch.nn.Dropout(dropout_rate)
    self.l2 = torch.nn.Linear(64, n_outputs)
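# Hedged sketch (hypothetical, not from the original code): the class above
# defines only __init__. A forward method consistent with those layers might
# look like the following; the layer ordering and the per-token (rather than
# pooled) output are assumptions.
def forward(self, input_ids, attention_mask=None):
    hidden = self.pretrained_model(input_ids, attention_mask=attention_mask)[0]
    x = self.d1(hidden)                                 # (batch, seq_len, 768)
    x = self.bn1(torch.nn.functional.relu(self.l1(x)))  # project to 64 dims
    x = self.d2(x)
    return self.l2(x)                                   # (batch, seq_len, n_outputs)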
def __init__(self, config, num_label):
    super(RobertaClassification, self).__init__()
    self.roberta = RobertaModel.from_pretrained('roberta-large-mnli')
    self.fc = nn.Linear(config.hidden_size, num_label)
    self.drop = nn.Dropout(config.hidden_dropout_prob)
    self.loss = nn.CrossEntropyLoss(reduction='sum')
    torch.nn.init.xavier_uniform_(self.fc.weight)
    torch.nn.init.constant_(self.fc.bias, 0.)
def create_roberta_autoencoder(hyperparameters):
    bert_encoder = RobertaModel.from_pretrained('roberta-base')
    vocab = autoencoder_objects['english_bert_tokenizer'].encoder
    autoencoder = create_autoencoder_given_encoder(bert_encoder, vocab,
                                                   hyperparameters)
    autoencoder_optimizer = optim.Adam(autoencoder.decoder.parameters(),
                                       lr=hyperparameters['learning_rate'],
                                       weight_decay=1e-5)
    return autoencoder, autoencoder_optimizer
def __init__(self, name, sort='bert', setting='average'):
    self.setting = setting
    assert sort in ['bert', 'roberta']
    if sort == 'bert':
        self.tokenizer = BertTokenizer.from_pretrained(name)
        self.model = BertModel.from_pretrained(name)
    elif sort == 'roberta':
        self.tokenizer = RobertaTokenizer.from_pretrained(name)
        self.model = RobertaModel.from_pretrained(name)
def __init__(self):
    super(DISTILROBERTA_SQUAD, self).__init__()
    self.roberta_model = RobertaModel.from_pretrained('distilroberta-base')
    self.fc_layers = nn.Sequential(nn.Linear(768, 256),
                                   nn.ReLU(),
                                   nn.Linear(256, 2))
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, atokenizer: AtomTokenizer, tokenizer: Tokenizer, dec_dim: int = 64, device: str = 'cpu', load_pretrained: bool = False): super(Parser, self).__init__() self.enc_dim = 768 self.dec_dim = dec_dim self.num_embeddings = len(atokenizer) self.device = device self.atom_tokenizer = atokenizer self.type_parser = TypeParser() self.tokenizer = tokenizer self.dropout = Dropout(0.1) self.enc_heads = 8 self.dec_heads = 8 self.d_atn_dec = self.dec_dim // self.dec_heads if load_pretrained: self.word_encoder = RobertaModel.from_pretrained( "pdelobelle/robbert-v2-dutch-base").to(device) else: json_path = path.join( path.join(path.dirname(path.dirname(__file__)), 'data'), 'config.json') self.word_encoder = RobertaModel( RobertaConfig.from_json_file(json_path)).to(device) self.supertagger = make_decoder(num_layers=6, num_heads_enc=self.enc_heads, num_heads_dec=self.dec_heads, d_encoder=self.enc_dim, d_decoder=self.dec_dim, d_atn_enc=self.enc_dim // self.enc_heads, d_atn_dec=self.d_atn_dec, d_v_enc=self.enc_dim // self.enc_heads, d_v_dec=self.dec_dim // self.dec_heads, d_interm=self.dec_dim * 2, dropout_rate=0.1).to(device) self.atom_embedder = ComplexEmbedding(self.num_embeddings, self.dec_dim // 2).to(device) self.linker = make_encoder(num_layers=3, num_heads=self.enc_heads, d_intermediate=self.dec_dim * 4, dropout=0.15, d_model=self.dec_dim * 2, d_k=(self.dec_dim * 2) // self.dec_heads, d_v=(self.dec_dim * 2) // self.dec_heads).to(device) self.pos_transformation = Sequential( FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2), LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device) self.neg_transformation = Sequential( FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2), LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
def test_inference_no_head(self):
    model = RobertaModel.from_pretrained("roberta-base")
    input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
    output = model(input_ids)[0]
    # compare the actual values for a slice
    expected_slice = torch.tensor(
        [[[-0.0231, 0.0782, 0.0074],
          [-0.1854, 0.0539, -0.0174],
          [0.0548, 0.0799, 0.1687]]]
    )
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
def __init__(self, candidate_num, encoder, hidden_size=768):
    super(MatchSum, self).__init__()
    self.hidden_size = hidden_size
    self.candidate_num = candidate_num
    if encoder == 'bert':
        self.encoder = BertModel.from_pretrained('bert-base-uncased')
    else:
        self.encoder = RobertaModel.from_pretrained('roberta-base')
def __init__(self):
    super(Model, self).__init__()
    self.config = RobertaConfig.from_pretrained(
        model_config.pretrain_model_path, output_hidden_states=True)
    self.model = RobertaModel.from_pretrained(
        model_config.pretrain_model_path, config=self.config)
    self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
    # self.classifier = nn.Linear(self.config.hidden_size * 2, 2)
    self.classifier = nn.Linear(self.config.hidden_size, 2)
    torch.nn.init.normal_(self.classifier.weight, std=0.02)
def __init__(self, config, model_name_or_path=None, pretrained_weights=None):
    super(RobertaQA, self).__init__(config)
    if model_name_or_path:
        self.roberta = RobertaModel.from_pretrained(model_name_or_path,
                                                    config=config)
    else:
        self.roberta = RobertaModel(config=config)
    if pretrained_weights:
        self.roberta.load_state_dict(pretrained_weights)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    torch.nn.init.normal_(self.qa_outputs.weight, mean=0.0,
                          std=self.config.initializer_range)
def load_roberta(model_type="roberta-base", is_eval: bool = True):
    model = RobertaModel.from_pretrained(model_type)
    if is_eval:
        model.eval()
    tokenizer = RobertaTokenizer.from_pretrained(model_type,
                                                 eos_token="[SEP]",
                                                 sep_token="[SEP]",
                                                 cls_token="[CLS]",
                                                 pad_token="[PAD]")
    return model, tokenizer
def __init__(self, num_class=5, pretrain_model="roberta-base"):
    super(roberta_sum_classifier, self).__init__()
    self.pretrain_model = pretrain_model
    self.num_class = num_class
    self.roberta = RobertaModel.from_pretrained(pretrain_model)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.roberta.config.hidden_size, num_class)
    self.tokenizer = RobertaTokenizer.from_pretrained(pretrain_model)
def __init__(self, num_class):
    super(BERTClass, self).__init__()
    self.num_class = num_class
    self.l1 = RobertaModel.from_pretrained("roberta-base")
    self.hc_features = torch.nn.Linear(50, 128)
    self.from_bert = torch.nn.Linear(768, 128)
    self.dropout = torch.nn.Dropout(0.3)
    self.pre_classifier = torch.nn.Linear(256, 128)
    self.classifier = torch.nn.Linear(128, self.num_class)
    self.history = dict()
def __init__(self):
    super(ResnetRobertabd, self).__init__()
    self.roberta = RobertaModel.from_pretrained("roberta-large")
    self.resnet = models.resnet101(pretrained=True)
    self.feats = torch.nn.Sequential(torch.nn.Linear(1000, 1024))
    self.dropout = torch.nn.Dropout(0.1)
    self.classifier = torch.nn.Linear(1024, 1)
def __init__(self, config: SklearnConfig) -> None:
    super().__init__(config)
    self.device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(self.device_name)
    model_name = "rinna/japanese-roberta-base"
    self.model = RobertaModel.from_pretrained(model_name, return_dict=True)
    # rinna/japanese-roberta-base ships a SentencePiece vocabulary, hence the
    # T5Tokenizer rather than RobertaTokenizer.
    self.tokenizer = T5Tokenizer.from_pretrained(model_name, padding=True)
    self.model.eval()
def __init__(self, path, embedding_dim=768, num_class=20, num_class1=20):
    super(TransferRobertaNet, self).__init__()
    self.embedding_dim = embedding_dim
    self.num_class = num_class
    self.num_class1 = num_class1
    self.path = path
    self.encoder = RobertaModel.from_pretrained(self.path)
    # self.encoder = model_fun()
    self.transfer_fc = nn.Linear(self.embedding_dim, self.num_class)
    self.down_fc = nn.Linear(self.embedding_dim, self.num_class1)
def __init__(self, device='cpu', pretrain=True):
    super().__init__()
    config = RobertaConfig.from_pretrained("vinai/phobert-base")
    if pretrain:
        self.roberta = RobertaModel.from_pretrained("vinai/phobert-base")
    else:
        self.roberta = RobertaModel(config)
    self.fc = nn.Linear(768, 300)
    self.device = device
def __init__(self):
    super(RobertaBaseLinear, self).__init__()
    self.model_path = os.path.join(os.getcwd(),
                                   MODELS[self.__class__.__name__]['path'])
    self.roberta_base = RobertaModel.from_pretrained(self.model_path)
    self.ocemo_linear = nn.Linear(768, 7)
    self.ocnli_linear = nn.Linear(768, 3)
    self.tnews_linear = nn.Linear(768, 15)
def __init__(self, dropout_rate=0.3):
    super(ROBERTAOnMRPC, self).__init__()
    self.base_model = RobertaModel.from_pretrained('roberta-base')
    self.d1 = torch.nn.Dropout(dropout_rate)
    self.l1 = torch.nn.Linear(768, 128)
    self.bn1 = torch.nn.LayerNorm(128)
    self.d2 = torch.nn.Dropout(dropout_rate)
    self.l2 = torch.nn.Linear(128, 2)
    torch.nn.init.xavier_uniform_(self.l1.weight)
    torch.nn.init.xavier_uniform_(self.l2.weight)
def __init__(self, classifier_config_dir, device, task_type, n_clf_layers=6,
             use_dm=True, use_pm=True, use_rt=True, use_bio=False,
             use_name=False, use_network=False, use_count=False):
    super(ConcatenatedClassifier, self).__init__()
    # load text model
    self.device = device
    self.task_type = task_type
    self.use_text = use_dm | use_pm | use_rt
    self.use_bio = use_bio
    self.use_name = use_name
    self.use_etc = use_network | use_count
    self.text_model = RobertaModel.from_pretrained("vinai/bertweet-base",
                                                   output_attentions=False,
                                                   output_hidden_states=False)
    if self.use_name:
        # 302 embeddings: 300 most frequent characters + pad + unk
        self.charEmbedding = nn.Embedding(num_embeddings=302,
                                          embedding_dim=300,
                                          padding_idx=301)
        self.conv3 = nn.Conv1d(in_channels=300, out_channels=256,
                               kernel_size=3, padding=1)
        self.conv4 = nn.Conv1d(in_channels=300, out_channels=256,
                               kernel_size=4, padding=1)
        self.conv5 = nn.Conv1d(in_channels=300, out_channels=256,
                               kernel_size=5, padding=1)
    # load classifier for combining these features
    config = RobertaConfig.from_json_file(classifier_config_dir)
    config.num_hidden_layers = n_clf_layers
    config.num_attention_heads = n_clf_layers
    config.max_position_embeddings = 7
    if self.use_bio:
        config.max_position_embeddings += 2
    if self.use_name:
        config.max_position_embeddings += 4
    self.concat_model = RobertaModel(config)
    self.classifier = ClassifierLayer(use_count=use_count,
                                      use_network=use_network)
def download_robert_base():
    file = '../input/roberta-base'
    config = RobertaConfig.from_pretrained('roberta-base')
    config.save_pretrained(file)
    model = RobertaModel.from_pretrained('roberta-base')
    model.save_pretrained(file)
    tkn = RobertaTokenizer.from_pretrained('roberta-base')
    tkn.save_pretrained(file)
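# Hedged follow-up (illustrative, not from the original code): after
# download_robert_base() has run, the config, weights, and vocabulary sit in
# the saved directory and can be reloaded offline from that path.
model = RobertaModel.from_pretrained('../input/roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('../input/roberta-base')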
def __init__(self, my_config, args):
    super(NqModel, self).__init__()
    # albert_base_configuration = AlbertConfig(vocab_size=30000, hidden_size=768,
    #     num_attention_heads=12, intermediate_size=3072,
    #     attention_probs_dropout_prob=0)
    self.my_mask = None
    self.args = args
    # gradient_checkpointing
    self.bert_config = RobertaConfig.from_pretrained("roberta-large-mnli")
    self.bert_config.gradient_checkpointing = True
    self.bert = RobertaModel.from_pretrained("roberta-large-mnli",
                                             config=self.bert_config)
    # self.bert = RobertaModel.from_pretrained("roberta-base")
    my_config.hidden_size = self.bert.config.hidden_size
    self.right = 0
    self.all = 0
    # self.bert = AlbertModel(albert_base_configuration)
    # self.bert2 = BertModel(bert_config)
    # self.bert = BertModel(BertConfig())
    # self.bert = RobertaModel(RobertaConfig(max_position_embeddings=514, vocab_size=50265))
    # print(my_config, bert_config)
    self.tok_dense = nn.Linear(my_config.hidden_size * 2, my_config.hidden_size * 2)
    self.tok_dense2 = nn.Linear(my_config.hidden_size, my_config.hidden_size)
    # self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
    # self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
    self.dropout = nn.Dropout(my_config.hidden_dropout_prob)
    # tuned to avoid falling into bad local optima
    self.tok_outputs = nn.Linear(my_config.hidden_size * 2, 1)
    self.tok_outputs2 = nn.Linear(my_config.hidden_size, 1)
    # config.max_token_len, config.max_token_relative
    # self.para_outputs = nn.Linear(self.config.hidden_size, 1)
    # self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)
    # self.tok_to_label = nn.Linear(my_config.max_token_len, 2)
    # self.par_to_label = nn.Linear(my_config.max_paragraph_len, 2)
    self.encoder = Encoder(my_config)
    # self.encoder2 = Encoder(my_config)
    self.my_config = my_config
    # self.my_mask =
    self.ACC = 0
    self.ALL = 0
    self.ErrId = []