def test_model_from_pretrained(self):
     cache_dir = "/tmp/transformers_test/"
     for model_name in list(
             ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         model = RobertaModel.from_pretrained(model_name,
                                              cache_dir=cache_dir)
         shutil.rmtree(cache_dir)
         self.assertIsNotNone(model)
Example No. 2
def Attentiontokens(datatype, keyword_size, sample_classes, tokenizer):
    dataname = 'attentiontokens_' + datatype + '_'
    if isfile(dataname + str(len(sample_classes)) + '.npy'):
        # Reuse the cached result written by np.save at the end of this function.
        attentiontokens = np.load(dataname + str(len(sample_classes)) +
                                  '.npy').tolist()
    else:
        total_model = torch.load('../models/vanilla_softmax_' + datatype +
                                 '_' + str(len(sample_classes)))
        model = total_model.module.bert
        pretrained_weight = model.state_dict()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model_version = 'roberta-base'
        model = RobertaModel.from_pretrained(model_version,
                                             output_attentions=True).to(device)
        model.load_state_dict(pretrained_weight)

        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model)
        tokenizer = RobertaTokenizer.from_pretrained(model_version,
                                                     do_lower_case=True)

        train_data, _ = VanillaDataload(datatype, tokenizer, sample_classes)
        traindataset = Dataset(filename=train_data)
        train_loader = DataLoader(dataset=traindataset,
                                  batch_size=16,
                                  shuffle=False,
                                  num_workers=2)

        # DataParallel exposes the wrapped model as .module; fall back to the
        # bare model when running on a single device.
        base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
        vocab_size = base_model.config.vocab_size
        attention_scores = np.zeros(vocab_size)
        attention_freq = np.zeros(vocab_size)
        attention_average = np.zeros(vocab_size)

        with torch.no_grad():
            for data in train_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                # model(inputs)[-1] is a tuple with one attention tensor per
                # layer, each of shape (batch, num_heads, seq_len, seq_len);
                # layer 11 is the last layer of roberta-base.
                attention = model(inputs)[-1]
                for b in range(len(attention[11])):
                    for j in range(512):
                        # Head 0, query position 0 (the <s> token) attending
                        # to key position j.
                        score = attention[11][b][0][0][j]
                        token = inputs[b][j].item()
                        attention_scores[token] += score.item()
                        attention_freq[token] += 1

        for i in range(vocab_size):
            score = attention_scores[i]
            freq = attention_freq[i]
            if freq == 0:
                average = 0
            else:
                average = score / freq
            attention_average[i] = average

        key_num = keyword_size * len(sample_classes)

        attentiontokens = attention_average.argsort()[-key_num:][::-1].tolist()
        np.save(dataname + str(len(sample_classes)), attentiontokens)
    return attentiontokens
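
The ids returned above index into the RoBERTa vocabulary. A minimal sketch for mapping them back to readable subword strings; the dataset name and class list passed to Attentiontokens here are illustrative placeholders, not values from the original project:

from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# Placeholder arguments: 50 keywords per class over four hypothetical classes.
keywords = Attentiontokens('news', keyword_size=50,
                           sample_classes=[0, 1, 2, 3], tokenizer=tokenizer)
# Convert the selected vocabulary ids back to subword tokens for inspection.
print(tokenizer.convert_ids_to_tokens(keywords[:20]))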
Example No. 3
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, " +
                "or config_filename should be passed into the " +
                "ROBERTA constructor.")

        # Note: the branches below re-check the same arguments, so the final
        # else clause is unreachable given the validation above.
        if vocab_size is not None:
            config = RobertaConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = RobertaModel(config)
        elif pretrained_model_name is not None:
            model = RobertaModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = RobertaConfig.from_json_file(config_filename)
            model = RobertaModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" +
                " be passed into the ROBERTA constructor")

        model.to(self._device)

        self.add_module("roberta", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
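
A minimal instantiation sketch for the wrapper above; the class name ROBERTA is an assumption taken from its own error messages, and exactly one of the three arguments is supplied, as the validation requires:

# Class name assumed from the error messages above.
# Load published weights by name ...
encoder = ROBERTA(pretrained_model_name="roberta-base")
# ... or build an untrained model of the same architecture from scratch.
scratch = ROBERTA(vocab_size=50265, hidden_size=768, num_hidden_layers=12)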
Example No. 4
def build_network(args):
    bert_model = RobertaModel.from_pretrained(args.roberta_model)
    network = QDGATNet(bert_model,
                       hidden_size=bert_model.config.hidden_size,
                       dropout_prob=args.dropout)

    if torch.cuda.is_available():
        network.cuda()
    return network
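
A minimal call sketch; argparse.Namespace stands in for the project's real argument parser, the attribute names match the ones build_network reads, and the dropout value is an arbitrary placeholder:

from argparse import Namespace

args = Namespace(roberta_model='roberta-base', dropout=0.1)
network = build_network(args)  # QDGATNet wrapping a pretrained RoBERTa encoder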
Example No. 5
 def __init__(self, PATH="data/roberta/"):
     super(EmotionModel, self).__init__()
     config = RobertaConfig.from_pretrained(PATH, return_dict=False)
     self.bert_model = RobertaModel.from_pretrained(PATH +
                                                    "pytorch_model.bin",
                                                    config=config)
     self.dropout = nn.Dropout(0.1)
     self.linear1 = nn.Linear(768, 1)
     self.linear2 = nn.Linear(768 + 1, 1)
Example No. 6
def getModel(version):
    model = ""
    if version.split('-')[0] == 'bert':
        model = BertModel.from_pretrained(
            version, output_hidden_states=True)  #.get_input_embeddings()
    elif version.split('-')[0] == 'roberta':
        model = RobertaModel.from_pretrained(version,
                                             output_hidden_states=True)
    return model
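
With output_hidden_states=True the model also returns the hidden states of every layer. A minimal sketch of retrieving them, assuming a transformers version whose tokenizers are directly callable (the example sentence is arbitrary):

import torch
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = getModel('roberta-base')

inputs = tokenizer("An arbitrary example sentence.", return_tensors='pt')
with torch.no_grad():
    outputs = model(**inputs)
# Last element: one tensor per layer (embeddings + 12 encoder layers for
# roberta-base), each of shape (batch, seq_len, hidden_size).
hidden_states = outputs[-1]
print(len(hidden_states), hidden_states[-1].shape)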
Example No. 7
    def __init__(self):
        super(TokenModel, self).__init__()
        self.config = RobertaConfig.from_pretrained(config.roberta_config, output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(config.roberta_model, config=self.config)

        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(self.config.hidden_size, 2)
        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)
Example No. 8
    def __init__(self, dropout):
        super(ROBERTAModel, self).__init__()

        self.roberta = RobertaModel.from_pretrained(
            config.PATHS['roberta'],
            config=RobertaConfig())

        self.fc = nn.Linear(768, 2)
        self.dropout = nn.Dropout(dropout)
Example No. 9
    def __init__(self, dropout_rate=0.3, n_outputs=2):
        super(RobertaTokenClassifier, self).__init__()

        self.pretrained_model = RobertaModel.from_pretrained('roberta-base')
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, n_outputs)
Example No. 10
    def __init__(self, config, num_label):
        super(RobertaClassification, self).__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-large-mnli')
        self.fc = nn.Linear(config.hidden_size, num_label)
        self.drop = nn.Dropout(config.hidden_dropout_prob)
        self.loss = nn.CrossEntropyLoss(reduction='sum')

        torch.nn.init.xavier_uniform_(self.fc.weight)
        torch.nn.init.constant_(self.fc.bias, 0.)
Example No. 11
def create_roberta_autoencoder(hyperparameters):
    bert_encoder = RobertaModel.from_pretrained('roberta-base')
    vocab = autoencoder_objects['english_bert_tokenizer'].encoder
    autoencoder = create_autoencoder_given_encoder(bert_encoder, vocab,
                                                   hyperparameters)
    autoencoder_optimizer = optim.Adam(autoencoder.decoder.parameters(),
                                       lr=hyperparameters['learning_rate'],
                                       weight_decay=10**(-5))
    return autoencoder, autoencoder_optimizer
Example No. 12
 def __init__(self, name, sort='bert', setting='average'):
     self.setting = setting
     assert sort in ['bert', 'roberta']
     if sort == 'bert':
         self.tokenizer = BertTokenizer.from_pretrained(name)
         self.model = BertModel.from_pretrained(name)
     elif sort == 'roberta':
         self.tokenizer = RobertaTokenizer.from_pretrained(name)
         self.model = RobertaModel.from_pretrained(name)
Example No. 13
    def __init__(self):
        super(DISTILROBERTA_SQUAD, self).__init__()

        self.roberta_model = RobertaModel.from_pretrained('distilroberta-base')

        self.fc_layers = nn.Sequential(nn.Linear(768, 256), nn.ReLU(),
                                       nn.Linear(256, 2))

        self.criterion = nn.CrossEntropyLoss()
Example No. 14
    def __init__(self,
                 atokenizer: AtomTokenizer,
                 tokenizer: Tokenizer,
                 dec_dim: int = 64,
                 device: str = 'cpu',
                 load_pretrained: bool = False):
        super(Parser, self).__init__()
        self.enc_dim = 768
        self.dec_dim = dec_dim
        self.num_embeddings = len(atokenizer)
        self.device = device
        self.atom_tokenizer = atokenizer
        self.type_parser = TypeParser()
        self.tokenizer = tokenizer
        self.dropout = Dropout(0.1)
        self.enc_heads = 8
        self.dec_heads = 8
        self.d_atn_dec = self.dec_dim // self.dec_heads

        if load_pretrained:
            self.word_encoder = RobertaModel.from_pretrained(
                "pdelobelle/robbert-v2-dutch-base").to(device)
        else:
            json_path = path.join(
                path.join(path.dirname(path.dirname(__file__)), 'data'),
                'config.json')
            self.word_encoder = RobertaModel(
                RobertaConfig.from_json_file(json_path)).to(device)
        self.supertagger = make_decoder(num_layers=6,
                                        num_heads_enc=self.enc_heads,
                                        num_heads_dec=self.dec_heads,
                                        d_encoder=self.enc_dim,
                                        d_decoder=self.dec_dim,
                                        d_atn_enc=self.enc_dim //
                                        self.enc_heads,
                                        d_atn_dec=self.d_atn_dec,
                                        d_v_enc=self.enc_dim // self.enc_heads,
                                        d_v_dec=self.dec_dim // self.dec_heads,
                                        d_interm=self.dec_dim * 2,
                                        dropout_rate=0.1).to(device)
        self.atom_embedder = ComplexEmbedding(self.num_embeddings,
                                              self.dec_dim // 2).to(device)
        self.linker = make_encoder(num_layers=3,
                                   num_heads=self.enc_heads,
                                   d_intermediate=self.dec_dim * 4,
                                   dropout=0.15,
                                   d_model=self.dec_dim * 2,
                                   d_k=(self.dec_dim * 2) // self.dec_heads,
                                   d_v=(self.dec_dim * 2) //
                                   self.dec_heads).to(device)
        self.pos_transformation = Sequential(
            FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
            LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
        self.neg_transformation = Sequential(
            FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
            LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
Example No. 15
    def test_inference_no_head(self):
        model = RobertaModel.from_pretrained("roberta-base")

        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = torch.Tensor(
            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
        )
        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
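
The hard-coded input_ids above encode a fixed sentence; a minimal sketch of producing ids the same way with the tokenizer, reusing the model loaded above (arbitrary text, assuming a transformers version with callable tokenizers):

from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
input_ids = tokenizer("An arbitrary test sentence.", return_tensors="pt").input_ids
last_hidden = model(input_ids)[0]  # (batch, seq_len, hidden_size)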
Example No. 16
 def __init__(self, candidate_num, encoder, hidden_size=768):
     super(MatchSum, self).__init__()
     
     self.hidden_size = hidden_size
     self.candidate_num  = candidate_num
     
     if encoder == 'bert':
         self.encoder = BertModel.from_pretrained('bert-base-uncased')
     else:
         self.encoder = RobertaModel.from_pretrained('roberta-base')
Example No. 17
 def __init__(self):
     super(Model, self).__init__()
     self.config = RobertaConfig.from_pretrained(
         model_config.pretrain_model_path, output_hidden_states=True)
     self.model = RobertaModel.from_pretrained(
         model_config.pretrain_model_path, config=self.config)
     self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
     # self.classifier = nn.Linear(self.config.hidden_size * 2, 2)
     self.classifier = nn.Linear(self.config.hidden_size, 2)
     torch.nn.init.normal_(self.classifier.weight, std=0.02)
Example No. 18
 def __init__(self, config, model_name_or_path=None, pretrained_weights=None):
     super(RobertaQA, self).__init__(config)
     if model_name_or_path:
         self.roberta = RobertaModel.from_pretrained(model_name_or_path, config=config)
     else:
         self.roberta = RobertaModel(config=config)
     if pretrained_weights:
         self.roberta.load_state_dict(pretrained_weights)
     self.qa_outputs = nn.Linear(config.hidden_size, 2)
      torch.nn.init.normal_(self.qa_outputs.weight, mean=0.0, std=self.config.initializer_range)
Example No. 19
def load_roberta(model_type="roberta-base", is_eval: bool = True):
    model = RobertaModel.from_pretrained(model_type)
    if is_eval:
        model.eval()
    tokenizer = RobertaTokenizer.from_pretrained(model_type,
                                                 eos_token="[SEP]",
                                                 sep_token="[SEP]",
                                                 cls_token="[CLS]",
                                                 pad_token="[PAD]")
    return model, tokenizer
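
A minimal end-to-end sketch using the helper above; note that the remapped special tokens ([SEP], [CLS], [PAD]) are not part of the stock roberta-base vocabulary, so this only illustrates the call pattern:

import torch

model, tokenizer = load_roberta("roberta-base")
encoding = tokenizer("Frozen RoBERTa features.", return_tensors="pt")
with torch.no_grad():
    last_hidden = model(**encoding)[0]  # (batch, seq_len, 768)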
Example No. 20
    def __init__(self, num_class=5, pretrain_model="roberta-base"):
        super(roberta_sum_classifier, self).__init__()
        self.pretrain_model = pretrain_model
        self.num_class = num_class

        self.roberta = RobertaModel.from_pretrained(pretrain_model)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.roberta.config.hidden_size, num_class)
        
        self.tokenizer = RobertaTokenizer.from_pretrained(pretrain_model)
Example No. 21
 def __init__(self, num_class):
     super(BERTClass, self).__init__()
     self.num_class = num_class
     self.l1 = RobertaModel.from_pretrained("roberta-base")
     self.hc_features = torch.nn.Linear(50, 128)
     self.from_bert = torch.nn.Linear(768, 128)
     self.dropout = torch.nn.Dropout(0.3)
     self.pre_classifier = torch.nn.Linear(256, 128)
     self.classifier = torch.nn.Linear(128, self.num_class)
     self.history = dict()
Example No. 22
 def __init__(self):
     super(ResnetRobertabd, self).__init__()
     
     self.roberta = RobertaModel.from_pretrained("roberta-large")
     
     self.resnet = models.resnet101(pretrained=True)
     self.feats = torch.nn.Sequential(torch.nn.Linear(1000, 1024))
     
     self.dropout = torch.nn.Dropout(0.1)
     self.classifier = torch.nn.Linear(1024, 1)
Example No. 23
    def __init__(self, config: SklearnConfig) -> None:
        super().__init__(config)
        self.device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(self.device_name)

        model_name = "rinna/japanese-roberta-base"
        self.model = RobertaModel.from_pretrained(model_name, return_dict=True)
        self.tokenizer = T5Tokenizer.from_pretrained(model_name, padding=True)

        self.model.eval()
Example No. 24
 def __init__(self, path, embedding_dim=768, num_class=20, num_class1=20):
     super(TransferRobertaNet, self).__init__()
     self.embedding_dim = embedding_dim
     self.num_class = num_class
     self.num_class1 = num_class1
     self.path = path
     self.encoder = RobertaModel.from_pretrained(self.path)
     #         self.encoder = model_fun()
     self.transfer_fc = nn.Linear(self.embedding_dim, self.num_class)
     self.down_fc = nn.Linear(self.embedding_dim, self.num_class1)
Example No. 25
    def __init__(self, device='cpu', pretrain=True):
        super().__init__()
        config = RobertaConfig.from_pretrained("vinai/phobert-base")
        if pretrain:
            self.roberta = RobertaModel.from_pretrained("vinai/phobert-base")
        else:
            self.roberta = RobertaModel(config)

        self.fc = nn.Linear(768, 300)
        self.device = device
Example No. 26
    def __init__(self):
        super(RobertaBaseLinear, self).__init__()
        self.model_path = os.path.join(os.getcwd(),
                                       MODELS[self.__class__.__name__]['path'])

        self.roberta_base = RobertaModel.from_pretrained(self.model_path)

        self.ocemo_linear = nn.Linear(768, 7)
        self.ocnli_linear = nn.Linear(768, 3)
        self.tnews_linear = nn.Linear(768, 15)
Example No. 27
 def __init__(self, dropout_rate=0.3):
     super(ROBERTAOnMRPC, self).__init__()
     self.base_model = RobertaModel.from_pretrained('roberta-base')
     self.d1 = torch.nn.Dropout(dropout_rate)
     self.l1 = torch.nn.Linear(768, 128)
     self.bn1 = torch.nn.LayerNorm(128)
     self.d2 = torch.nn.Dropout(dropout_rate)
     self.l2 = torch.nn.Linear(128, 2)
     torch.nn.init.xavier_uniform_(self.l1.weight)
     torch.nn.init.xavier_uniform_(self.l2.weight)
Example No. 28
    def __init__(self,
                 classifier_config_dir,
                 device,
                 task_type,
                 n_clf_layers=6,
                 use_dm=True,
                 use_pm=True,
                 use_rt=True,
                 use_bio=False,
                 use_name=False,
                 use_network=False,
                 use_count=False):
        super(ConcatenatedClassifier, self).__init__()
        # load text model
        self.device = device
        self.task_type = task_type
        self.use_text = use_dm | use_pm | use_rt
        self.use_bio = use_bio
        self.use_name = use_name
        self.use_etc = use_network | use_count
        self.text_model = RobertaModel.from_pretrained(
            "vinai/bertweet-base",
            output_attentions=False,
            output_hidden_states=False)
        if self.use_name:
            self.charEmbedding = nn.Embedding(
                num_embeddings=302, embedding_dim=300,
                padding_idx=301)  # 302: 300-top frequent + pad + unk
            self.conv3 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=3,
                                   padding=1)
            self.conv4 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=4,
                                   padding=1)
            self.conv5 = nn.Conv1d(in_channels=300,
                                   out_channels=256,
                                   kernel_size=5,
                                   padding=1)

        # load classifier for combining these features
        config = RobertaConfig()
        config = config.from_json_file(classifier_config_dir)
        config.num_hidden_layers = n_clf_layers
        config.num_attention_heads = n_clf_layers
        config.max_position_embeddings = 7
        if self.use_bio:
            config.max_position_embeddings += 2
        if self.use_name:
            config.max_position_embeddings += 4
        self.concat_model = RobertaModel(config)
        self.classifier = ClassifierLayer(use_count=use_count,
                                          use_network=use_network)
        return
Example No. 29
def download_robert_base():
    file = '../input/roberta-base'

    config = RobertaConfig.from_pretrained('roberta-base')
    config.save_pretrained(file)
    
    model = RobertaModel.from_pretrained('roberta-base')
    model.save_pretrained(file)

    tkn = RobertaTokenizer.from_pretrained('roberta-base')
    tkn.save_pretrained(file)
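
Once the three save_pretrained calls have run, the model can be loaded fully offline from that directory; a minimal sketch mirroring the path used above:

from transformers import RobertaConfig, RobertaModel, RobertaTokenizer

config = RobertaConfig.from_pretrained('../input/roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('../input/roberta-base')
model = RobertaModel.from_pretrained('../input/roberta-base', config=config)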
Example No. 30
    def __init__(self, my_config, args):
        super(NqModel, self).__init__()
        #albert_base_configuration = AlbertConfig(vocab_size=30000,hidden_size=768,num_attention_heads=12,intermediate_size=3072,
        #                                        attention_probs_dropout_prob=0)
        self.my_mask = None
        self.args = args
#        gradient_checkpointing
        self.bert_config = RobertaConfig.from_pretrained("roberta-large-mnli")
        self.bert_config.gradient_checkpointing = True
        self.bert = RobertaModel.from_pretrained("roberta-large-mnli", config=self.bert_config)
        #self.bert = RobertaModel.from_pretrained("roberta-base")
        my_config.hidden_size = self.bert.config.hidden_size

        self.right = 0
        self.all = 0
        #self.bert =  AlbertModel(albert_base_configuration)
        
        #self.bert2 = BertModel(bert_config)

        #self.bert = BertModel(BertConfig())
        
        
        #self.bert =  RobertaModel(RobertaConfig(max_position_embeddings=514,vocab_size=50265))

        #print(my_config,bert_config)
        self.tok_dense = nn.Linear(my_config.hidden_size*2, my_config.hidden_size*2)
        
        self.tok_dense2 = nn.Linear(my_config.hidden_size, my_config.hidden_size)
#        self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
#        self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        
        self.dropout = nn.Dropout(my_config.hidden_dropout_prob)

        self.tok_outputs = nn.Linear(my_config.hidden_size*2, 1)  # tuned to help avoid bad local optima
        self.tok_outputs2 = nn.Linear(my_config.hidden_size, 1)
#        config.max_token_len, config.max_token_relative
#        self.para_outputs = nn.Linear(self.config.hidden_size, 1)
#        self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)
        
#        self.tok_to_label = nn.Linear(my_config.max_token_len,2)
#        self.par_to_label = nn.Linear(my_config.max_paragraph_len,2)

        #self.encoder = Encoder(my_config)
        self.encoder = Encoder(my_config)
#        self.encoder2 = Encoder(my_config)
        
        self.my_config = my_config
        
#        self.my_mask = 

        self.ACC = 0
        self.ALL = 0
        
        self.ErrId = []