def test_inference_no_head(self):
        model = RobertaModel.from_pretrained('roberta-base')

        input_ids = torch.tensor(
            [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = torch.Tensor([[[-0.0231, 0.0782, 0.0074],
                                        [-0.1854, 0.0539, -0.0174],
                                        [0.0548, 0.0799, 0.1687]]])
        self.assertTrue(
            torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
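A minimal standalone sketch of the same sanity check, assuming only torch and transformers are installed (the expected values are the ones asserted above):

import torch
from transformers import RobertaModel

model = RobertaModel.from_pretrained('roberta-base')
model.eval()
input_ids = torch.tensor(
    [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
with torch.no_grad():
    output = model(input_ids)[0]   # last hidden state, shape (1, 11, 768)
print(output[:, :3, :3])           # should be close to expected_slice above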
Example #2
 def create_and_check_model_as_decoder(
     self,
     config,
     input_ids,
     token_type_ids,
     input_mask,
     sequence_labels,
     token_labels,
     choice_labels,
     encoder_hidden_states,
     encoder_attention_mask,
 ):
     config.add_cross_attention = True
     model = RobertaModel(config)
     model.to(torch_device)
     model.eval()
     result = model(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         encoder_hidden_states=encoder_hidden_states,
         encoder_attention_mask=encoder_attention_mask,
     )
     result = model(
         input_ids,
         attention_mask=input_mask,
         token_type_ids=token_type_ids,
         encoder_hidden_states=encoder_hidden_states,
     )
     result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
     self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
     self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
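A hedged sketch of the same decoder-style call outside the test harness; the is_decoder flag and the tensor shapes are assumptions added for illustration (the test above only sets add_cross_attention on a config prepared elsewhere):

import torch
from transformers import RobertaConfig, RobertaModel

config = RobertaConfig(is_decoder=True, add_cross_attention=True)
model = RobertaModel(config)              # randomly initialized
model.eval()

input_ids = torch.randint(0, config.vocab_size, (2, 8))        # (batch, seq_len)
encoder_hidden_states = torch.randn(2, 8, config.hidden_size)  # encoder outputs to cross-attend to
encoder_attention_mask = torch.ones(2, 8, dtype=torch.long)

with torch.no_grad():
    out = model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
    )
print(out.last_hidden_state.shape)        # torch.Size([2, 8, 768])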
Example #3
    def __init__(self, args):
        super(ClassifyModel, self).__init__()
        args.out_size = len(args.dense_features)
        self.dropout = nn.Dropout(args.hidden_dropout_prob)
        self.args = args

        # Create the BERT model and load the pretrained weights
        config = RobertaConfig.from_pretrained(args.pretrained_model_path)
        config.output_hidden_states = True
        args.hidden_size = config.hidden_size
        args.num_hidden_layers = config.num_hidden_layers
        self.bert_text_layer = RobertaModel.from_pretrained(args.pretrained_model_path, config=config)
        self.text_linear = nn.Linear(in_features=args.text_dim + args.vocab_dim_v1 * len(args.text_features),
                                     out_features=args.hidden_size)
        logger.info("Load linear from %s", os.path.join(args.pretrained_model_path, "linear.bin"))
        self.text_linear.load_state_dict(torch.load(os.path.join(args.pretrained_model_path, "linear.bin")))
        logger.info("Load embeddings from %s", os.path.join(args.pretrained_model_path, "embeddings.bin"))

        self.text_embeddings = nn.Embedding.from_pretrained(
            torch.load(os.path.join(args.pretrained_model_path, "embeddings.bin"))['weight'],
            freeze=True)
        args.out_size += args.hidden_size * 2

        # Create the fusion-layer model, randomly initialized
        config = RobertaConfig()
        config.num_hidden_layers = 4
        config.intermediate_size = 2048
        config.hidden_size = 512
        config.num_attention_heads = 16
        config.vocab_size = 5
        self.fusion_text_layer = RobertaModel(config=config)
        self.fusion_text_layer.apply(self._init_weights)
        self.text_linear_1 = nn.Linear(args.text_dim_1 + args.hidden_size, 512)
        self.text_linear_1.apply(self._init_weights)
        self.norm = nn.BatchNorm1d(args.text_dim_1 + args.hidden_size)
        args.out_size += 1024

        # Create the classifier, randomly initialized
        self.classifierHead = ClassificationHead(args)
        self.classifierHead.apply(self._init_weights)
Example #4
def DistilBert():
    import pandas as pd
    import torch
    from transformers import RobertaTokenizer, RobertaModel
    # Transformers has a unified API
    # for 8 transformer architectures and 30 pretrained weights.
    #          Model          | Tokenizer          | Pretrained weights shortcut
    # MODELS = [(BertModel,       BertTokenizer,       'bert-base-uncased'),
    #           (OpenAIGPTModel,  OpenAIGPTTokenizer,  'openai-gpt'),
    #           (GPT2Model,       GPT2Tokenizer,       'gpt2'),
    #           (CTRLModel,       CTRLTokenizer,       'ctrl'),
    #           (TransfoXLModel,  TransfoXLTokenizer,  'transfo-xl-wt103'),
    #           (XLNetModel,      XLNetTokenizer,      'xlnet-base-cased'),
    #           (XLMModel,        XLMTokenizer,        'xlm-mlm-enfr-1024'),
    #           (DistilBertModel, DistilBertTokenizer, 'distilbert-base-uncased'),
    #           (RobertaModel,    RobertaTokenizer,    'roberta-base')]

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    encoded_text = pd.Series(dataset).apply(
        (lambda x: tokenizer.encode(x, add_special_tokens=True)))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model.resize_token_embeddings(len(encoded_text))
    print(encoded_text[0])
    print(len(encoded_text))
    # print(dataset.map(len).max())
    # model = RobertaModel.from_pretrained('roberta-base').to(device)
    parallel_model = torch.nn.DataParallel(
        RobertaModel.from_pretrained('roberta-base').to(device))

    import numpy as np
    # max_len = 512
    max_len = 0
    for i in encoded_text.values:
        if len(i) > max_len:
            max_len = len(i)

    # RoBERTa's pad token id is 1 (token id 0 is <s>), so pad with pad_token_id
    pad_id = tokenizer.pad_token_id
    padded = np.array(
        [i + [pad_id] * (max_len - len(i)) for i in encoded_text.values])

    attention_mask = np.where(padded != pad_id, 1, 0)
    print(attention_mask.shape)

    input_ids = torch.tensor(padded).to(device)
    attention_mask = torch.tensor(attention_mask).to(device)

    with torch.no_grad():
        last_hidden_states = parallel_model(input_ids,
                                            attention_mask=attention_mask)

    embeddings = last_hidden_states[0][:, 0, :].cpu().numpy()
    print(embeddings[0])
    print(embeddings.shape)
    return embeddings
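Hedged usage sketch: DistilBert() reads a module-level `dataset` (a sequence of strings) that the snippet above never defines, so it must be set up first; the sentences here are placeholders.

import pandas as pd

dataset = pd.Series(["Hello world!", "RoBERTa sentence embeddings."])  # hypothetical input
embeddings = DistilBert()      # one 768-dim <s>-token vector per sentence
print(embeddings.shape)        # (2, 768)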
Example #5
def get_model_and_tokenizer(model_name,
                            device="cpu",
                            random_weights=False,
                            model_path=None):
    """
    model_path: if given, initialize from path instead of official repo
    """

    init_model = model_name
    if model_path:
        print("Initializing model from local path:", model_path)
        init_model = model_path

    if model_name.startswith("xlnet"):
        model = XLNetModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(init_model)
        sep = u"▁"
    elif model_name.startswith("gpt2"):
        model = GPT2Model.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(init_model)
        sep = "Ġ"
    elif model_name.startswith("xlm"):
        model = XLMModel.from_pretrained(init_model,
                                         output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(init_model)
        sep = "</w>"
    elif model_name.startswith("bert"):
        model = BertModel.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("distilbert"):
        model = DistilBertModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("roberta"):
        model = RobertaModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(init_model)
        sep = "Ġ"
    else:
        print("Unrecognized model name:", model_name)
        sys.exit()

    if random_weights:
        print("Randomizing weights")
        model.init_weights()

    return model, tokenizer, sep
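Hedged usage sketch, assuming the model and tokenizer classes referenced above are imported at module level:

model, tokenizer, sep = get_model_and_tokenizer("roberta-base", device="cpu")
tokens = tokenizer.tokenize("Hello world")
print(tokens, sep)   # 'Ġ' marks a token that starts a new word in the RoBERTa BPE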
Example #6
    def train_model(self):
        # graph_emb = load_pkl_emb(self.graph_emb_path) if self.graph_emb_path is not None else None

        typed_nodes = load_typed_nodes(self.type_ann_edges)

        decoder_mapping = RobertaTokenizer.from_pretrained("microsoft/codebert-base").decoder
        tok_ids, words = zip(*decoder_mapping.items())
        vocab_mapping = dict(zip(words, tok_ids))
        batcher = self.get_batcher(
            self.train_data + self.test_data, self.batch_size, seq_len=self.seq_len,
            graphmap=None,
            wordmap=vocab_mapping, tagmap=None,
            class_weights=False, element_hash_size=1
        )

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = RobertaModel.from_pretrained("microsoft/codebert-base")
        model.to(device)

        node_ids = []
        embeddings = []

        for ind, batch in enumerate(tqdm(batcher)):
            # token_ids, graph_ids, labels, class_weights, lengths = b
            token_ids = torch.LongTensor(batch["tok_ids"])
            lens = torch.LongTensor(batch["lens"])

            token_ids[token_ids == len(vocab_mapping)] = vocab_mapping["<unk>"]

            def get_length_mask(target, lens):
                mask = torch.arange(target.size(1)).to(target.device)[None, :] < lens[:, None]
                return mask

            mask = get_length_mask(token_ids, lens)
            with torch.no_grad():
                # move inputs to the same device as the model before the forward pass
                embs = model(input_ids=token_ids.to(device), attention_mask=mask.to(device))

            for s_emb, s_repl in zip(embs.last_hidden_state, batch["replacements"]):
                unique_repls = set(list(s_repl))
                repls_for_ann = [r for r in unique_repls if r in typed_nodes]

                for r in repls_for_ann:
                    position = s_repl.index(r)
                    if position > 512:
                        continue
                    node_ids.append(r)
                    embeddings.append(s_emb[position].cpu())  # keep on CPU so .numpy() works below

        all_embs = torch.stack(embeddings, dim=0).numpy()
        embedder = Embedder(dict(zip(node_ids, range(len(node_ids)))), all_embs)
        pickle.dump(embedder, open("codebert_embeddings.pkl", "wb"), fix_imports=False)
        print(node_ids)
Example #7
    def __init__(self, config):
        super(BertForQuestionAnswering, self).__init__(config)

        # self.bert = RobertaModel(config, add_pooling_layer=False)
        self.roberta = RobertaModel(config, add_pooling_layer=False)
        if config.multi_layer_classifier:
            self.qa_classifier = BertMLP(config)
        else:
            self.qa_classifier = nn.Linear(config.hidden_size, 2)
            self.qa_classifier.weight = truncated_normal_(
                self.qa_classifier.weight, mean=0, std=0.02)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.init_weights()
Example #8
 def __init__(self, path, embedding_dim=768, feat_dim=64, num_class=20):
     super(SupConRobertaNet, self).__init__()
     self.embedding_dim = embedding_dim
     self.feat_dim = feat_dim
     self.num_class = num_class
     self.path = path
     self.encoder = RobertaModel.from_pretrained(self.path)
     #         self.encoder = model_fun()
     self.projection = nn.Sequential(
         nn.Linear(self.embedding_dim, self.embedding_dim),
         nn.ReLU(inplace=True), nn.Linear(self.embedding_dim,
                                          self.feat_dim))
     self.fc = nn.Linear(self.embedding_dim, self.num_class)
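The snippet above only shows __init__; a hedged sketch of a forward pass matching these layers might look like this (normalising the projection output is an assumption typical of SupCon-style setups, not something the original shows):

 def forward(self, input_ids, attention_mask=None):
     # <s>-token representation from the RoBERTa encoder
     hidden = self.encoder(input_ids, attention_mask=attention_mask)[0][:, 0, :]
     feat = torch.nn.functional.normalize(self.projection(hidden), dim=1)  # contrastive feature
     logits = self.fc(hidden)                                              # class logits
     return feat, logits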
Example #9
    def __init__(self, n_classes, unfreeze):
        super(RobertaForClaimDetection, self).__init__()
        self.num_labels = n_classes
        config = RobertaConfig.from_pretrained('roberta-base',
                                               output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained('roberta-base',
                                                    add_pooling_layer=True,
                                                    config=config)
        self.drop = nn.Dropout(p=0.1)
        self.out = nn.Linear(self.roberta.config.hidden_size, n_classes)

        for param in self.roberta.base_model.parameters():
            param.requires_grad = unfreeze
Example #10
 def __init__(self, config):
     """
     :param config: the model configuration
     """
     super().__init__(config)
     self.num_labels = config.num_labels
     # Load the model config and initialize a RoBERTa model
     self.roberta = RobertaModel(config)
     # Initialize a dropout layer and a custom classification layer
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     # Call transformers' built-in weight-initialization function
     self.init_weights()
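A hedged sketch of the matching forward pass (not part of the snippet above): classify from the <s>-token representation.

 def forward(self, input_ids, attention_mask=None, token_type_ids=None):
     outputs = self.roberta(input_ids, attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
     pooled = outputs[0][:, 0, :]                    # <s>/CLS representation
     logits = self.classifier(self.dropout(pooled))  # (batch, num_labels)
     return logits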
Example #11
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.classifier = GlobalAttentionHeadReluMean(config)
        self.log_softmax = nn.LogSoftmax(dim=1)

        self.classifier.apply(init_weights_relu)

        self.frame_classification_loss_sent = nn.NLLLoss()
Example #12
    def __init__(self, config):
        super(RobertaForQuestionAnswering, self).__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        print('Hidden Size: %s' % config.hidden_size)

        self.version_2_with_negative = True

        if self.version_2_with_negative:
            self.clf_output = PoolerAnswerClass(config.hidden_size)
Example #13
    def __init__(self, MODEL_PATH='roberta-base'):
        super(TweetModel, self).__init__()

        config = RobertaConfig.from_pretrained(MODEL_PATH + '/config.json',
                                               output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(MODEL_PATH +
                                                    '/pytorch_model.bin',
                                                    config=config)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(config.hidden_size, 2)
        # self.fc_len = nn.Linear(config.hidden_size, 96)
        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)
Example #14
    def __init__(self):
        super().__init__()

        roberta_config = RobertaConfig.from_pretrained(
            os.path.join(config_path, 'config.json'),
            output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(os.path.join(
            model_path, 'pytorch_model.bin'),
                                                    config=roberta_config)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(roberta_config.hidden_size, 2)
        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)
Example #15
def rbt3():
    """
    RBT3: the 3-layer RoBERTa-wwm-ext-base model
    (RBTL3 is the 3-layer RoBERTa-wwm-ext-large variant).

    Downloads the pretrained tokenizer and model and saves a local copy under 'rbt3'.
    """
    pretrained = "hfl/rbt3"
    tokenizer = BertTokenizer.from_pretrained(pretrained)
    model = RobertaModel.from_pretrained(pretrained)

    model.save_pretrained('rbt3')
    tokenizer.save_pretrained('rbt3')
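Hedged usage sketch: the directory written by save_pretrained can be loaded back the same way.

tokenizer = BertTokenizer.from_pretrained('rbt3')
model = RobertaModel.from_pretrained('rbt3')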
Example #16
 def __init__(self,
              config: RobertaForSequenceClassificationConfig,
              roberta_share=None):
     super(RobertaForSequenceClassification, self).__init__(config)
     if roberta_share is not None:
         self.roberta = roberta_share
     else:
         self.roberta = RobertaModel(config, )
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.cls_classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.cls_classifier.weight = truncated_normal_(
         self.cls_classifier.weight, mean=0, std=0.02)
     self.init_weights()
Example #17
    def __init__(self,
                 cuda,
                 model,
                 epochs,
                 learning_rate,
                 train_dataloader,
                 valid_dataloader_MATRES,
                 test_dataloader_MATRES,
                 valid_dataloader_HIEVE,
                 test_dataloader_HIEVE,
                 finetune,
                 dataset,
                 MATRES_best_PATH,
                 HiEve_best_PATH,
                 load_model_path,
                 model_name=None,
                 roberta_size="roberta-base"):
        self.cuda = cuda
        self.model = model
        self.dataset = dataset
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.finetune = finetune
        self.train_dataloader = train_dataloader
        self.valid_dataloader_MATRES = valid_dataloader_MATRES
        self.test_dataloader_MATRES = test_dataloader_MATRES
        self.valid_dataloader_HIEVE = valid_dataloader_HIEVE
        self.test_dataloader_HIEVE = test_dataloader_HIEVE
        ### fine-tune roberta or not ###
        # if finetune is False, we use fixed roberta embeddings before bilstm and mlp
        self.roberta_size = roberta_size
        if not self.finetune:
            self.RoBERTaModel = RobertaModel.from_pretrained(
                self.roberta_size).to(self.cuda)
        if self.roberta_size == 'roberta-base':
            self.roberta_dim = 768
        else:
            self.roberta_dim = 1024

        self.MATRES_best_micro_F1 = -0.000001
        self.MATRES_best_cm = []
        self.MATRES_best_PATH = MATRES_best_PATH

        self.HiEve_best_F1 = -0.000001
        self.HiEve_best_prfs = []
        self.HiEve_best_PATH = HiEve_best_PATH

        self.load_model_path = load_model_path
        self.model_name = model_name
        self.best_epoch = 0
        self.file = open("./rst_file/" + model_name + ".rst", "w")
Example #18
    def __init__(self, num_labels, config, dropout, hidden_size=None):
        super(CustomRobertatModel, self).__init__()
        self.num_labels = num_labels
        self.roberta = RobertaModel.from_pretrained("roberta-large")
        self.hidden_size = config.hidden_size

        print(config.hidden_size, self.hidden_size)

        self.lstm = nn.LSTM(config.hidden_size,
                            self.hidden_size,
                            bidirectional=True,
                            batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(self.hidden_size * 2, 2)
Example #19
        def create_and_check_roberta_model(self, config, input_ids,
                                           token_type_ids, input_mask,
                                           sequence_labels, token_labels,
                                           choice_labels):
            model = RobertaModel(config=config)
            model.eval()
            # Note: the tuple unpacking below assumes the older (pre-4.x) transformers
            # API that returned plain tuples; newer versions need return_dict=False.
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(
                input_ids, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(list(result["pooled_output"].size()),
                                        [self.batch_size, self.hidden_size])
Example #20
class RobertaForPIQA(BertPreTrainedModel):
    config_class = RobertaConfig
    pretrained_model_archive_map = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
    base_model_prefix = "roberta"

    def __init__(self, config):
        super(RobertaForPIQA, self).__init__(config)
        self.num_labels = config.num_labels
        self.roberta = RobertaModel(config)
        self.classifier = RobertaClassificationHead(config)
        self.init_weights()

    def _resize_type_embeddings(self, new_num_types):
        old_embeddings = self.roberta.embeddings.token_type_embeddings
        new_embeddings = self.roberta._get_resized_embeddings(
            old_embeddings, new_num_types)
        self.roberta.embeddings.token_type_embeddings = new_embeddings
        return self.roberta.embeddings.token_type_embeddings

    def forward(self,
                task=None,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                labels=None):
        seq_length = input_ids.size(2)
        outputs = self.roberta(
            input_ids=input_ids.view(-1, seq_length),
            attention_mask=attention_mask.view(-1, seq_length),
            token_type_ids=token_type_ids.view(-1, seq_length),
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds)

        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)
        # import pdb; pdb.set_trace()

        logits = logits.view(-1, self.num_labels)
        outputs = (logits, ) + outputs[2:]

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels.view(-1))
            outputs = (loss, ) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)
Example #21
def get_model_and_tokenizer(model_name='gpt2', device=CPU_DEVICE):
    if 'gpt3' in model_name:  # For GPT-3 evals, use GPT-2 large
        model_name = 'gpt2-large'
    if 'gpt2' in model_name:
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        model = GPT2LMHeadModel.from_pretrained(
            model_name, pad_token_id=tokenizer.eos_token_id).to(device)
        model = model.eval()
    elif 'roberta' in model_name:
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        # keep device handling consistent with the gpt2 branch
        model = RobertaModel.from_pretrained(model_name).to(device)
        model = model.eval()
    else:
        raise ValueError(f'Unknown model: {model_name}')
    return model, tokenizer
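Hedged usage sketch (assumes the tokenizer/model classes above are importable):

model, tokenizer = get_model_and_tokenizer('roberta-base')
inputs = tokenizer("Hello world", return_tensors='pt')
outputs = model(**inputs)   # last hidden state + pooled output for the RoBERTa branch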
Example #22
    def __init__(self):
        super(PhoBERTCLREncoder, self).__init__()

        self.config = RobertaConfig.from_pretrained(
            os.path.join(os.getcwd(), '../pretrained',
                         'PhoBERT_base_transformers', 'config.json'))

        self.phobert = RobertaModel.from_pretrained(os.path.join(
            os.getcwd(), '../pretrained', 'PhoBERT_base_transformers',
            'model.bin'),
                                                    config=self.config)

        self.linear_1 = torch.nn.Linear(4 * 768, 4 * 768)
        self.linear_2 = torch.nn.Linear(4 * 768, 512)
Example #23
    def __create_model(self):
        # roBERTa
        roberta_config = RobertaConfig.from_pretrained(
            self.config['roberta_config_file_path'], output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(
            self.config['roberta_model_file_path'], config=roberta_config)

        # Dropout
        self.dropout = nn.Dropout(0.5)

        # Linear
        self.fc = nn.Linear(roberta_config.hidden_size, 2)
        nn.init.normal_(self.fc.weight, std=0.02)
        nn.init.normal_(self.fc.bias, 0)
Example #24
    def __init__(self, threshold=0.1, modelname="roberta-large"):
        super(MLMSim, self).__init__()
        if modelname.startswith("roberta-"):
            self.model = RobertaModel.from_pretrained(modelname)
            self.TOKENIZER = RobertaTokenizer.from_pretrained(modelname)
        elif modelname.startswith("bert-base-uncased"):
            self.model = BertModel.from_pretrained(modelname)
            self.TOKENIZER = BertTokenizer.from_pretrained(modelname)

        self.model.requires_grad_(False)  # actually freeze the encoder parameters
        self.pad_token = self.TOKENIZER.pad_token_id
        self.threshold = threshold

        self.mapping_log = open("log.txt", "w")
Example #25
    def __init__(self, config):
        super(RobertaForGLM, self).__init__(config)

        self.roberta = RobertaModel(config)
        self.lm_head = RobertaLMHead(config)

        self.bilinear = nn.Bilinear(config.hidden_size, config.hidden_size, 1)

        self.loss_lambda = getattr(config, "loss_lambda", 1.)
        self.disable_rev_pos = getattr(config, "disable_rev_pos", False)
        self.padding_idx = self.roberta.embeddings.padding_idx

        self.apply(self.init_weights)
        self.tie_weights()
Example #26
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.classifier_copa = RobertaClassificationHead(config)
        self.classifier_anli = RobertaClassificationHead(config)
        self.classifier_snli = RobertaClassificationHead(config)
        self.classifier_atomic = RobertaClassificationHead(config)
        self.classifier_social = RobertaClassificationHead(config)
        self.classifier_hella = RobertaClassificationHead(config)
        self.classifier_joci = RobertaClassificationHead(config)

        self.init_weights()
Example #27
 def __init__(self, embed_size=IN_FEATURES) :
   super(Net, self).__init__()
   self.embed_size = embed_size
   if USE_EMPATH :
     self.embed_size += 194
   self.num_classes = 11
   print(f"Embeddings length: {self.embed_size}")
   
   if MODEL_NAME == 'BERT' :
     self.bert = BertModel.from_pretrained("bert-base-cased")
   else :
     self.bert = RobertaModel.from_pretrained("roberta-base")
   self.fc = nn.Linear(self.embed_size, self.num_classes)
   self.dropout = nn.Dropout(BERT_DROPOUT)
Example #28
    def __init__(
        self,
        d_model=512,
        nhead=8,
        num_encoder_layers=6,
        num_decoder_layers=6,
        dim_feedforward=2048,
        dropout=0.1,
        activation="relu",
        normalize_before=False,
        return_intermediate_dec=False,
        pass_pos_and_query=True,
        text_encoder_type="roberta-base",
        freeze_text_encoder=False,
        contrastive_loss=False,
    ):
        super().__init__()

        self.pass_pos_and_query = pass_pos_and_query
        encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation, normalize_before)
        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation, normalize_before)
        decoder_norm = nn.LayerNorm(d_model)
        self.decoder = TransformerDecoder(
            decoder_layer, num_decoder_layers, decoder_norm, return_intermediate=return_intermediate_dec
        )

        self.CLS = nn.Embedding(1, d_model) if contrastive_loss else None

        self._reset_parameters()

        self.tokenizer = RobertaTokenizerFast.from_pretrained(text_encoder_type)
        self.text_encoder = RobertaModel.from_pretrained(text_encoder_type)

        if freeze_text_encoder:
            for p in self.text_encoder.parameters():
                p.requires_grad_(False)

        self.expander_dropout = 0.1
        config = self.text_encoder.config
        self.resizer = FeatureResizer(
            input_feat_size=config.hidden_size,
            output_feat_size=d_model,
            dropout=self.expander_dropout,
        )

        self.d_model = d_model
        self.nhead = nhead
Example #29
    def __init__(self, config):
        super(RobertaForTokenClassification, self).__init__(config)
        self.num_labels = config.num_labels
        config.max_seq_length = 128
        self.roberta = RobertaModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.conv1d = nn.Conv1d(config.max_seq_length * config.hidden_size,
                                config.max_seq_length * config.hidden_size,
                                1,
                                stride=config.hidden_size * 8)

        self.init_weights()
Example #30
 def __init__(self):
     super(ResnetRobertaBU, self).__init__()
     
     self.roberta = RobertaModel.from_pretrained("roberta-large")
     
     self.resnet = models.resnet101(pretrained=True)
     
     self.feats = torch.nn.Sequential(torch.nn.Linear(1000,1024))
     self.feats2 = torch.nn.Sequential(torch.nn.LayerNorm(1024, eps=1e-12))
     
     self.boxes = torch.nn.Sequential(torch.nn.Linear(4,1024),torch.nn.LayerNorm(1024, eps=1e-12))
     
     self.dropout = torch.nn.Dropout(0.1)
     self.classifier = torch.nn.Linear(1024, 1)