Example #1
    def __init__(self, large, model_name, temp_dir, finetune=False):
        super(Bert, self).__init__()

        if model_name == 'bert':
            if large:
                self.model = BertModel.from_pretrained('bert-large-uncased',
                                                       cache_dir=temp_dir)
            else:
                self.model = BertModel.from_pretrained('bert-base-uncased',
                                                       cache_dir=temp_dir)

        elif model_name == 'scibert':
            self.model = BertModel.from_pretrained(
                'allenai/scibert_scivocab_uncased', cache_dir=temp_dir)

        elif model_name == 'longformer':
            if large:
                self.model = LongformerModel.from_pretrained(
                    'allenai/longformer-large-4096', cache_dir=temp_dir)
            else:
                self.model = LongformerModel.from_pretrained(
                    'allenai/longformer-base-4096', cache_dir=temp_dir)

        self.model_name = model_name
        self.finetune = finetune
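For context, a minimal sketch of how a forward pass might use the finetune flag in this kind of wrapper (argument names are illustrative, not taken from the original source):

    def forward(self, input_ids, attention_mask):
        # With finetune=True the encoder is trained end to end; otherwise it is
        # kept frozen and used purely as a feature extractor.
        if self.finetune:
            outputs = self.model(input_ids, attention_mask=attention_mask)
        else:
            self.eval()
            with torch.no_grad():
                outputs = self.model(input_ids, attention_mask=attention_mask)
        return outputs[0]  # last hidden states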
Example #2
def load_torch_model(model_name, device):
    torch_model_name_or_dir = PRETRAINED_LONGFORMER_MODELS.get(model_name, model_name)
    model = LongformerModel.from_pretrained(torch_model_name_or_dir)
    model.to(device)
    return model
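A hypothetical invocation, assuming PRETRAINED_LONGFORMER_MODELS maps short names to Hugging Face model identifiers (the mapping below is illustrative, not the one from the original script):

import torch

PRETRAINED_LONGFORMER_MODELS = {
    "longformer-base-4096": "allenai/longformer-base-4096",
    "longformer-large-4096": "allenai/longformer-large-4096",
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_torch_model("longformer-base-4096", device)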
Example #3
    def __init__(self, config_path):
        config = configparser.ConfigParser()
        config.read(config_path)

        self.save_dir = Path(config.get("general", "save_dir"))
        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.clf_th = config.getfloat("general", "clf_th")

        self.mlp_model_path = config.get("model", "mlp")
        assert Path(self.mlp_model_path).exists()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        bert_config_path = config.get("bert", "config_path")
        assert Path(bert_config_path).exists()
        self.bert_config = LongformerConfig.from_json_file(bert_config_path)
        self.max_seq_length = self.bert_config.max_position_embeddings - 2
        self.bert_tokenizer = LongformerTokenizer.from_pretrained(
            'allenai/longformer-base-4096')
        # bert_tokenizer_path = config.get("bert", "tokenizer_path")
        # assert Path(bert_config_path).exists()
        # self.bert_tokenizer = LongformerTokenizer.from_pretrained(bert_tokenizer_path)
        bert_model_path = config.get("bert", "model_path")
        assert Path(bert_model_path).exists()
        self.bert_model = LongformerModel.from_pretrained(
            bert_model_path, config=self.bert_config)
        self.bert_model.to(self.device)
        self.bert_model.eval()

        gold_dir = Path(config.get("data", "gold_dir"))
        assert gold_dir.exists()
        self.gold_dataset = ConllDataset(gold_dir)
        target_dir = Path(config.get("data", "target_dir"))
        assert target_dir.exists()
        self.target_dataset = ConllDataset(target_dir)
Example #4
def convert_longformer_qa_checkpoint_to_pytorch(
        longformer_model: str, longformer_question_answering_ckpt_path: str,
        pytorch_dump_folder_path: str):

    # load longformer model from model identifier
    longformer = LongformerModel.from_pretrained(longformer_model)
    lightning_model = LightningModel(longformer)

    ckpt = torch.load(longformer_question_answering_ckpt_path,
                      map_location=torch.device("cpu"))
    lightning_model.load_state_dict(ckpt["state_dict"])

    # init longformer question answering model
    longformer_for_qa = LongformerForQuestionAnswering.from_pretrained(
        longformer_model)

    # transfer weights
    longformer_for_qa.longformer.load_state_dict(
        lightning_model.model.state_dict())
    longformer_for_qa.qa_outputs.load_state_dict(
        lightning_model.qa_outputs.state_dict())
    longformer_for_qa.eval()

    # save model
    longformer_for_qa.save_pretrained(pytorch_dump_folder_path)

    print(
        f"Conversion successful. Model saved under {pytorch_dump_folder_path}")
Example #5
    def __init__(self,
                 pretrained: str,
                 max_query_len: int,
                 max_doc_len: int,
                 mode: str = 'cls',
                 task: str = 'ranking') -> None:
        super(LongformerMaxp, self).__init__()
        self._pretrained = pretrained
        self._max_query_len = max_query_len
        self._max_doc_len = max_doc_len
        self._mode = mode
        self._task = task
        self._config = LongformerConfig.from_pretrained(self._pretrained)
        self._config.attention_mode = 'sliding_chunks'
        self._config.gradient_checkpointing = True
        #print("attention_mode: "+self._config.attention_mode)
        self._model = LongformerModel.from_pretrained(self._pretrained,
                                                      config=self._config)
        self._activation = nn.ReLU()
        self.dense = nn.Linear(self._config.hidden_size, 128)
        self.dropout = nn.Dropout(self._config.hidden_dropout_prob)
        self.out_proj = nn.Linear(128, 2)

        if self._task == 'ranking':
            self._dense2 = nn.Linear(128, 1)
        elif self._task == 'classification':
            self._dense2 = nn.Linear(128, 2)
        else:
            raise ValueError('Task must be `ranking` or `classification`.')
Example #6
def main(dataset_directory, jsonlines_filename):
    dataset, ids, images = extract_article_list(
        os.path.join(dataset_directory, jsonlines_filename))
    print(f'Len dataset = {len(dataset)}')

    text_model = LongformerModel.from_pretrained(
        "allenai/longformer-base-4096").to("cuda")
    text_model.eval()
    tokenizer = LongformerTokenizer.from_pretrained(
        "allenai/longformer-base-4096")

    # pool = Pool(processes=48)
    # processed_text = list(tqdm(pool.map(process_text, dataset), total=len(dataset)))
    # pool.close()
    batch_size = 8
    all_embeddings_avg = np.zeros((len(dataset), 768), dtype=float)  # np.float was removed in recent NumPy
    for i, chunk in tqdm(enumerate(chunks(dataset, batch_size)),
                         total=(len(dataset) + batch_size - 1) // batch_size):
        with torch.no_grad():
            tokenized_text = tokenizer(chunk,
                                       return_tensors="pt",
                                       truncation=True,
                                       padding="max_length")
            model_out = text_model(**(tokenized_text.to("cuda")))
            all_embeddings_avg[i * batch_size:i * batch_size +
                               len(chunk), :] = torch.mean(
                                   model_out[0], dim=1).cpu().numpy()

    data_df = pd.DataFrame(zip(ids, images, all_embeddings_avg))
    data_df.to_pickle(
        os.path.join(dataset_directory,
                     f"longformer_{jsonlines_filename.split('.')[0]}.pkl"))
Example #7
    def __init__(self, model_name='', n_class=50, probing=False):
        super().__init__()

        # Transformer encoder
        if model_name == "Bert_base":
            self.model = BertModel.from_pretrained('bert-base-uncased')
        elif model_name == "Longformer_base":
            self.model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
        else:
            self.model = AutoModel.from_pretrained(model_name)

        # For linear probing, freeze all encoder parameters.
        self.probing = probing
        if self.probing:
            for param in self.model.parameters():
                param.requires_grad = False

        # hyperparameters
        self.model_name = model_name
        self.c = n_class
        self.hid = self.model.config.hidden_size

        # model blocks
        self.fc = nn.Linear(self.hid, self.c)
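A minimal sketch of a matching forward pass (not part of the original snippet), assuming the first-token ([CLS]/<s>) representation feeds the linear head:

    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids, attention_mask=attention_mask)
        cls_repr = outputs[0][:, 0]  # representation of the first ([CLS]/<s>) token
        return self.fc(cls_repr)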
Example #8
    def test_layer_local_attn(self):
        model = LongformerModel.from_pretrained("patrickvonplaten/longformer-random-tiny")
        model.eval()
        layer = model.encoder.layer[0].attention.self.to(torch_device)
        hidden_states = self._get_hidden_states()
        batch_size, seq_length, hidden_size = hidden_states.size()
        attention_mask = torch.zeros((batch_size, seq_length), dtype=torch.float32, device=torch_device)
        attention_mask[:, -2:] = -10000

        is_index_masked = attention_mask < 0
        is_index_global_attn = attention_mask > 0
        is_global_attn = is_index_global_attn.flatten().any().item()

        output_hidden_states = layer(
            hidden_states,
            attention_mask=attention_mask,
            is_index_masked=is_index_masked,
            is_index_global_attn=is_index_global_attn,
            is_global_attn=is_global_attn,
        )[0]

        self.assertEqual(output_hidden_states.shape, (1, 4, 8))
        self.assertTrue(
            torch.allclose(
                output_hidden_states[0, 1],
                torch.tensor(
                    [0.0019, 0.0122, -0.0171, -0.0256, -0.0300, 0.0173, -0.0115, 0.0048],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            )
        )
Example #9
    def test_layer_global_attn(self):
        model = LongformerModel.from_pretrained(
            "patrickvonplaten/longformer-random-tiny")
        model.eval()
        layer = model.encoder.layer[0].attention.self.to(torch_device)
        hidden_states = torch.cat(
            [self._get_hidden_states(),
             self._get_hidden_states() - 0.5],
            dim=0)
        batch_size, seq_length, hidden_size = hidden_states.size()
        attention_mask = torch.zeros((batch_size, seq_length),
                                     dtype=torch.float32,
                                     device=torch_device)

        # create attn mask
        attention_mask[0, -2:] = 10000.0
        attention_mask[0, -1:] = -10000.0
        attention_mask[1, 1:] = 10000.0

        is_index_masked = attention_mask < 0
        is_index_global_attn = attention_mask > 0
        is_global_attn = is_index_global_attn.flatten().any().item()

        output_hidden_states = layer(
            hidden_states,
            attention_mask=attention_mask,
            is_index_masked=is_index_masked,
            is_index_global_attn=is_index_global_attn,
            is_global_attn=is_global_attn,
        )[0]

        self.assertEqual(output_hidden_states.shape, (2, 4, 8))

        self.assertTrue(
            torch.allclose(
                output_hidden_states[0, 2],
                torch.tensor(
                    [
                        -0.0651, -0.0393, 0.0309, -0.0342, -0.0066, -0.0155,
                        -0.0209, -0.0494
                    ],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))

        self.assertTrue(
            torch.allclose(
                output_hidden_states[1, -2],
                torch.tensor(
                    [
                        -0.0405, -0.0384, 0.0396, -0.0374, -0.0341, 0.0136,
                        0.0014, -0.0571
                    ],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))
Example #10
    def test_inference_no_head_long(self):
        model = LongformerModel.from_pretrained("allenai/longformer-base-4096")
        model.to(torch_device)

        # 'Hello world! ' repeated 1000 times
        input_ids = torch.tensor([[0] + [20920, 232, 328, 1437] * 1000 + [2]],
                                 dtype=torch.long,
                                 device=torch_device)  # long input

        attention_mask = torch.ones(input_ids.shape,
                                    dtype=torch.long,
                                    device=input_ids.device)
        global_attention_mask = torch.zeros(input_ids.shape,
                                            dtype=torch.long,
                                            device=input_ids.device)
        global_attention_mask[:, [1, 4, 21]] = 1  # Set global attention on a few random positions

        output = model(input_ids,
                       attention_mask=attention_mask,
                       global_attention_mask=global_attention_mask)[0]

        expected_output_sum = torch.tensor(74585.8594, device=torch_device)
        expected_output_mean = torch.tensor(0.0243, device=torch_device)
        self.assertTrue(
            torch.allclose(output.sum(), expected_output_sum, atol=1e-4))
        self.assertTrue(
            torch.allclose(output.mean(), expected_output_mean, atol=1e-4))
Example #11
 def __init__(self):
     self.model = LongformerModel.from_pretrained(
         'allenai/longformer-base-4096')
     self.tokenizer = LongformerTokenizer.from_pretrained(
         'allenai/longformer-base-4096')
     self.led_tokenizer = LEDTokenizer.from_pretrained(
         'allenai/led-base-16384')
     self.led_model = LEDModel.from_pretrained('allenai/led-base-16384')
Example #12
 def __init__(self, h_dim=768, **kwargs):
     super().__init__(**kwargs)
     #         self.data_processor = data_processor
     self.Longformer = LongformerModel.from_pretrained(
         'allenai/longformer-base-4096')
     self.testing = False
     self.training = True
     self.dropout = nn.Dropout(0.5)
     self.proj_layer = nn.Linear(h_dim, 1)
Example #13
    def train(self, x, y=None):
        logging.info("Building vectorizer on " + self.__class__.__name__)
        t0 = time.time()

        processed_dataset = [clean_string_longformer(entry) for entry in x]

        train_dataset = load_custom_dataset(
            self.tokenizer,
            processed_dataset,
            y,
            "train",
            self.input_length,
        )
        test_dataset = load_custom_dataset(
            self.tokenizer,
            processed_dataset,
            y,
            "test",
            self.input_length,
        )

        # Use datetime.now() (from datetime import datetime) so the time fields are
        # meaningful; date.today() would always format %H_%M_%S as "00_00_00".
        now = datetime.now()
        date_string = now.strftime("%d_%m_%Y")
        time_string = now.strftime("%H_%M_%S")

        training_args = TrainingArguments(
            output_dir=f'./results/{date_string}',
            num_train_epochs=self.epochs,
            per_device_train_batch_size=self.batch_size,
            per_device_eval_batch_size=self.batch_size,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir=f'./logs/{date_string}',
            load_best_model_at_end=True,
            fp16=False,
            fp16_opt_level="O2",
            evaluation_strategy="epoch",
            metric_for_best_model="eval_loss",
            greater_is_better=False,
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
        )

        trainer.train()
        self.model = trainer.model
        model_path = os.path.join(self.save_directory, self.dataset_name,
                                  f"{self.model_name}_{time_string}")
        self.model.save_pretrained(model_path)
        self.model = LongformerModel.from_pretrained(model_path)
        self.model.to("cuda")
        elapsed = (time.time() - t0)
        logging.info("Done in %.3fsec" % elapsed)
Example #14
    def make_dataset(self, data_root: str) -> None:
        """ Make Dataset
        Make dataset from json files and save it as csv.

        Args:
            data_root: Root directory for document json files.
        """

        log.info(f"Making dataset...")
        json_paths = glob.glob(f"{data_root}/**/*.json", recursive=True)

        # nltk settings
        nltk.download('punkt')
        stemmer = PorterStemmer()
        cv = CountVectorizer()
        texts = []  # A list of tokenized texts separated by half-width (ASCII) spaces

        # Longformer
        feature_matrix = []
        device = torch.device('cuda')
        tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
        model = LongformerModel.from_pretrained('allenai/longformer-base-4096').to(device)
        for json_path in tqdm(json_paths):
            with open(json_path) as f:
                json_obj = json.load(f)
                body = json_obj["body"]

                soup = BeautifulSoup(body, "html.parser")
                for script in soup(["script", "style"]):
                    script.decompose()
                text = soup.get_text()

                with torch.no_grad():
                    # Truncate to the tokenizer's 4096-token limit to avoid exceeding the position embeddings.
                    input_ids = torch.tensor(tokenizer.encode(text, truncation=True)).unsqueeze(0).to(device)
                    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device).to(device)
                    global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device).to(device)
                    outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask)

                    vec = outputs.last_hidden_state[0].cpu().detach().clone().numpy().mean(0)
                # np.append(feature_matrix, vec)
                feature_matrix.append(list(vec))
                # log.info(f"Done: {len(feature_matrix)}")

                
        feature_matrix = np.array(feature_matrix)
        log.info(f"Longformer: {feature_matrix.shape}")

        # Calculate distance matrix
        dist_mat = squareform(pdist(feature_matrix, metric='cosine'))

        df = pd.DataFrame(dist_mat)
        df.to_csv(join(self.cache_path, "json_document_longformer.csv"), index=False)
        log.info(f"Successfully made dataset.")
Example #15
 def __init__(self, input_size):
     super().__init__()
     self.activation = torch.nn.SELU()
     self.dropout = torch.nn.Dropout(p=0.1)
     self.projector_1 = torch.nn.Linear(input_size, 512)
     self.projector_2 = torch.nn.Linear(512, 256)
     self.text_model = LongformerModel.from_pretrained(
         "allenai/longformer-base-4096")
     # Freeze the embedding parameters (5 tensors) and the first
     # (12 - fine_tune_layers) encoder layers (22 parameter tensors each),
     # so only the last fine_tune_layers layers remain trainable.
     fine_tune_layers = 3
     for i, (name, param) in enumerate(self.text_model.named_parameters()):
         if i == (12 - fine_tune_layers) * 22 + 5:
             break
         param.requires_grad = False
Example #16
    def test_inference_no_head(self):
        model = LongformerModel.from_pretrained("allenai/longformer-base-4096")
        model.to(torch_device)

        # 'Hello world!'
        input_ids = torch.tensor([[0, 20920, 232, 328, 1437, 2]], dtype=torch.long, device=torch_device)
        attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device)
        output = model(input_ids, attention_mask=attention_mask)[0]
        output_without_mask = model(input_ids)[0]

        expected_output_slice = torch.tensor([0.0549, 0.1087, -0.1119, -0.0368, 0.0250], device=torch_device)
        self.assertTrue(torch.allclose(output[0, 0, -5:], expected_output_slice, atol=1e-4))
        self.assertTrue(torch.allclose(output_without_mask[0, 0, -5:], expected_output_slice, atol=1e-4))
Example #17
def main(args):
    model_name = args.model
    onnx_model_path = model_name + ".onnx"

    from transformers import LongformerModel
    model = LongformerModel.from_pretrained(PRETRAINED_LONGFORMER_MODELS[model_name])

    export_longformer(model, onnx_model_path, args.export_padding)

    if args.optimize_onnx or args.precision != 'fp32':
        fp32_model_path = model_name + "_fp32.onnx"
        fp16_model_path = model_name + "_fp16.onnx" if args.precision == 'fp16' else None
        optimize_longformer(onnx_model_path, fp32_model_path, fp16_model_path)
Example #18
def test_all(args):
    # Currently the longformer attention operator only runs on GPU (there is no CPU implementation yet).
    device = torch.device('cuda:0')

    results = []
    for model_name in args.models:
        # Here we run an example input
        from transformers import LongformerModel
        torch_model_name_or_dir = MODELS[model_name]
        model = LongformerModel.from_pretrained(
            torch_model_name_or_dir)  # pretrained model name or directory
        model.to(device)

        # Search onnx model in the following order: optimized fp16 model, optimized fp32 model, raw model
        optimized = False
        precision = 'fp32'
        onnx_model_path = model_name + ".onnx"
        optimized_fp32_model = model_name + "_fp32.onnx"
        optimized_fp16_model = model_name + "_fp16.onnx"
        import os.path
        if os.path.isfile(optimized_fp16_model):
            onnx_model_path = optimized_fp16_model
            optimized = True
            precision = 'fp16'
        elif os.path.isfile(optimized_fp32_model):
            onnx_model_path = optimized_fp32_model
            optimized = True

        for num_threads in args.num_threads:
            if "torch" in args.engines:
                results += test_torch(device, model, model_name,
                                      args.batch_sizes, args.sequence_lengths,
                                      args.global_lengths, args.test_times,
                                      num_threads)

            if "onnxruntime" in args.engines:
                session = benchmark_helper.create_onnxruntime_session(
                    onnx_model_path,
                    use_gpu=True,
                    enable_all_optimization=True,
                    num_threads=num_threads)
                results += test_onnxruntime(device, model, model_name, session,
                                            args.batch_sizes,
                                            args.sequence_lengths,
                                            args.global_lengths,
                                            args.test_times, num_threads,
                                            optimized, precision)
    return results
Example #19
def main(args):
    model_name = args.model
    onnx_model_path = model_name + ".onnx"

    global weight_bias_format
    weight_bias_format = 0 if args.no_merge_qkv else 1

    model = LongformerModel.from_pretrained(
        PRETRAINED_LONGFORMER_MODELS[model_name])

    export_longformer(model, onnx_model_path, args.export_padding)

    if args.optimize_onnx or args.precision != "fp32":
        fp32_model_path = model_name + f"_f{weight_bias_format}" + "_fp32.onnx"
        fp16_model_path = model_name + f"_f{weight_bias_format}" + "_fp16.onnx" if args.precision == "fp16" else None
        optimize_longformer(onnx_model_path, fp32_model_path, fp16_model_path)
Example #20
 def __init__(self, params):
     super(LongEntityLinkerModule, self).__init__()
     self.params = params
     if params['use_longformer']:
         self.ctxt_encoder = LongformerModel.from_pretrained(
             'allenai/longformer-base-4096')
         longformer_output_dim = self.ctxt_encoder.embeddings.word_embeddings.weight.size(
             1)
         self.NULL_IDX = 0
     else:
         self.ctxt_encoder = BertModel.from_pretrained('bert-base-uncased')
         self.NULL_IDX = 0
         longformer_output_dim = self.ctxt_encoder.embeddings.word_embeddings.weight.size(
             1)
     self.config = self.ctxt_encoder.config
     self.linear_compression = None
     if longformer_output_dim != self.params['cand_emb_dim']:
         self.linear_compression = nn.Linear(longformer_output_dim,
                                             self.params['cand_emb_dim'])
Example #21
    def __init__(self, params):
        super().__init__()
        
        if 'dropout' in params:
            self.dropout = nn.Dropout(p=params['dropout'])
        else:
            self.dropout = None
            
#         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False, do_basic_tokenize=False)
#         self.bert = BertModel.from_pretrained("bert-base-uncased")

        self.max_length = params['max_length'] if 'max_length' in params else 1024
        self.max_memory_size = params['max_memory_size']
        
        self.tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
        self.bert = LongformerModel.from_pretrained("allenai/longformer-base-4096", gradient_checkpointing=True)

        self.num_labels = params["label_length"] if 'label_length' in params else 2

        self.fc = nn.Linear(768, self.num_labels)
Example #22
 def __init__(self, params):
     super(LongEncoderModule, self).__init__()
     self.params = params
     if params['use_longformer']:
         self.ctxt_encoder = LongformerModel.from_pretrained('allenai/longformer-base-4096')
         longformer_output_dim = self.ctxt_encoder.embeddings.word_embeddings.weight.size(1)
         self.NULL_IDX = 0
     else:
         # temporary change to large cased SpanBert for test
         #self.ctxt_encoder = BertModel.from_pretrained('bert-base-uncased')
         self.ctxt_encoder = BertModel.from_pretrained('../models/spanbert_hf_base')
         self.NULL_IDX = 0
         longformer_output_dim = self.ctxt_encoder.embeddings.word_embeddings.weight.size(1)
     #num_tags = 4 if not self.params['end_tag'] else 5
     #num_tags = 3 if not self.params['end_tag'] else 4
     num_tags = 9 if self.params['conll'] else 3
     self.config = self.ctxt_encoder.config
     self.tagger = LongTagger(longformer_output_dim, num_tags, self.params['classifier'])
     self.linear_compression = None
     if longformer_output_dim != self.params['cand_emb_dim']:
         self.linear_compression = nn.Linear(longformer_output_dim, self.params['cand_emb_dim'])
Example #23
    def load_model(self):
        if 'longformer' in self.args.model_path:
            model = LongformerModel.from_pretrained(self.args.model_path)
            for layer in model.encoder.layer:
                layer.attention.self.attention_mode = self.args.attention_mode
                self.args.attention_window = 512 # layer.attention.self.attention_window
        elif self.args.model_path in ['bart.large', 'bart.base']:
            model = torch.hub.load('pytorch/fairseq', self.args.model_path)
            model.config = model.args
            model.config.hidden_size = model.config.decoder_output_dim
        elif 'bart' in self.args.model_path and 'base' in self.args.model_path:
            config = AutoConfig.from_pretrained(self.args.model_path)
            config.encoder_attention_heads = 12
            config.decoder_attention_heads = 12
            config.attention_dropout = 0.1
            if self.args.seq2seq:
                model = AutoModelWithLMHead.from_pretrained(self.args.model_path, config=config)
            else:
                model = AutoModel.from_pretrained(self.args.model_path, config=config)
        elif 'bart' in self.args.model_path and 'large' in self.args.model_path:
            config = AutoConfig.from_pretrained(self.args.model_path)
            config.attention_dropout = 0.1
            config.gradient_checkpointing = True
            if self.args.seq2seq:
                model = AutoModelWithLMHead.from_pretrained(self.args.model_path, config=config)
            else:
                model = AutoModel.from_pretrained(self.args.model_path, config=config)
        else:
            model = AutoModel.from_pretrained(self.args.model_path)

        print("Loaded model with config:")
        print(model.config)

        for p in model.parameters():
            p.requires_grad_(True)
        model.train()
        return model
Example #24
 def __init__(self):
     super(Model, self).__init__()
     self.model = LongformerModel.from_pretrained(model_config.pretrain_model_path, gradient_checkpointing=True)
     self.config = self.model.config
     self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
     self.classifier = nn.Linear(self.config.hidden_size, config.num_labels)
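A hedged sketch of the corresponding forward pass (not included in the snippet), pooling the first token before dropout and classification:

 def forward(self, input_ids, attention_mask):
     outputs = self.model(input_ids, attention_mask=attention_mask)
     pooled = outputs[0][:, 0]  # first-token (<s>) representation
     return self.classifier(self.dropout(pooled))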
Example #25
    def test_layer_attn_probs(self):
        model = LongformerModel.from_pretrained(
            "patrickvonplaten/longformer-random-tiny")
        model.eval()
        layer = model.encoder.layer[0].attention.self.to(torch_device)
        hidden_states = torch.cat(
            [self._get_hidden_states(),
             self._get_hidden_states() - 0.5],
            dim=0)
        batch_size, seq_length, hidden_size = hidden_states.size()
        attention_mask = torch.zeros((batch_size, seq_length),
                                     dtype=torch.float32,
                                     device=torch_device)

        # create attn mask
        attention_mask[0, -2:] = 10000.0
        attention_mask[0, -1:] = -10000.0
        attention_mask[1, 1:] = 10000.0

        is_index_masked = attention_mask < 0
        is_index_global_attn = attention_mask > 0
        is_global_attn = is_index_global_attn.flatten().any().item()

        output_hidden_states, local_attentions, global_attentions = layer(
            hidden_states,
            attention_mask=attention_mask,
            is_index_masked=is_index_masked,
            is_index_global_attn=is_index_global_attn,
            is_global_attn=is_global_attn,
            output_attentions=True,
        )

        self.assertEqual(local_attentions.shape, (2, 4, 2, 8))
        self.assertEqual(global_attentions.shape, (2, 2, 3, 4))

        # All tokens with global attention have weight 0 in local attentions.
        self.assertTrue(torch.all(local_attentions[0, 2:4, :, :] == 0))
        self.assertTrue(torch.all(local_attentions[1, 1:4, :, :] == 0))

        # The weight of all tokens with local attention must sum to 1.
        self.assertTrue(
            torch.all(
                torch.abs(global_attentions[0, :, :2, :].sum(dim=-1) -
                          1) < 1e-6))
        self.assertTrue(
            torch.all(
                torch.abs(global_attentions[1, :, :1, :].sum(dim=-1) -
                          1) < 1e-6))

        self.assertTrue(
            torch.allclose(
                local_attentions[0, 0, 0, :],
                torch.tensor(
                    [
                        0.3328, 0.0000, 0.0000, 0.0000, 0.0000, 0.3355, 0.3318,
                        0.0000
                    ],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))

        self.assertTrue(
            torch.allclose(
                local_attentions[1, 0, 0, :],
                torch.tensor(
                    [
                        0.2492, 0.2502, 0.2502, 0.0000, 0.0000, 0.2505, 0.0000,
                        0.0000
                    ],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))

        # All the global attention weights must sum to 1.
        self.assertTrue(
            torch.all(torch.abs(global_attentions.sum(dim=-1) - 1) < 1e-6))

        self.assertTrue(
            torch.allclose(
                global_attentions[0, 0, 1, :],
                torch.tensor(
                    [0.2500, 0.2500, 0.2500, 0.2500],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))

        self.assertTrue(
            torch.allclose(
                global_attentions[1, 0, 0, :],
                torch.tensor(
                    [0.2497, 0.2500, 0.2499, 0.2504],
                    dtype=torch.float32,
                    device=torch_device,
                ),
                atol=1e-3,
            ))
Example #26
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

import gensim
from torch import nn as nn

from config import SBERT_MODEL_NAME
from utils.types import FolkLoreData, FolkLoreEmb, FolkLoreEmbCoarse

nlp = en_core_web_sm.load()
sbert_model = SentenceTransformer(SBERT_MODEL_NAME)

from transformers import LongformerModel, LongformerTokenizerFast, LongformerConfig

LFconfig = LongformerConfig.from_pretrained('allenai/longformer-base-4096')
LF_model = LongformerModel.from_pretrained('allenai/longformer-base-4096',
                                           config=LFconfig)
LF_tokenizer = LongformerTokenizerFast.from_pretrained(
    'allenai/longformer-base-4096')
LF_tokenizer.model_max_length = LF_model.config.max_position_embeddings


class MatrixVectorScaledDotProductAttention(nn.Module):
    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, q, k, v, mask=None):
        """
		q: tensor of shape (n*b, d_k)
Example #27
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long,
                                device=input_ids.device)  # TODO: use random word ID. #TODO: simulate masked word
    global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device)
    if num_global_tokens > 0:
        global_token_index = list(range(num_global_tokens))
        global_attention_mask[:, global_token_index] = 1
    # TODO: support more inputs like token_type_ids, position_ids
    return input_ids, attention_mask, global_attention_mask

args = parse_arguments()

model_name = args.model
onnx_model_path = model_name + ".onnx"

from transformers import LongformerModel
model = LongformerModel.from_pretrained(MODELS[model_name]) # pretrained model name or directory

input_ids, attention_mask, global_attention_mask = get_dummy_inputs(sequence_length=args.sequence_length, num_global_tokens=args.global_length, device=torch.device('cpu'))

example_outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask)

# A new function to replace LongformerSelfAttention.forward
#For transformers 4.0
def my_longformer_self_attention_forward_4(self, hidden_states, attention_mask=None, is_index_masked=None, is_index_global_attn=None, is_global_attn=None):
    # TODO: move mask calculation to LongFormerModel class to avoid calculating it again and again in each layer.
    global_mask = is_index_global_attn.int()
    attention_mask = attention_mask.masked_fill(is_index_global_attn, 0.0)  # zero the mask at global-attention positions

    weight = torch.stack((self.query.weight.transpose(0,1), self.key.weight.transpose(0,1), self.value.weight.transpose(0,1)), dim=1)
    weight = weight.reshape(self.embed_dim, 3*self.embed_dim)
Example #28
 def __init__(self, model_name: str = "allenai/longformer-base-4096"):
     self.model = LongformerModel.from_pretrained(model_name)
     self.tokenizer = LongformerTokenizer.from_pretrained(model_name)
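A hedged sketch of an embedding method this wrapper might expose (the method name, pooling choice, and global-attention placement are assumptions, and torch is assumed to be imported):

 def embed(self, text: str):
     inputs = self.tokenizer(text, return_tensors="pt", truncation=True)
     global_attention_mask = torch.zeros_like(inputs["input_ids"])
     global_attention_mask[:, 0] = 1  # global attention on the first (<s>) token
     with torch.no_grad():
         outputs = self.model(**inputs, global_attention_mask=global_attention_mask)
     return outputs[0].mean(dim=1).squeeze(0)  # mean-pooled document vector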
Example #29
from transformers import ElectraForMaskedLM, ElectraTokenizer
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-generator')
model = ElectraForMaskedLM.from_pretrained('google/electra-small-generator')

input_ids = torch.tensor(
    tokenizer.encode("Hello, my dog is cute",
                     add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, labels=input_ids)

loss, prediction_scores = outputs[:2]
print(prediction_scores)

## Longformer
from transformers import LongformerModel, LongformerTokenizer

model = LongformerModel.from_pretrained('allenai/longformer-base-4096')
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

SAMPLE_TEXT = ' '.join(['Hello world! '] * 1000)  # long input document
input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1

# Attention mask values -- 0: no attention, 1: local attention, 2: global attention
attention_mask = torch.ones(
    input_ids.shape, dtype=torch.long,
    device=input_ids.device)  # initialize to local attention
attention_mask[:, [1, 4, 21]] = 2  # Set global attention based on the task. For example,
                                   # classification: the <s> token; QA: question tokens
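Equivalently, the same global positions can be passed through the separate global_attention_mask argument supported by LongformerModel, keeping attention_mask binary:

attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device)
global_attention_mask[:, [1, 4, 21]] = 1
outputs = model(input_ids, attention_mask=attention_mask, global_attention_mask=global_attention_mask)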
Example #30
print(df.target.value_counts())

import torch
from transformers import DistilBertTokenizerFast, DistilBertModel, DistilBertConfig
from transformers import LongformerTokenizerFast, LongformerModel, LongformerConfig

#model_name = 'distilbert-base-uncased'
model_name = 'allenai/longformer-base-4096'
tokenizer = LongformerTokenizerFast.from_pretrained(model_name)

df["vecs"] = df.text.map(
    lambda x: torch.LongTensor(tokenizer.encode(x)).unsqueeze(0))

config = LongformerConfig.from_pretrained(model_name,
                                          output_hidden_states=True)
model = LongformerModel.from_pretrained(model_name, config=config)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cpu'  # force CPU, overriding the automatic choice above

model = model.to(device)
input_tf = tokenizer.batch_encode_plus(df.text.to_list(),
                                       return_tensors='pt',
                                       padding=True)
#vecs = input_tf['input_ids'].to(device)
#granola_ids = granola_ids.to(device)

model.eval()

with torch.no_grad():
    print("and GO!!!!")