Example #1
def convert_xlnet_checkpoint_to_pytorch(
    tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None
):
    # Initialise PyTorch model
    config = XLNetConfig.from_json_file(bert_config_file)

    finetuning_task = finetuning_task.lower() if finetuning_task is not None else ""
    if finetuning_task in GLUE_TASKS_NUM_LABELS:
        print(f"Building PyTorch XLNetForSequenceClassification model from configuration: {config}")
        config.finetuning_task = finetuning_task
        config.num_labels = GLUE_TASKS_NUM_LABELS[finetuning_task]
        model = XLNetForSequenceClassification(config)
    elif "squad" in finetuning_task:
        config.finetuning_task = finetuning_task
        model = XLNetForQuestionAnswering(config)
    else:
        model = XLNetLMHeadModel(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_xlnet(model, config, tf_checkpoint_path)

    # Save pytorch-model
    pytorch_weights_dump_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    pytorch_config_dump_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)
    print(f"Save PyTorch model to {os.path.abspath(pytorch_weights_dump_path)}")
    torch.save(model.state_dict(), pytorch_weights_dump_path)
    print(f"Save configuration file to {os.path.abspath(pytorch_config_dump_path)}")
    with open(pytorch_config_dump_path, "w", encoding="utf-8") as f:
        f.write(config.to_json_string())
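A usage sketch for the conversion function above; the paths are placeholders and it assumes the function and its transformers/torch imports are defined in the current module:

if __name__ == "__main__":
    convert_xlnet_checkpoint_to_pytorch(
        tf_checkpoint_path="./tf_ckpt/xlnet_model.ckpt",   # placeholder path
        bert_config_file="./tf_ckpt/xlnet_config.json",    # placeholder path
        pytorch_dump_folder_path="./xlnet_pytorch",        # placeholder output directory
        finetuning_task="sst-2",  # any key of GLUE_TASKS_NUM_LABELS, or e.g. "squad"
    )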
def load_xlnet(task_type,
               xlnet_model_name,
               xlnet_load_mode,
               all_state,
               num_labels,
               xlnet_config_json_path=None):
    if xlnet_config_json_path is None:
        xlnet_config_json_path = os.path.join(
            get_xlnet_config_path(xlnet_model_name), "xlnet_config.json")
    if xlnet_load_mode in ("model_only", "full_model_only"):
        state_dict = all_state
    elif xlnet_load_mode in [
            "state_model_only", "state_all", "state_full_model"
    ]:
        state_dict = all_state["model"]
    else:
        raise KeyError(xlnet_load_mode)

    if task_type == TaskType.CLASSIFICATION:
        if xlnet_load_mode in ("state_full_model", "full_model_only"):
            model = XLNetForSequenceClassification.from_state_dict_full(
                config_file=xlnet_config_json_path,  # need to figure out what the config file is
                state_dict=state_dict,
                num_labels=num_labels,
            )
        else:
            model = XLNetForSequenceClassification.from_state_dict(
                config_file=xlnet_config_json_path,
                state_dict=state_dict,
                num_labels=num_labels,
            )
    else:
        raise KeyError(task_type)
    return model
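A usage sketch for load_xlnet; the checkpoint path is a placeholder and it assumes TaskType and the other helpers referenced above are importable from the surrounding project:

import torch

all_state = torch.load("xlnet_finetuned.p", map_location="cpu")  # placeholder checkpoint
model = load_xlnet(
    task_type=TaskType.CLASSIFICATION,
    xlnet_model_name="xlnet-base-cased",
    xlnet_load_mode="state_model_only",  # this mode expects all_state["model"] to hold the weights
    all_state=all_state,
    num_labels=2,
)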
Example #3
    def __init__(
        self,
        language=Language.ENGLISHCASED,
        num_labels=5,
        cache_dir=".",
        num_gpus=None,
        num_epochs=1,
        batch_size=8,
        lr=5e-5,
        adam_eps=1e-8,
        warmup_steps=0,
        weight_decay=0.0,
        max_grad_norm=1.0,
    ):
        """Initializes the classifier and the underlying pretrained model.

        Args:
            language (Language, optional): The pretrained model's language.
                                           Defaults to 'xlnet-base-cased'.
            num_labels (int, optional): The number of unique labels in the
                training data. Defaults to 5.
            cache_dir (str, optional): Location of XLNet's cache directory.
                Defaults to ".".
            num_gpus (int, optional): The number of gpus to use.
                                      If None is specified, all available GPUs
                                      will be used. Defaults to None.
            num_epochs (int, optional): Number of training epochs.
                Defaults to 1.
            batch_size (int, optional): Training batch size. Defaults to 8.
            lr (float): Learning rate of the Adam optimizer. Defaults to 5e-5.
            adam_eps (float, optional): Term added to the denominator to
                improve numerical stability. Defaults to 1e-8.
            warmup_steps (int, optional): Number of steps over which the
                learning rate is increased linearly from 0 to lr. Defaults to 0.
            weight_decay (float, optional): Weight decay. Defaults to 0.0.
            max_grad_norm (float, optional): Maximum norm for the gradients.
                Defaults to 1.0.
        """

        if num_labels < 2:
            raise ValueError("Number of labels should be at least 2.")

        self.language = language
        self.num_labels = num_labels
        self.cache_dir = cache_dir

        self.num_gpus = num_gpus
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr
        self.adam_eps = adam_eps
        self.warmup_steps = warmup_steps
        self.weight_decay = weight_decay
        self.max_grad_norm = max_grad_norm

        # create classifier
        self.config = XLNetConfig.from_pretrained(
            self.language.value, num_labels=num_labels, cache_dir=cache_dir
        )
        self.model = XLNetForSequenceClassification(self.config)
 def loadModel(self, filepath):
     """
     - Function to load a model with saved states (parameters)
     - Args:
         filepath (str): path to the saved model
     -Return:
         model
     """
     saved = torch.load(filepath, map_location='cpu')
     state_dict = saved['state_dict']
     config = XLNetConfig(num_labels=2)
     model = XLNetForSequenceClassification(config)
     # load the trained parameters into the model
     model.load_state_dict(state_dict)
     return model
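For reference, a sketch (an assumption, not part of the original) of writing a checkpoint in the {'state_dict': ...} layout that loadModel above expects:

import torch
from transformers import XLNetConfig, XLNetForSequenceClassification

config = XLNetConfig(num_labels=2)
model = XLNetForSequenceClassification(config)
torch.save({'state_dict': model.state_dict()}, 'xlnet_checkpoint.pt')  # placeholder path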
Example #5
def train(data_loader, epochs=3):
    """
    Given the data_loader, this fine-tunes XLNet for the specific task.
    The BERT authors recommend between 2 and 4 training epochs.

    Returns the fine-tuned XLNet model.
    """
    model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased",
                                                           num_labels=2)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            'weight_decay_rate': 0.01,
        },
        {
            'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay_rate': 0.0,
        },
    ]

    # This variable contains all of the hyperparameter information our training loop needs
    optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)

    train_loss_set = []

    # trange is a tqdm wrapper around the normal python range
    for _ in trange(epochs, desc="Epoch"):
        model.train()

        # Tracking variables
        tr_loss, nb_tr_examples, nb_tr_steps = 0, 0, 0

        for batch in data_loader:
            batch = tuple(t.to(device) for t in batch)
            # clears any previously calculated gradients before performing a backward pass
            optimizer.zero_grad()

            b_input_ids, b_input_mask, b_labels = batch
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)
            loss = outputs[0]
            logits = outputs[1]
            train_loss_set.append(loss.item())

            loss.backward()
            optimizer.step()

            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1

    print("Train loss: {}".format(tr_loss / nb_tr_steps))
    return model
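The loop above unpacks each batch as (input_ids, attention_mask, labels) and relies on module-level globals (device, trange, AdamW). A minimal sketch of building a compatible data_loader, with toy data and assumed tokenizer settings:

import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
texts = ["great movie", "terrible plot"]   # toy data
labels = torch.tensor([1, 0])

enc = tokenizer(texts, padding="max_length", truncation=True, max_length=64, return_tensors="pt")
dataset = TensorDataset(enc["input_ids"], enc["attention_mask"], labels)
data_loader = DataLoader(dataset, batch_size=2, shuffle=True)

model = train(data_loader, epochs=2)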
Example #6
 def __init__(self):
     super(XlnetModel, self).__init__()
     self.xlnet = XLNetForSequenceClassification.from_pretrained(
         "hfl/chinese-xlnet-base", num_labels=2)  # /bert_pretrain/
     self.device = torch.device("cuda")
     for param in self.xlnet.parameters():
         param.requires_grad = True  # every parameter requires a gradient
Example #7
    def __init__(self, batchsize=16, max_len=64):
        RANDOM_SEED = 42
        np.random.seed(RANDOM_SEED)
        torch.manual_seed(RANDOM_SEED)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print('device {}'.format(device))

        model = XLNetForSequenceClassification.from_pretrained(
            'xlnet-base-cased', num_labels=3)
        model = model.to(device)

        self.device = device
        self.model = model

        PRE_TRAINED_MODEL_NAME = 'xlnet-base-cased'
        self.tokenizer = XLNetTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

        self.test_size = 0.5
        self.random_state = 101
        self.MAX_LEN = max_len
        self.BATCH_SIZE = batchsize
        self.EPOCHS = 10
        self.num_data_workers = 4
        self.model_file = './models/xlnet_model_batch{}.bin'.format(batchsize)
        self.class_names = ['positive', 'negative', 'neutral']
        #self.class_names = ['positive', 'negative']

        self.columns = None
Example #8
    def __init__(self,
                 xlnet_pretrained_model="xlnet-base-cased",
                 xlnet_pretrained_tokenizer=None,
                 train_batch_size=8,
                 eval_batch_size=8,
                 num_labels=2,
                 learning_rate=3e-5,
                 train_dset=None,
                 eval_dset=None):

        # define hyperparameters
        self.train_batch_size = train_batch_size
        self.eval_batch_size = eval_batch_size
        self.num_labels = num_labels

        # loading pre-trained models
        self.model = XLNetForSequenceClassification.from_pretrained(
            xlnet_pretrained_model, num_labels=num_labels).to(self.DEVICE)
        self.tokenizer = XLNetTokenizer.from_pretrained(xlnet_pretrained_model)

        # creating / loading datasets
        self.train_dset = train_dset
        self.eval_dset = eval_dset
        self.train_loader = DataLoader(self.train_dset,
                                       batch_size=self.train_batch_size,
                                       shuffle=True)
        self.eval_loader = DataLoader(self.eval_dset,
                                      batch_size=self.eval_batch_size)
def Get_Model(modelName):
    model = ''
    if modelName == 'XLNet':
        model = XLNetForSequenceClassification.from_pretrained(
            # Load the pretrained XLNet model from pretrained_model_path.
            pretrained_model_path,
            # The number of output labels--2 for binary classification.
            num_labels=2)
    elif modelName == 'BERT':
        model = BertForSequenceClassification.from_pretrained(
            # Load the pretrained BERT model from pretrained_model_path.
            pretrained_model_path,
            # The number of output labels--2 for binary classification.
            num_labels=2)
    elif modelName == 'RoBerta':
        model = RobertaForSequenceClassification.from_pretrained(
            # Load the pretrained RoBERTa model from pretrained_model_path.
            pretrained_model_path,
            # The number of output labels--2 for binary classification.
            num_labels=2)
    elif modelName == 'Albert':
        model = AlbertForSequenceClassification.from_pretrained(
            # Load the pretrained ALBERT model from pretrained_model_path.
            pretrained_model_path,
            # The number of output labels--2 for binary classification.
            num_labels=2)
    return model
Example #10
 def __init__(self, config):
     super(Model, self).__init__()
     model_config = XLNetConfig.from_pretrained(config.bert_path, num_labels=config.num_classes)
     self.xlnet = XLNetForSequenceClassification.from_pretrained(config.bert_path, config=model_config)
     for param in self.xlnet.parameters():
         param.requires_grad = True
     self.fc = nn.Linear(config.hidden_size, config.num_classes)
Example #11
    def get_predictions(self, sentences):
        """
        Get the string predictions for each sentence
        :param sentences: the sentences
        :return: a dataframe containing the sentences and the predictions
        """
        """
        Makes prediction on sentences
        :param sentences: the sentences
        :return: a dataframe a dataframe with sentences and predictions
        """
        self.tag2idx = get_existing_tag2idx(self.model_folder)
        tag2name = {self.tag2idx[key]: key for key in self.tag2idx.keys()}

        model = XLNetForSequenceClassification.from_pretrained(
            self.model_folder, num_labels=len(tag2name))
        model.to(self.device)
        model.eval()

        logger.info("Setting input embedding")

        input, masks, segs = generate_dataloader_input(sentences)
        dataloader = get_dataloader(input, masks, segs, BATCH_NUM)

        nb_eval_steps, nb_eval_examples = 0, 0

        y_predict = []
        logger.info("Running evaluation...")

        for step, batch in enumerate(dataloader):
            if nb_eval_steps % 100 == 0:
                logger.info(f"Step {nb_eval_steps}")

            batch = tuple(t.to(self.device) for t in batch)
            b_input_ids, b_input_mask, b_segs = batch

            with torch.no_grad():
                outputs = model(
                    input_ids=b_input_ids,
                    token_type_ids=b_segs,
                    input_mask=b_input_mask,
                )
                logits = outputs[0]

            # Get text classification predict result
            logits = logits.detach().cpu().numpy()

            for predict in np.argmax(logits, axis=1):
                y_predict.append(predict)

            nb_eval_steps += 1

        final_df = pd.DataFrame({
            "sentences": sentences,
            "label": [tag2name[pred] for pred in y_predict],
            "y_pred": y_predict
        })

        return final_df
Example #12
    def __init__(self, model_name, model_type):
        """
        Hyper-parameters found with the validation set:
        xlnet-large-cased : epochs = 4, learning_rate = 1e-5, batch_size = 16, epsilon = 1e-6
        bert-large-uncased : epochs = 4, learning_rate = 3e-5, batch_size = 16, epsilon = 1e-8
        albert-xxlarge-v2 : epochs = 3, learning_rate = 5e-5, batch_size = 8, epsilon = 1e-6 (to be improved...)
        """
        self.model_name = model_name
        self.model_type = model_type

        # Per the transformers library, a batch size of 16 or 32 is advised for training. Due to memory
        # constraints we take 16. Gradient accumulation has not led to a great improvement and therefore won't be used here.
        if model_type == 'albert':
            self.batch_size = 8
        else:
            self.batch_size = 16

        available_model_name = ["xlnet-large-cased", "bert-large-uncased", "albert-xxlarge-v2"]
        available_model_type = ["bert", "xlnet", "albert"]

        if self.model_name not in available_model_name:
            raise Exception("Error : model_name should be in", available_model_name)
        if self.model_type not in available_model_type:
            raise Exception("Error : model_type should be in", available_model_type)

        # Load the *ForSequenceClassification model: the pretrained model with a single linear layer on top of the pooled output (num_labels=1 makes it a regression head)
        # To load our fine-tuned model instead, e.g.: BertForSequenceClassification.from_pretrained('./my_saved_model_directory/')
        if self.model_type == 'bert':
            self.config = BertConfig.from_pretrained(self.model_name, num_labels=1)  # num_labels=1 for regression task
            self.model = BertForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        elif self.model_type == 'xlnet':
            self.config = XLNetConfig.from_pretrained(self.model_name, num_labels=1)
            self.model = XLNetForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        elif self.model_type == 'albert':
            self.config = AlbertConfig.from_pretrained(self.model_name, num_labels=1)
            self.model = AlbertForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        self.model.cuda()

        if self.model_name == 'xlnet-large-cased':
            self.epochs = 4
            self.lr = 1e-5
            self.eps = 1e-6

        elif self.model_name == 'bert-large-uncased':
            self.epochs = 4
            self.lr = 3e-5
            self.eps = 1e-8

        elif self.model_name == 'albert-xxlarge-v2':
            self.epochs = 3
            self.lr = 5e-5
            self.eps = 1e-6

        self.max_grad_norm = 1.0  # Gradient threshold, gradients norms that exceed this threshold are scaled down to match the norm.

        self.optimizer = AdamW(self.model.parameters(), lr=self.lr, eps=self.eps)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.n_gpu = torch.cuda.device_count()
        torch.cuda.get_device_name(0)
def load_model():
    checkpoints = list(
        os.path.dirname(c) for c in sorted(
            glob.glob(checkpoint_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
    model = XLNetForSequenceClassification.from_pretrained(checkpoints[0])
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    model.to(device)
    model.eval()
    return (model, tokenizer)
Example #14
File: XLNet.py  Project: Lipairui/textgo
 def load_model(self,model_path):
     model = XLNetForSequenceClassification.from_pretrained(
             model_path, # path to the pretrained XLNet model
             num_labels = self.args['num_classes'], # The number of output labels
             output_attentions = False, # Whether the model returns attention weights.
             output_hidden_states = False # Whether the model returns all hidden states.
             )
     if torch.cuda.is_available():
         model.cuda(self.device)
     return model
def create_from_pretrained(task_type, xlnet_model_name, cache_dir, num_labels):
    if task_type == TaskType.CLASSIFICATION:
        model = XLNetForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=xlnet_model_name,
            cache_dir=cache_dir,
            num_labels=num_labels)
    # The regression branch was removed because sentiment analysis does not need it
    else:
        raise KeyError(task_type)
    return model
Example #16
 def __init__(self, requires_grad=True):
     super(XlnetModel, self).__init__()
     self.xlnet = XLNetForSequenceClassification.from_pretrained(
         'xlnet-large-cased', num_labels=2)
     self.tokenizer = AutoTokenizer.from_pretrained('xlnet-large-cased',
                                                    do_lower_case=True)
     self.requires_grad = requires_grad
     self.device = torch.device("cuda")
     for param in self.xlnet.parameters():
         param.requires_grad = requires_grad  # Each parameter requires gradient
Example #17
def main():
    # Get command-line arguments (auto-generated from the docstring at the top of this file)
    args = docopt(__doc__)
    pprint(args)

    # Get the parameters
    lr = float(args['--lr'])
    seq_len = int(args['--seq_len'])
    max_epoch = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train = int(args['--num_train'])
    num_valid = int(args['--num_valid'])

    # Select the model
    pretrained_weights = 'xlnet-base-cased'
    tokenizer = XLNetTokenizer.from_pretrained(pretrained_weights)
    config = XLNetConfig.from_pretrained(pretrained_weights, num_labels=4)
    model = XLNetForSequenceClassification.from_pretrained(pretrained_weights, config=config)
    print(model.config.num_labels)

    # Get the device to use
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets
    encoder = TwinPhraseEncoder(tokenizer, seq_len)

    train_dataset = WordnetDataset(mode='train',
                                   num_data=num_train,
                                   transform=encoder)
    valid_dataset = WordnetDataset(mode='valid',
                                   num_data=num_valid,
                                   transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(1, max_epoch + 1):
        print('=' * 27 + f' Epoch {epoch:0>2} ' + '=' * 27)
        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(
            f'|  Training    |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(
            f'|  Validation  |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Save the model
        torch.save(model.state_dict(), f'../result/{pretrained_weights}.pkl')
Example #18
    def __init__(self, pretrained_model_path, num_classes, device, d_model=1024, n_layer=24, n_head=16, \
            d_inner=4096, ff_activation='gelu', untie_r=True, attn_type='bi',initializer_range=0.02, \
            layer_norm_eps=1e-12, dropout=0.1):
        super(XLNetForTextClassification, self).__init__()

        print('Reloading pretrained models...')
        self.tokenizer = XLNetTokenizer.from_pretrained(pretrained_model_path)
        self.tokenizer.model_max_length = 512
        self.model = XLNetForSequenceClassification.from_pretrained(
            pretrained_model_path, num_labels=num_classes).to(device)
        self.softmax = torch.nn.Softmax(dim=1)
        self.device = device
    def create_and_check_use_mems_train(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForSequenceClassification(config)
        model.to(torch_device)
        model.train()

        train_size = input_ids_1.shape[0]

        batch_size = 4
        for i in range(train_size // batch_size + 1):
            input_ids = input_ids_1[i * batch_size:(i + 1) * batch_size]
            labels = sequence_labels[i * batch_size:(i + 1) * batch_size]
            outputs = model(input_ids=input_ids,
                            labels=labels,
                            return_dict=True)
            self.parent.assertIsNone(outputs.mems)
            self.parent.assertIsNotNone(outputs.loss)
Example #20
        def create_and_check_xlnet_sequence_classif(
                self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask,
                input_mask, target_mapping, segment_ids, lm_labels,
                sequence_labels, is_impossible_labels, token_labels):
            model = XLNetForSequenceClassification(config)
            model.to(torch_device)
            model.eval()

            logits, mems_1 = model(input_ids_1)
            loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

            result = {
                "loss": loss,
                "mems_1": mems_1,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()),
                [self.batch_size, self.type_sequence_label_size])
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.seq_length, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers)
    def create_and_check_xlnet_sequence_classif(
        self,
        config,
        input_ids_1,
        input_ids_2,
        input_ids_q,
        perm_mask,
        input_mask,
        target_mapping,
        segment_ids,
        lm_labels,
        sequence_labels,
        is_impossible_labels,
        token_labels,
    ):
        model = XLNetForSequenceClassification(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids_1)
        result = model(input_ids_1, labels=sequence_labels)

        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(
            result.logits.shape,
            (self.batch_size, self.type_sequence_label_size))
        self.parent.assertListEqual(
            [mem.shape for mem in result.mems],
            [(self.seq_length, self.batch_size, self.hidden_size)] *
            self.num_hidden_layers,
        )
 def __init__(self, requires_grad=True, num_labels=2):
     super(XlnetModel, self).__init__()
     self.num_labels = num_labels
     self.xlnet = XLNetForSequenceClassification.from_pretrained(
         'hfl/chinese-xlnet-base', num_labels=self.num_labels)
     self.tokenizer = XLNetTokenizer.from_pretrained(
         'hfl/chinese-xlnet-base', do_lower_case=True)
     # self.xlnet = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels = self.num_labels)
     # self.tokenizer = AutoTokenizer.from_pretrained('xlnet-large-cased', do_lower_case=True)
     self.requires_grad = requires_grad
     self.device = torch.device("cuda")
     for param in self.xlnet.parameters():
         param.requires_grad = requires_grad  # every parameter requires a gradient
Example #23
def make_model(args, device):
    if args.model == "roberta":
        config = RobertaConfig.from_pretrained("roberta-base")
        config.num_labels = 5
        if args.dataset == "imdb":
            config.num_labels = 2
        if args.dataset == "ag_news":
            config.num_labels = 4
        if args.dataset == "yahoo":
            config.num_labels = 10
        pretrained_model = RobertaForSequenceClassification.from_pretrained(
            "roberta-base", config=config)
        return scl_model_Roberta(config,
                                 device,
                                 pretrained_model,
                                 with_semi=args.with_mix,
                                 with_sum=args.with_summary)

    if args.model == "bert":
        config = BertConfig.from_pretrained("bert-base-uncased")
        config.num_labels = 5
        if args.dataset == "imdb":
            config.num_labels = 2
        if args.dataset == "ag_news":
            config.num_labels = 4
        if args.dataset == "yahoo":
            config.num_labels = 10
        pretrained_model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", config=config)
        return scl_model_Bert(config,
                              device,
                              pretrained_model,
                              with_semi=args.with_mix,
                              with_sum=args.with_summary)

    if args.model == "xlnet":
        config = XLNetConfig.from_pretrained("xlnet-base-cased")
        config.num_labels = 5
        if args.dataset == "imdb":
            config.num_labels = 2
        if args.dataset == "ag_news":
            config.num_labels = 4
        if args.dataset == "yahoo":
            config.num_labels = 10
        pretrained_model = XLNetForSequenceClassification.from_pretrained(
            "xlnet-base-cased", config=config)
        return scl_model_Xlnet(config,
                               device,
                               pretrained_model,
                               with_semi=args.with_mix,
                               with_sum=args.with_summary)
def pick_model(model_name, num_labels):
    """
        Return the specified model.
        Available model names:
        ['albert-base-v2', 'bert-base-uncased', 'bert-large-uncased',
         'roberta-base', 'xlnet-base-cased']
    """
    if model_name == 'albert-base-v2':
        model = AlbertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
        )
    if model_name in ('bert-base-uncased', 'bert-large-uncased'):
        model = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
        )
    if model_name in ('roberta-base', "roberta-large", "roberta-large-mnli"):
        model = RobertaForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
        )
    if model_name == 'xlnet-base-cased':
        model = XLNetForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
        )

    print(f'Loaded {model_name} model.')
    if torch.cuda.is_available():
        model.cuda()

    return model
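A usage sketch for pick_model; the sentence and num_labels below are illustrative:

import torch
from transformers import XLNetTokenizer

model = pick_model('xlnet-base-cased', num_labels=2)
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

enc = tokenizer("an example sentence", return_tensors='pt')
if torch.cuda.is_available():
    enc = {k: v.cuda() for k, v in enc.items()}
with torch.no_grad():
    logits = model(**enc)[0]
print(logits.argmax(dim=-1))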
Example #25
    def finetune(self,
                 input_text: List[str],
                 output: List[str],
                 max_input_length=128,
                 validation_split: float = 0.15,
                 epochs: int = 20,
                 batch_size: int = None,
                 early_stopping: bool = True,
                 trainer: pl.Trainer = None):
        """
        Finetune XLNet for text classification.
        input_text and output must be ordered 1:1
        Unique data classes automatically determined from output data

        Args:
            input_text: List of strings to classify (must match output ordering)
            output: List of input classifications (must match input ordering)
            max_input_length: Maximum number of tokens to be supported as input. Caps at 512.
            validation_split: Float between 0 and 1 that determines what percentage of the data to use for validation
            epochs: Integer that specifies how many iterations of training to do
            batch_size: Leave as None to determine the batch size automatically
            early_stopping: Boolean that determines whether to automatically stop when validation loss stops improving
            trainer: Your custom pytorch_lightning trainer
        """

        assert len(input_text) == len(output)
        OPTIMAL_BATCH_SIZE = 128

        labels = set(output)
        self.labels = {k: v for k, v in enumerate(labels)}

        class_to_idx = {v: k for k, v in enumerate(labels)}

        self.model = XLNetForSequenceClassification.from_pretrained(
            self.model_path, num_labels=len(labels))

        print("Processing data...")
        dataset = zip(input_text, output)
        dataset = [(self.encode(r[0], class_to_idx[r[1]], max_input_length))
                   for r in dataset]
        Finetunable.finetune(self,
                             dataset,
                             validation_split=validation_split,
                             epochs=epochs,
                             optimal_batch_size=OPTIMAL_BATCH_SIZE,
                             early_stopping=early_stopping,
                             trainer=trainer)
    def __init__(self, bert_config, device, n_class):
        """

        :param bert_config: str, name or path of the pretrained XLNet model
        :param device: torch.device
        :param n_class: int
        """

        super(DefaultModel, self).__init__()

        self.n_class = n_class
        self.bert_config = bert_config
        self.bert = XLNetForSequenceClassification.from_pretrained(self.bert_config, num_labels=self.n_class, output_hidden_states= False)
        self.tokenizer = XLNetTokenizer.from_pretrained(self.bert_config)
        self.device = device
Example #27
def demo5():
    from transformers import XLNetConfig, XLNetModel, XLNetTokenizer, XLNetForSequenceClassification
    import torch
    # Define the path and initialize the tokenizer
    XLN_PATH = r"D:\transformr_files\XLNetLMHeadModel"
    tokenizer = XLNetTokenizer.from_pretrained(XLN_PATH)
    # Load the configuration
    model_config = XLNetConfig.from_pretrained(XLN_PATH)
    # Set the number of classes to 3
    model_config.num_labels = 3
    # Build XLNetForSequenceClassification directly from the XLNet config (equivalent to the approach in the previous section)
    cls_model = XLNetForSequenceClassification.from_pretrained(
        XLN_PATH, config=model_config)
    # Set evaluation mode
    cls_model.eval()
    token_codes = tokenizer.encode_plus("i like you, what about you")
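    # A possible continuation (not in the original): run the classifier on the
    # encoded input and take a softmax over the three classes.
    input_ids = torch.tensor([token_codes["input_ids"]])
    token_type_ids = torch.tensor([token_codes["token_type_ids"]])
    with torch.no_grad():
        logits = cls_model(input_ids=input_ids, token_type_ids=token_type_ids)[0]
    print(torch.softmax(logits, dim=-1))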
Example #28
def main(argv, arc):
    assert len(
        argv) == 4, 'input should be: test_data, model_path, output_path'
    test_path = argv[1]
    model_name = argv[2]
    output_path = argv[3]

    test_df = pd.read_csv(test_path, dtype={'A': 'str', 'B': 'str'})
    if 'Unnamed: 0' in test_df.columns:
        test_df = test_df.drop(['Unnamed: 0'], axis=1)

    print(len(test_df), end='\n')
    tokenizer = XLNetTokenizer.from_pretrained(pre_trained_model_name,
                                               do_lower_case=True)
    testset = DialogueDataset(test_df, 'test', tokenizer=tokenizer)

    # first way
    # with open(f'./model/{model_name}', 'rb') as input_model:
    #     model = pickle.load(input_model)

    # second way
    NUM_LABELS = 2

    model = xlnet_model()
    model.model = XLNetForSequenceClassification.from_pretrained(
        pre_trained_model_name, num_labels=NUM_LABELS)
    # model.model = BertForNextSentencePrediction.from_pretrained(pre_trained_model_name)
    model.model.load_state_dict(
        torch.load(f'{model_name}', map_location=f'cuda:{device}'))
    print(model.val_accu_list)

    preds = model.predict(testset)
    test_df['prob'] = preds
    groups = test_df.groupby('question')
    ans = []
    for index, data in groups:
        if 'candidate_id' in test_df.columns:
            ans.append(data.loc[data['prob'].idxmax(), 'candidate_id'])
        else:
            ans.append(data.loc[data['prob'].idxmax(), 'B'])

    pred_df = pd.DataFrame()
    # pred_df['id'] = [f'{i}' for i in range(80000,82000)]
    pred_df['id'] = [f'{80000 + i}' for i in range(0, len(ans))]
    # pred_df['id'] = [82000]
    pred_df['candidate-id'] = ans
    pred_df.to_csv(output_path, index=False)
Example #29
def run_xlnet(device, results_file):

	set_seed(args.seed)
	torch.cuda.empty_cache()

	#get the data
	logging.info('Constructing datasets...')
	train_data, dev_data, test_data = read_samples_xlnet()

	#prepare the model and data
	model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=args.num_label,
                                                          output_attentions=False, output_hidden_states=False)
	param_optimizer = list(model.named_parameters())
	no_decay = ['bias', 'gamma', 'beta']
	optimizer_grouped_parameters = [
    	{'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
    	 'weight_decay_rate': 0.01},
    	{'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     	'weight_decay_rate': 0.0}]

	optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=1e-6)
	epoch = args.epochs

	train_iter = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=32)
	dev_iter = DataLoader(dev_data, sampler=SequentialSampler(dev_data), batch_size=32)
	test_iter = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=32)


	#create model save directory
	checkpoint_dir = os.path.join(args.checkpoint_dir, args.model_name)
	if not os.path.exists(checkpoint_dir):
		os.makedirs(checkpoint_dir)


	#run the tests
	logging.info(
        "Number of training samples {train}, number of dev samples {dev}, number of test samples {test}".format(
            train=len(train_data),
            dev=len(dev_data),
            test=len(test_data)))

	train_xlnet(epoch, model, train_iter, dev_iter, optimizer, device, checkpoint_dir, results_file)

	model = load_model(checkpoint_dir)
	acc, f1, recall, prec, f1_ave, recall_ave, prec_ave = test_xlnet(test_iter, model, device)
	del model
	return acc, f1, recall, prec, f1_ave, recall_ave, prec_ave
def _config_model(model_name: AvailableClassificationModels, num_labels: int,
                  use_gpu: bool):
    model_name = str(model_name.value)
    model = None
    if 'bert' in model_name:
        model = BertForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)
    elif 'xlnet' in model_name:
        model = XLNetForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)
    elif 'roberta' in model_name:
        model = RobertaForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)

    if use_gpu:
        model.cuda()
    return model