def convert_xlnet_checkpoint_to_pytorch(
    tf_checkpoint_path, bert_config_file, pytorch_dump_folder_path, finetuning_task=None
):
    """Convert a TensorFlow XLNet checkpoint into PyTorch weight/config files.

    Args:
        tf_checkpoint_path: Path of the TensorFlow checkpoint to read.
        bert_config_file: JSON file the ``XLNetConfig`` is loaded from.
        pytorch_dump_folder_path: Folder receiving ``WEIGHTS_NAME`` and
            ``CONFIG_NAME``.
        finetuning_task: Optional task name (compared case-insensitively).
            A key of ``GLUE_TASKS_NUM_LABELS`` selects a sequence
            classification head, a name containing ``"squad"`` selects a
            question answering head, anything else the LM head model.
    """
    config = XLNetConfig.from_json_file(bert_config_file)
    task = "" if finetuning_task is None else finetuning_task.lower()

    # Pick the model head that matches the fine-tuning task.
    if task in GLUE_TASKS_NUM_LABELS:
        print(f"Building PyTorch XLNetForSequenceClassification model from configuration: {config}")
        config.finetuning_task = task
        config.num_labels = GLUE_TASKS_NUM_LABELS[task]
        model = XLNetForSequenceClassification(config)
    elif "squad" in task:
        config.finetuning_task = task
        model = XLNetForQuestionAnswering(config)
    else:
        model = XLNetLMHeadModel(config)

    # Copy the TensorFlow weights into the freshly built PyTorch model.
    load_tf_weights_in_xlnet(model, config, tf_checkpoint_path)

    # Persist the weights first, then the configuration.
    weights_path = os.path.join(pytorch_dump_folder_path, WEIGHTS_NAME)
    config_path = os.path.join(pytorch_dump_folder_path, CONFIG_NAME)

    print(f"Save PyTorch model to {os.path.abspath(weights_path)}")
    torch.save(model.state_dict(), weights_path)

    print(f"Save configuration file to {os.path.abspath(config_path)}")
    with open(config_path, "w", encoding="utf-8") as config_file:
        config_file.write(config.to_json_string())
def load_xlnet(task_type, xlnet_model_name, xlnet_load_mode, all_state,
               num_labels, xlnet_config_json_path=None):
    """Build an XLNet classifier from an already loaded state.

    Args:
        task_type: Task selector; only ``TaskType.CLASSIFICATION`` is handled.
        xlnet_model_name: Used to locate ``xlnet_config.json`` when no
            explicit config path is given.
        xlnet_load_mode: ``"model_only"`` / ``"full_model_only"`` treat
            ``all_state`` as the state dict itself; ``"state_model_only"``,
            ``"state_all"`` and ``"state_full_model"`` read it from
            ``all_state["model"]``.
        all_state: The loaded state (see ``xlnet_load_mode``).
        num_labels: Number of labels passed to the model constructor.
        xlnet_config_json_path: Optional explicit path to the config JSON.

    Returns:
        The constructed ``XLNetForSequenceClassification``.

    Raises:
        KeyError: For an unknown ``xlnet_load_mode`` or ``task_type``.
    """
    if xlnet_config_json_path is None:
        config_dir = get_xlnet_config_path(xlnet_model_name)
        xlnet_config_json_path = os.path.join(config_dir, "xlnet_config.json")

    # Resolve where the model weights live inside `all_state`.
    bare_modes = ("model_only", "full_model_only")
    wrapped_modes = ("state_model_only", "state_all", "state_full_model")
    if xlnet_load_mode in bare_modes:
        state_dict = all_state
    elif xlnet_load_mode in wrapped_modes:
        state_dict = all_state["model"]
    else:
        raise KeyError(xlnet_load_mode)

    if not (task_type == TaskType.CLASSIFICATION):
        raise KeyError(task_type)

    # "full" modes go through the dedicated full-state constructor.
    if xlnet_load_mode in ("state_full_model", "full_model_only"):
        builder = XLNetForSequenceClassification.from_state_dict_full
    else:
        builder = XLNetForSequenceClassification.from_state_dict
    return builder(
        config_file=xlnet_config_json_path,
        state_dict=state_dict,
        num_labels=num_labels,
    )
def __init__(
    self,
    language=Language.ENGLISHCASED,
    num_labels=5,
    cache_dir=".",
    num_gpus=None,
    num_epochs=1,
    batch_size=8,
    lr=5e-5,
    adam_eps=1e-8,
    warmup_steps=0,
    weight_decay=0.0,
    max_grad_norm=1.0,
):
    """Record the training hyperparameters and create the XLNet classifier.

    Args:
        language (Language, optional): Pretrained model selector; its
            ``.value`` is handed to ``XLNetConfig.from_pretrained``.
            Defaults to ``Language.ENGLISHCASED``.
        num_labels (int, optional): Number of unique labels in the training
            data; must be at least 2. Defaults to 5.
        cache_dir (str, optional): Cache directory passed to
            ``from_pretrained``. Defaults to ".".
        num_gpus (int, optional): Number of GPUs to use; stored as given.
            Defaults to None.
        num_epochs (int, optional): Number of training epochs. Defaults to 1.
        batch_size (int, optional): Training batch size. Defaults to 8.
        lr (float, optional): Learning rate. Defaults to 5e-5.
        adam_eps (float, optional): Adam epsilon. Defaults to 1e-8.
        warmup_steps (int, optional): Learning-rate warm-up steps.
            Defaults to 0.
        weight_decay (float, optional): Weight decay. Defaults to 0.
        max_grad_norm (float, optional): Maximum gradient norm.
            Defaults to 1.0.

    Raises:
        ValueError: If ``num_labels`` is smaller than 2.
    """
    if num_labels < 2:
        raise ValueError("Number of labels should be at least 2.")

    # Model selection.
    self.language = language
    self.num_labels = num_labels
    self.cache_dir = cache_dir

    # Hardware and schedule.
    self.num_gpus = num_gpus
    self.num_epochs = num_epochs
    self.batch_size = batch_size

    # Optimizer settings.
    self.lr = lr
    self.adam_eps = adam_eps
    self.warmup_steps = warmup_steps
    self.weight_decay = weight_decay
    self.max_grad_norm = max_grad_norm

    # NOTE(review): the model is instantiated from the config object rather
    # than via XLNetForSequenceClassification.from_pretrained -- confirm the
    # pretrained weights are loaded elsewhere if they are expected.
    self.config = XLNetConfig.from_pretrained(
        language.value, num_labels=num_labels, cache_dir=cache_dir
    )
    self.model = XLNetForSequenceClassification(self.config)
def loadModel(self, filepath):
    """Rebuild a two-label XLNet classifier from a saved checkpoint.

    Args:
        filepath (str): Path of a ``torch.save``-d mapping that stores the
            model weights under the ``'state_dict'`` key.

    Returns:
        An ``XLNetForSequenceClassification`` with the saved weights loaded;
        tensors are mapped to the CPU while loading.
    """
    checkpoint = torch.load(filepath, map_location='cpu')
    weights = checkpoint['state_dict']

    # A default XLNetConfig with two labels defines the architecture.
    classifier = XLNetForSequenceClassification(XLNetConfig(num_labels=2))
    classifier.load_state_dict(weights)
    return classifier
def train(data_loader, epochs=3):
    """Fine-tune ``xlnet-base-cased`` for two-label sequence classification.

    Args:
        data_loader: Iterable of batches; each batch unpacks into
            ``(input_ids, attention_mask, labels)`` tensors.
        epochs (int, optional): Number of passes over ``data_loader``.
            Defaults to 3.

    Returns:
        The fine-tuned ``XLNetForSequenceClassification`` model, located on
        the module-level ``device``.
    """
    model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)
    # Every batch is moved to `device` below, so the model has to live there
    # too; otherwise the forward pass mixes devices.
    model.to(device)

    # Biases and layer-norm weights are excluded from weight decay.  The
    # legacy 'gamma'/'beta' names are kept, and the layer-norm parameter
    # names used by Hugging Face models are added so the exclusion applies.
    no_decay = ('bias', 'gamma', 'beta', 'LayerNorm.weight', 'layer_norm.weight')
    decayed, not_decayed = [], []
    for name, param in model.named_parameters():
        if any(marker in name for marker in no_decay):
            not_decayed.append(param)
        else:
            decayed.append(param)

    # AdamW reads the per-group option `weight_decay`; the previous
    # `weight_decay_rate` key is not an AdamW option and had no effect.
    optimizer_grouped_parameters = [
        {'params': decayed, 'weight_decay': 0.01},
        {'params': not_decayed, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)

    # trange is a tqdm wrapper around the normal python range
    for _ in trange(epochs, desc="Epoch"):
        model.train()
        tr_loss, nb_tr_steps = 0.0, 0

        for batch in data_loader:
            b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)
            loss = outputs[0]
            loss.backward()
            optimizer.step()

            tr_loss += loss.item()
            nb_tr_steps += 1

        # An empty data_loader yields no steps; skip the average instead of
        # dividing by zero.
        if nb_tr_steps:
            print("Train loss: {}".format(tr_loss / nb_tr_steps))

    return model
def __init__(self):
    """Load the ``hfl/chinese-xlnet-base`` two-label classifier.

    All parameters of the loaded model are marked as trainable, and
    ``self.device`` is set to the ``"cuda"`` torch device.
    """
    super(XlnetModel, self).__init__()

    checkpoint_name = "hfl/chinese-xlnet-base"
    self.xlnet = XLNetForSequenceClassification.from_pretrained(
        checkpoint_name, num_labels=2)
    self.device = torch.device("cuda")

    # Every parameter takes part in gradient computation.
    for weight in self.xlnet.parameters():
        weight.requires_grad_(True)
def __init__(self, batchsize=16, max_len=64):
    """Seed the RNGs, load the XLNet model/tokenizer and store run settings.

    Args:
        batchsize (int, optional): Stored as ``BATCH_SIZE`` and embedded in
            the model file name. Defaults to 16.
        max_len (int, optional): Stored as ``MAX_LEN``. Defaults to 64.
    """
    # Seed NumPy and torch before the model is created.
    RANDOM_SEED = 42
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)

    # Prefer the first CUDA device, fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print('device {}'.format(device))

    PRE_TRAINED_MODEL_NAME = 'xlnet-base-cased'
    classifier = XLNetForSequenceClassification.from_pretrained(
        PRE_TRAINED_MODEL_NAME, num_labels=3)

    self.device = device
    self.model = classifier.to(device)
    self.tokenizer = XLNetTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

    # Data split settings.
    self.test_size = 0.5
    self.random_state = 101

    # Training settings.
    self.MAX_LEN = max_len
    self.BATCH_SIZE = batchsize
    self.EPOCHS = 10
    self.num_data_workers = 4

    self.model_file = './models/xlnet_model_batch{}.bin'.format(batchsize)
    # Three classes, matching num_labels=3 above.
    self.class_names = ['positive', 'negative', 'neutral']
    self.columns = None
def __init__(self,
             xlnet_pretrained_model="xlnet-base-cased",
             xlnet_pretrained_tokenizer=None,
             train_batch_size=8,
             eval_batch_size=8,
             num_labels=2,
             learning_rate=3e-5,
             train_dset=None,
             eval_dset=None):
    """Load the XLNet classifier/tokenizer and wrap the datasets in loaders.

    Args:
        xlnet_pretrained_model: Checkpoint name or path for both the model
            and the tokenizer. Defaults to "xlnet-base-cased".
        xlnet_pretrained_tokenizer: Currently not used.
            NOTE(review): looks like a separate tokenizer checkpoint was
            intended -- confirm before wiring it in.
        train_batch_size (int): Batch size of the training loader.
        eval_batch_size (int): Batch size of the evaluation loader.
        num_labels (int): Number of output labels of the classifier.
        learning_rate (float): Stored as ``self.learning_rate`` (it was
            previously accepted but discarded).
        train_dset: Training dataset, or None for no training loader.
        eval_dset: Evaluation dataset, or None for no evaluation loader.
    """
    # define hyperparameters
    self.train_batch_size = train_batch_size
    self.eval_batch_size = eval_batch_size
    self.num_labels = num_labels
    self.learning_rate = learning_rate

    # loading pre-trained models
    self.model = XLNetForSequenceClassification.from_pretrained(
        xlnet_pretrained_model, num_labels=num_labels).to(self.DEVICE)
    self.tokenizer = XLNetTokenizer.from_pretrained(xlnet_pretrained_model)

    # creating / loading datasets
    self.train_dset = train_dset
    self.eval_dset = eval_dset

    # The datasets default to None; a DataLoader cannot be built around
    # None, so the corresponding loader is left as None in that case.
    self.train_loader = None
    if train_dset is not None:
        self.train_loader = DataLoader(train_dset,
                                       batch_size=self.train_batch_size,
                                       shuffle=True)
    self.eval_loader = None
    if eval_dset is not None:
        self.eval_loader = DataLoader(eval_dset,
                                      batch_size=self.eval_batch_size)
def Get_Model(modelName):
    """Return a two-label sequence classifier for the given architecture.

    The weights are loaded from the module-level ``pretrained_model_path``.

    Args:
        modelName: One of ``'XLNet'``, ``'BERT'``, ``'RoBerta'`` or
            ``'Albert'``.

    Returns:
        The loaded model, or the empty string ``''`` when ``modelName`` is
        not one of the names above.
    """
    # num_labels=2: binary classification for every architecture.
    if modelName == 'XLNet':
        return XLNetForSequenceClassification.from_pretrained(
            pretrained_model_path, num_labels=2)
    if modelName == 'BERT':
        return BertForSequenceClassification.from_pretrained(
            pretrained_model_path, num_labels=2)
    if modelName == 'RoBerta':
        return RobertaForSequenceClassification.from_pretrained(
            pretrained_model_path, num_labels=2)
    if modelName == 'Albert':
        return AlbertForSequenceClassification.from_pretrained(
            pretrained_model_path, num_labels=2)
    # Unknown names fall through to the empty-string placeholder.
    return ''
def __init__(self, config):
    """Build the XLNet classifier and an additional linear layer.

    Args:
        config: Project configuration object; this method reads
            ``bert_path`` (checkpoint name or path), ``num_classes`` and
            ``hidden_size`` from it.
    """
    super(Model, self).__init__()
    model_config = XLNetConfig.from_pretrained(config.bert_path,
                                               num_labels=config.num_classes)
    self.xlnet = XLNetForSequenceClassification.from_pretrained(
        config.bert_path, config=model_config)

    # Fine-tune the whole network.  The model is stored as `self.xlnet`;
    # the loop previously iterated over a non-existent `self.bert`.
    for param in self.xlnet.parameters():
        param.requires_grad = True

    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def get_predictions(self, sentences):
    """
    Predict a label for every sentence with the model in ``self.model_folder``.

    :param sentences: the sentences
    :return: a dataframe with the columns ``sentences``, ``label`` (the tag
        name) and ``y_pred`` (the predicted class index)
    """
    self.tag2idx = get_existing_tag2idx(self.model_folder)
    # Inverse mapping: class index -> tag name.
    tag2name = {idx: tag for tag, idx in self.tag2idx.items()}

    model = XLNetForSequenceClassification.from_pretrained(
        self.model_folder, num_labels=len(tag2name))
    model.to(self.device)
    model.eval()

    logger.info("Setting input embedding")
    input_ids, masks, segs = generate_dataloader_input(sentences)
    dataloader = get_dataloader(input_ids, masks, segs, BATCH_NUM)

    y_predict = []
    logger.info("Running evaluation...")
    for nb_eval_steps, batch in enumerate(dataloader):
        if nb_eval_steps % 100 == 0:
            logger.info(f"Step {nb_eval_steps}")

        b_input_ids, b_segs_mask_pair = batch[0], batch
        b_input_ids, b_input_mask, b_segs = (t.to(self.device) for t in b_segs_mask_pair)

        # NOTE(review): XLNet documents `input_mask` as the negative of
        # `attention_mask` -- confirm generate_dataloader_input produces
        # masks in that convention.
        with torch.no_grad():
            outputs = model(
                input_ids=b_input_ids,
                token_type_ids=b_segs,
                input_mask=b_input_mask,
            )

        # The first output holds the logits; the arg-max is the prediction.
        logits = outputs[0].detach().cpu().numpy()
        y_predict.extend(np.argmax(logits, axis=1))

    return pd.DataFrame({
        "sentences": sentences,
        "label": [tag2name[pred] for pred in y_predict],
        "y_pred": y_predict
    })
def __init__(self, model_name, model_type):
    """Load a pretrained regression model and its training hyperparameters.

    Hyper-parameters found with the validation set:
        xlnet-large-cased   : epochs = 4, learning_rate = 1e-5, batch_size = 16, epsilon = 1e-6
        bert-large-uncased  : epochs = 4, learning_rate = 3e-5, batch_size = 16, epsilon = 1e-8
        albert-xxlarge-v2   : epochs = 3, learning_rate = 5e-5, batch_size = 8,  epsilon = 1e-6

    Args:
        model_name: One of "xlnet-large-cased", "bert-large-uncased",
            "albert-xlarge-v2" or "albert-xxlarge-v2".
        model_type: One of "bert", "xlnet" or "albert".

    Raises:
        Exception: If ``model_name`` or ``model_type`` is not supported.
    """
    self.model_name = model_name
    self.model_type = model_type

    # ALBERT trains with a batch of 8, the other models with 16.
    self.batch_size = 8 if model_type == 'albert' else 16

    # Epochs, learning rate and Adam epsilon per checkpoint.
    # "albert-xlarge-v2" was accepted by the validation below but had no
    # hyperparameters, and "albert-xxlarge-v2" had hyperparameters but was
    # rejected; both names are now supported with the same values.
    # NOTE(review): the values were tuned for albert-xxlarge-v2 -- confirm
    # they are acceptable for albert-xlarge-v2.
    hyperparameters = {
        "xlnet-large-cased": (4, 1e-5, 1e-6),
        "bert-large-uncased": (4, 3e-5, 1e-8),
        "albert-xlarge-v2": (3, 5e-5, 1e-6),
        "albert-xxlarge-v2": (3, 5e-5, 1e-6),
    }
    available_model_name = list(hyperparameters)
    available_model_type = ["bert", "xlnet", "albert"]

    if self.model_name not in available_model_name:
        raise Exception("Error : model_name should be in", available_model_name)
    if self.model_type not in available_model_type:
        raise Exception("Error : model_type should be in", available_model_type)

    # num_labels=1 configures the classification head for regression.
    if self.model_type == 'bert':
        self.config = BertConfig.from_pretrained(self.model_name, num_labels=1)
        self.model = BertForSequenceClassification.from_pretrained(self.model_name, config=self.config)
    elif self.model_type == 'xlnet':
        self.config = XLNetConfig.from_pretrained(self.model_name, num_labels=1)
        self.model = XLNetForSequenceClassification.from_pretrained(self.model_name, config=self.config)
    elif self.model_type == 'albert':
        self.config = AlbertConfig.from_pretrained(self.model_name, num_labels=1)
        self.model = AlbertForSequenceClassification.from_pretrained(self.model_name, config=self.config)

    # Use the GPU when one is present instead of calling .cuda()
    # unconditionally, which fails on CPU-only hosts.
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.n_gpu = torch.cuda.device_count()
    self.model.to(self.device)

    self.epochs, self.lr, self.eps = hyperparameters[self.model_name]

    # Gradient threshold: gradient norms above it are scaled down to it.
    self.max_grad_norm = 1.0

    self.optimizer = AdamW(self.model.parameters(), lr=self.lr, eps=self.eps)
def load_model():
    """Load the first saved XLNet classifier found under ``checkpoint_dir``.

    ``checkpoint_dir`` is searched recursively for files named
    ``WEIGHTS_NAME``; the directory of the first match in sorted order is
    loaded.

    Returns:
        tuple: ``(model, tokenizer)`` -- the model moved to ``device`` and
        switched to eval mode, and the ``xlnet-base-cased`` tokenizer.
    """
    pattern = checkpoint_dir + '/**/' + WEIGHTS_NAME
    weight_files = sorted(glob.glob(pattern, recursive=True))
    checkpoints = [os.path.dirname(path) for path in weight_files]

    model = XLNetForSequenceClassification.from_pretrained(checkpoints[0])
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

    model.to(device)
    model.eval()
    return (model, tokenizer)
def load_model(self,model_path):
    """Load an XLNet sequence classifier from ``model_path``.

    The number of labels comes from ``self.args['num_classes']``; attention
    weights and hidden states are not returned by the model. When CUDA is
    available the model is moved to ``self.device``.

    Args:
        model_path: Checkpoint name or path passed to ``from_pretrained``.

    Returns:
        The loaded ``XLNetForSequenceClassification``.
    """
    num_classes = self.args['num_classes']
    classifier = XLNetForSequenceClassification.from_pretrained(
        model_path,
        num_labels = num_classes,
        output_attentions = False,
        output_hidden_states = False,
    )

    if not torch.cuda.is_available():
        return classifier

    classifier.cuda(self.device)
    return classifier
def create_from_pretrained(task_type, xlnet_model_name, cache_dir, num_labels):
    """Create a pretrained XLNet model for the given task.

    Only ``TaskType.CLASSIFICATION`` is supported (there is no regression
    variant because sentiment analysis does not need one).

    Args:
        task_type: Task selector.
        xlnet_model_name: Checkpoint name or path.
        cache_dir: Cache directory passed to ``from_pretrained``.
        num_labels: Number of output labels.

    Returns:
        The loaded ``XLNetForSequenceClassification``.

    Raises:
        KeyError: If ``task_type`` is not ``TaskType.CLASSIFICATION``.
    """
    if not (task_type == TaskType.CLASSIFICATION):
        raise KeyError(task_type)

    return XLNetForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=xlnet_model_name,
        cache_dir=cache_dir,
        num_labels=num_labels)
def __init__(self, requires_grad=True):
    """Load the ``xlnet-large-cased`` two-label classifier and tokenizer.

    Args:
        requires_grad (bool, optional): Value assigned to ``requires_grad``
            of every model parameter. Defaults to True.
    """
    super(XlnetModel, self).__init__()

    checkpoint_name = 'xlnet-large-cased'
    self.xlnet = XLNetForSequenceClassification.from_pretrained(
        checkpoint_name, num_labels=2)
    self.tokenizer = AutoTokenizer.from_pretrained(checkpoint_name,
                                                   do_lower_case=True)

    self.requires_grad = requires_grad
    self.device = torch.device("cuda")

    # Apply the same trainable flag to every parameter.
    for weight in self.xlnet.parameters():
        weight.requires_grad = requires_grad
def main():
    """Fine-tune ``xlnet-base-cased`` as a four-label classifier.

    Command-line options are parsed with docopt from the module docstring:
    ``--lr``, ``--seq_len``, ``--max_epoch``, ``--batch_size``,
    ``--num_train`` and ``--num_valid``. After each epoch the training and
    validation loss/accuracy are printed; the final weights are written to
    ``../result/xlnet-base-cased.pkl``.
    """
    # Parse the command-line arguments (generated from the module docstring
    # at the top of this file).
    args = docopt(__doc__)
    pprint(args)

    # Read the parameters.
    lr = float(args['--lr'])
    seq_len = int(args['--seq_len'])
    max_epoch = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train = int(args['--num_train'])
    num_valid = int(args['--num_valid'])

    # Select the model.
    pretrained_weights = 'xlnet-base-cased'
    tokenizer = XLNetTokenizer.from_pretrained(pretrained_weights)
    config = XLNetConfig.from_pretrained(pretrained_weights, num_labels=4)
    # Pass the config explicitly: it was built with num_labels=4 but never
    # handed to the model, so the model did not receive that setting.
    model = XLNetForSequenceClassification.from_pretrained(pretrained_weights,
                                                           config=config)
    print(model.config.num_labels)

    # Pick the device to run on.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets.
    encoder = TwinPhraseEncoder(tokenizer, seq_len)

    train_dataset = WordnetDataset(mode='train',
                                   num_data=num_train,
                                   transform=encoder)
    valid_dataset = WordnetDataset(mode='valid',
                                   num_data=num_valid,
                                   transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop.
    for epoch in range(1, max_epoch + 1):
        print('=' * 27 + f' Epoch {epoch:0>2} ' + '=' * 27)
        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(
            f'|  Training    |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(
            f'|  Validation  |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Save the weights.
        torch.save(model.state_dict(), f'../result/{pretrained_weights}.pkl')
def __init__(self, pretrained_model_path, num_classes, device,
             d_model=1024, n_layer=24, n_head=16, d_inner=4096,
             ff_activation='gelu', untie_r=True, attn_type='bi',
             initializer_range=0.02, layer_norm_eps=1e-12, dropout=0.1):
    """Load the pretrained XLNet tokenizer and sequence classifier.

    Args:
        pretrained_model_path: Checkpoint name or path for both the
            tokenizer and the model.
        num_classes: Number of labels of the classification head.
        device: Device the model is moved to; also stored as
            ``self.device``.
        d_model, n_layer, n_head, d_inner, ff_activation, untie_r,
        attn_type, initializer_range, layer_norm_eps, dropout: Accepted
            but not read by this method.
    """
    super(XLNetForTextClassification, self).__init__()
    print('Reloading pretrained models...')

    tokenizer = XLNetTokenizer.from_pretrained(pretrained_model_path)
    # Cap the tokenizer's maximum sequence length at 512.
    tokenizer.model_max_length = 512
    self.tokenizer = tokenizer

    classifier = XLNetForSequenceClassification.from_pretrained(
        pretrained_model_path, num_labels=num_classes)
    self.model = classifier.to(device)

    # Softmax over dimension 1.
    self.softmax = torch.nn.Softmax(dim=1)
    self.device = device
def create_and_check_use_mems_train(
    self,
    config,
    input_ids_1,
    input_ids_2,
    input_ids_q,
    perm_mask,
    input_mask,
    target_mapping,
    segment_ids,
    lm_labels,
    sequence_labels,
    is_impossible_labels,
    token_labels,
):
    """Check that a training-mode forward pass returns a loss and no mems.

    ``input_ids_1`` and ``sequence_labels`` are fed to the model in
    consecutive mini-batches of four examples; every batch must produce
    ``outputs.loss`` and ``outputs.mems is None``.
    """
    model = XLNetForSequenceClassification(config)
    model.to(torch_device)
    model.train()

    train_size = input_ids_1.shape[0]
    batch_size = 4

    # Step through the data in non-overlapping windows.  The slice start was
    # previously `i` instead of `i * batch_size`, which produced overlapping
    # batches of growing size.  Stepping by `batch_size` also never yields
    # an empty trailing batch.
    for start in range(0, train_size, batch_size):
        stop = start + batch_size
        input_ids = input_ids_1[start:stop]
        labels = sequence_labels[start:stop]
        outputs = model(input_ids=input_ids, labels=labels, return_dict=True)
        self.parent.assertIsNone(outputs.mems)
        self.parent.assertIsNotNone(outputs.loss)
def create_and_check_xlnet_sequence_classif(
        self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask,
        input_mask, target_mapping, segment_ids, lm_labels, sequence_labels,
        is_impossible_labels, token_labels):
    """Check output shapes of the sequence classification model (tuple API).

    The model is run once without labels (two outputs expected) and once
    with labels (three outputs expected); the loss must be a scalar, the
    logits ``[batch_size, type_sequence_label_size]`` and there must be one
    ``[seq_length, batch_size, hidden_size]`` memory per hidden layer.
    """
    model = XLNetForSequenceClassification(config)
    model.to(torch_device)
    model.eval()

    # Without labels the model must return exactly (logits, mems).
    _logits_only, _mems_only = model(input_ids_1)
    # With labels the loss is prepended.
    loss, logits, mems_1 = model(input_ids_1, labels=sequence_labels)

    expected_logits_size = [self.batch_size, self.type_sequence_label_size]
    expected_mem_size = [self.seq_length, self.batch_size, self.hidden_size]

    self.parent.assertListEqual(list(loss.size()), [])
    self.parent.assertListEqual(list(logits.size()), expected_logits_size)
    self.parent.assertListEqual(
        [list(mem.size()) for mem in mems_1],
        [expected_mem_size] * self.num_hidden_layers)
def create_and_check_xlnet_sequence_classif(
    self,
    config,
    input_ids_1,
    input_ids_2,
    input_ids_q,
    perm_mask,
    input_mask,
    target_mapping,
    segment_ids,
    lm_labels,
    sequence_labels,
    is_impossible_labels,
    token_labels,
):
    """Check output shapes of the sequence classification model.

    The model is run once without labels and once with labels; for the
    labelled run the loss must be a scalar, the logits
    ``(batch_size, type_sequence_label_size)`` and there must be one
    ``(seq_length, batch_size, hidden_size)`` memory per hidden layer.
    """
    model = XLNetForSequenceClassification(config)
    model.to(torch_device)
    model.eval()

    # The label-free forward pass only has to run; its result is not checked.
    model(input_ids_1)
    outputs = model(input_ids_1, labels=sequence_labels)

    expected_logits_shape = (self.batch_size, self.type_sequence_label_size)
    expected_mem_shape = (self.seq_length, self.batch_size, self.hidden_size)

    self.parent.assertEqual(outputs.loss.shape, ())
    self.parent.assertEqual(outputs.logits.shape, expected_logits_shape)
    self.parent.assertListEqual(
        [mem.shape for mem in outputs.mems],
        [expected_mem_shape] * self.num_hidden_layers,
    )
def __init__(self, requires_grad=True, num_labels=2):
    """Load the ``hfl/chinese-xlnet-base`` classifier and tokenizer.

    Args:
        requires_grad (bool, optional): Value assigned to ``requires_grad``
            of every model parameter. Defaults to True.
        num_labels (int, optional): Number of labels of the classification
            head. Defaults to 2.
    """
    super(XlnetModel, self).__init__()
    self.num_labels = num_labels

    # An earlier variant used the 'xlnet-large-cased' checkpoint with
    # AutoTokenizer instead of the Chinese base checkpoint below.
    checkpoint_name = 'hfl/chinese-xlnet-base'
    self.xlnet = XLNetForSequenceClassification.from_pretrained(
        checkpoint_name, num_labels=num_labels)
    self.tokenizer = XLNetTokenizer.from_pretrained(
        checkpoint_name, do_lower_case=True)

    self.requires_grad = requires_grad
    self.device = torch.device("cuda")

    # Apply the same trainable flag to every parameter.
    for weight in self.xlnet.parameters():
        weight.requires_grad = requires_grad
def _scl_num_labels(dataset):
    """Return the label count for ``dataset`` (5 unless listed below)."""
    if dataset == "yahoo":
        return 10
    if dataset == "ag_news":
        return 4
    if dataset == "imdb":
        return 2
    return 5


def make_model(args, device):
    """Build the SCL wrapper model selected by ``args.model``.

    Args:
        args: Namespace providing ``model`` ("roberta", "bert" or "xlnet"),
            ``dataset`` (label count: "imdb" 2, "ag_news" 4, "yahoo" 10,
            anything else 5), ``with_mix`` and ``with_summary``.
        device: Device handed to the wrapper model.

    Returns:
        The wrapper model, or None when ``args.model`` is not one of the
        three supported names.
    """
    if args.model == "roberta":
        checkpoint = "roberta-base"
        config = RobertaConfig.from_pretrained(checkpoint)
        config.num_labels = _scl_num_labels(args.dataset)
        backbone = RobertaForSequenceClassification.from_pretrained(
            checkpoint, config=config)
        return scl_model_Roberta(config,
                                 device,
                                 backbone,
                                 with_semi=args.with_mix,
                                 with_sum=args.with_summary)

    if args.model == "bert":
        checkpoint = "bert-base-uncased"
        config = BertConfig.from_pretrained(checkpoint)
        config.num_labels = _scl_num_labels(args.dataset)
        backbone = BertForSequenceClassification.from_pretrained(
            checkpoint, config=config)
        return scl_model_Bert(config,
                              device,
                              backbone,
                              with_semi=args.with_mix,
                              with_sum=args.with_summary)

    if args.model == "xlnet":
        checkpoint = "xlnet-base-cased"
        config = XLNetConfig.from_pretrained(checkpoint)
        config.num_labels = _scl_num_labels(args.dataset)
        backbone = XLNetForSequenceClassification.from_pretrained(
            checkpoint, config=config)
        return scl_model_Xlnet(config,
                               device,
                               backbone,
                               with_semi=args.with_mix,
                               with_sum=args.with_summary)

    return None
def pick_model(model_name, num_labels):
    """Return the pretrained sequence classifier for ``model_name``.

    Supported model names:
        'albert-base-v2',
        'bert-base-uncased', 'bert-large-uncased',
        'roberta-base', 'roberta-large', 'roberta-large-mnli',
        'xlnet-base-cased'

    Args:
        model_name (str): One of the names listed above.
        num_labels (int): Number of labels of the classification head.

    Returns:
        The loaded model, moved to the GPU when CUDA is available. The model
        is configured not to return attention weights or hidden states.

    Raises:
        ValueError: If ``model_name`` is not supported. (Previously this
            case printed a "Loaded" message and then failed on an unbound
            local variable.)
    """
    if model_name == 'albert-base-v2':
        model_class = AlbertForSequenceClassification
    elif model_name in ('bert-base-uncased', 'bert-large-uncased'):
        model_class = BertForSequenceClassification
    elif model_name in ('roberta-base', "roberta-large", "roberta-large-mnli"):
        model_class = RobertaForSequenceClassification
    elif model_name == 'xlnet-base-cased':
        model_class = XLNetForSequenceClassification
    else:
        raise ValueError(f'Unsupported model name: {model_name!r}')

    model = model_class.from_pretrained(
        model_name,
        num_labels=num_labels,
        output_attentions=False,  # Do not return attention weights.
        output_hidden_states=False,  # Do not return all hidden states.
    )
    print(f'Loaded {model_name} model.')

    if torch.cuda.is_available():
        model.cuda()
    return model
def finetune(self,
             input_text: List[str],
             output: List[str],
             max_input_length=128,
             validation_split: float = 0.15,
             epochs: int = 20,
             batch_size: int = None,
             early_stopping: bool = True,
             trainer: pl.Trainer = None):
    """
    Finetune XLNet for text classification.
    input_text and output must be ordered 1:1
    Unique data classes automatically determined from output data

    Args:
        input_text: List of strings to classify (must match output ordering)
        output: List of input classifications (must match input ordering)
        max_input_length: Maximum number of tokens to be supported as input,
            forwarded to ``self.encode``
        validation_split: Float between 0 and 1 that determines what
            percentage of the data to use for validation
        epochs: Integer that specifies how many iterations of training to do
        batch_size: Leave as None to determine the batch size automatically
            (not read by this method)
        early_stopping: Boolean that determines whether to automatically
            stop when validation loss stops improving
        trainer: Your custom pytorch_lightning trainer
    """
    assert len(input_text) == len(output), \
        "input_text and output must have the same length"
    OPTIMAL_BATCH_SIZE = 128

    # Sort the unique labels so the label <-> index mapping is the same on
    # every run; iterating a bare set of strings gives an order that can
    # change between interpreter sessions.
    labels = sorted(set(output))

    self.labels = dict(enumerate(labels))
    class_to_idx = {label: idx for idx, label in enumerate(labels)}

    self.model = XLNetForSequenceClassification.from_pretrained(
        self.model_path, num_labels=len(labels))

    print("Processing data...")
    dataset = [
        self.encode(text, class_to_idx[label], max_input_length)
        for text, label in zip(input_text, output)
    ]

    Finetunable.finetune(self,
                         dataset,
                         validation_split=validation_split,
                         epochs=epochs,
                         optimal_batch_size=OPTIMAL_BATCH_SIZE,
                         early_stopping=early_stopping,
                         trainer=trainer)
def __init__(self, bert_config, device, n_class):
    """Load the XLNet classifier and tokenizer named by ``bert_config``.

    :param bert_config: str, checkpoint name or path passed to
        ``from_pretrained`` for both the model and the tokenizer
    :param device: torch.device, stored as ``self.device``
    :param n_class: int, number of labels of the classification head
    """
    super(DefaultModel, self).__init__()
    self.device = device
    self.n_class = n_class
    self.bert_config = bert_config

    # Despite the attribute name, the backbone is an XLNet classifier.
    self.bert = XLNetForSequenceClassification.from_pretrained(
        bert_config,
        num_labels=n_class,
        output_hidden_states=False)
    self.tokenizer = XLNetTokenizer.from_pretrained(bert_config)
def demo5():
    """Demo: build a three-label XLNet classifier and encode one sentence.

    The tokenizer, configuration and weights are read from the local
    directory ``XLN_PATH``. Nothing is returned.
    """
    from transformers import XLNetConfig, XLNetModel, XLNetTokenizer, XLNetForSequenceClassification
    import torch

    # Define the path and initialise the tokenizer.
    XLN_PATH = r"D:\transformr_files\XLNetLMHeadModel"
    tokenizer = XLNetTokenizer.from_pretrained(XLN_PATH)

    # Load the configuration.
    model_config = XLNetConfig.from_pretrained(XLN_PATH)
    # Set the number of classes to 3.
    model_config.num_labels = 3

    # Create the XLNetForSequenceClassification from the XLNet config.
    cls_model = XLNetForSequenceClassification.from_pretrained(
        XLN_PATH, config=model_config)

    # Switch to evaluation mode.  The model is bound to `cls_model`; the
    # previous call on the undefined name `model` raised NameError.
    cls_model.eval()

    token_codes = tokenizer.encode_plus("i like you, what about you")
def main(argv, arc):
    """Score test dialogues with a saved XLNet model and write the answers.

    Args:
        argv: ``[program, test_data_path, model_path, output_path]``.
        arc: Not used.

    For every ``question`` group of the test CSV, the row with the highest
    predicted probability is selected; its ``candidate_id`` (or column
    ``B`` when there is no ``candidate_id`` column) is written to
    ``output_path`` together with ids counting up from 80000.
    """
    # The message now lists the arguments in the order they are read below.
    assert len(
        argv) == 4, 'input should be :test_data, model_path, output_path '
    test_path = argv[1]
    model_name = argv[2]
    output_path = argv[3]

    test_df = pd.read_csv(test_path, dtype={'A': 'str', 'B': 'str'})
    # Drop the index column left behind by a previous to_csv call.
    if 'Unnamed: 0' in test_df.columns:
        test_df = test_df.drop(['Unnamed: 0'], axis=1)
    print(len(test_df), end='\n')

    tokenizer = XLNetTokenizer.from_pretrained(pre_trained_model_name,
                                               do_lower_case=True)
    testset = DialogueDataset(test_df, 'test', tokenizer=tokenizer)

    # Rebuild the wrapper, then load the fine-tuned weights into it
    # (an earlier approach unpickled the whole model object instead).
    NUM_LABELS = 2
    model = xlnet_model()
    model.model = XLNetForSequenceClassification.from_pretrained(
        pre_trained_model_name, num_labels=NUM_LABELS)
    model.model.load_state_dict(
        torch.load(f'{model_name}', map_location=f'cuda:{device}'))
    print(model.val_accu_list)

    preds = model.predict(testset)
    test_df['prob'] = preds

    # Pick the most probable candidate of every question.
    answer_column = 'candidate_id' if 'candidate_id' in test_df.columns else 'B'
    ans = [
        group.loc[group['prob'].idxmax(), answer_column]
        for _, group in test_df.groupby('question')
    ]

    pred_df = pd.DataFrame()
    pred_df['id'] = [f'{80000 + i}' for i in range(0, len(ans))]
    pred_df['candidate-id'] = ans
    pred_df.to_csv(output_path, index=False)
def run_xlnet(device, results_file):
    """Train, reload and test an ``xlnet-base-cased`` classifier.

    Settings are read from the module-level ``args`` (``seed``,
    ``num_label``, ``lr``, ``epochs``, ``checkpoint_dir``, ``model_name``).

    Args:
        device: Device handed to the training and test routines.
        results_file: Handed to ``train_xlnet``.

    Returns:
        tuple: ``(acc, f1, recall, prec, f1_ave, recall_ave, prec_ave)`` as
        returned by ``test_xlnet``.
    """
    set_seed(args.seed)
    torch.cuda.empty_cache()

    # get the data
    logging.info('Constructing datasets...')
    train_data, dev_data, test_data = read_samples_xlnet()

    # prepare the model and data
    model = XLNetForSequenceClassification.from_pretrained(
        "xlnet-base-cased",
        num_labels=args.num_label,
        output_attentions=False,
        output_hidden_states=False)

    # Biases and layer-norm weights are excluded from weight decay.  The
    # legacy 'gamma'/'beta' names are kept, and the layer-norm parameter
    # names used by Hugging Face models are added so the exclusion applies.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight', 'layer_norm.weight']
    decayed, not_decayed = [], []
    for name, param in model.named_parameters():
        if any(marker in name for marker in no_decay):
            not_decayed.append(param)
        else:
            decayed.append(param)

    # AdamW reads the per-group option `weight_decay`; the previous
    # `weight_decay_rate` key is not an AdamW option and had no effect.
    optimizer_grouped_parameters = [
        {'params': decayed, 'weight_decay': 0.01},
        {'params': not_decayed, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr, eps=1e-6)

    epoch = args.epochs
    train_iter = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=32)
    dev_iter = DataLoader(dev_data, sampler=SequentialSampler(dev_data), batch_size=32)
    test_iter = DataLoader(test_data, sampler=SequentialSampler(test_data), batch_size=32)

    # create model save directory (exist_ok avoids the exists/makedirs race)
    checkpoint_dir = os.path.join(args.checkpoint_dir, args.model_name)
    os.makedirs(checkpoint_dir, exist_ok=True)

    # run the tests
    logging.info(
        "Number of training samples {train}, number of dev samples {dev}, number of test samples {test}".format(
            train=len(train_data), dev=len(dev_data), test=len(test_data)))

    train_xlnet(epoch, model, train_iter, dev_iter, optimizer, device, checkpoint_dir, results_file)

    # Evaluate the checkpoint written during training, not the in-memory model.
    model = load_model(checkpoint_dir)
    acc, f1, recall, prec, f1_ave, recall_ave, prec_ave = test_xlnet(test_iter, model, device)

    del model
    return acc, f1, recall, prec, f1_ave, recall_ave, prec_ave
def _config_model(model_name: AvailableClassificationModels, num_labels: int,
                  use_gpu: bool):
    """Load the sequence classifier matching ``model_name``.

    Args:
        model_name: Enum member whose ``value`` is the checkpoint name.
        num_labels: Number of labels of the classification head.
        use_gpu: When true, the model is moved to the GPU with ``.cuda()``.

    Returns:
        The loaded model (configured to return attention weights), or None
        when the checkpoint name contains none of 'roberta', 'xlnet' or
        'bert' and ``use_gpu`` is false.
    """
    model_name = str(model_name.value)
    model = None

    # 'roberta' has to be tested before 'bert': every RoBERTa checkpoint
    # name also contains the substring 'bert', so the previous order sent
    # RoBERTa checkpoints to the BERT class and never reached this branch.
    if 'roberta' in model_name:
        model = RobertaForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)
    elif 'xlnet' in model_name:
        model = XLNetForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)
    elif 'bert' in model_name:
        # NOTE(review): this substring also matches 'albert...' names --
        # confirm whether ALBERT checkpoints are expected here.
        model = BertForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels, output_attentions=True)

    if use_gpu:
        model.cuda()
    return model