def prepare_pretrained():
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        BERT_BASE_DIR + '/bert_model.ckpt',
        BERT_BASE_DIR + '/bert_config.json',
        WORK_DIR + 'pytorch_model.bin')
    shutil.copyfile(BERT_BASE_DIR + '/bert_config.json',
                    WORK_DIR + 'bert_config.json')
def setting(BERT_MODEL_PATH, WORK_DIR):
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        BERT_MODEL_PATH + 'bert_model.ckpt',
        BERT_MODEL_PATH + 'bert_config.json',
        WORK_DIR + 'pytorch_model.bin')
    shutil.copyfile(BERT_MODEL_PATH + 'bert_config.json',
                    WORK_DIR + 'bert_config.json')
def convert_tf_to_pytorch(model_path, output_path):
    os.makedirs(output_path, exist_ok=True)
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        os.path.join(model_path, 'bert_model.ckpt'),
        os.path.join(model_path, 'bert_config.json'),
        os.path.join(output_path, 'pytorch_model.bin'))
    shutil.copyfile(os.path.join(model_path, 'bert_config.json'),
                    os.path.join(output_path, 'bert_config.json'))
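# Hypothetical usage sketch (not from the original source; paths are
# illustrative and assume the standard uncased_L-12_H-768_A-12 layout from
# the Google BERT release):
convert_tf_to_pytorch('../input/uncased_L-12_H-768_A-12/', '../working/')
# After this call, '../working/' holds pytorch_model.bin and bert_config.json,
# which BertModel.from_pretrained('../working/') can load directly.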
def setup_bert_model(path_to_pretrained_model, epochs, lrate, lrate_clf,
                     batch_size, accum_steps, lin_dim, lin_dropout_prob,
                     warmup, apex_mixed_precision, seed, device, train_loader,
                     clf_class=BertForSequencePairClassification):
    """
    :param path_to_pretrained_model: Path to a folder with a pretrained BERT model
    :param epochs: ...
    :param lrate: ...
    :param lrate_clf: ...
    :param batch_size: ...
    :param accum_steps: ...
    :param lin_dim:
    :param lin_dropout_prob:
    :param warmup: Fraction of iterations used for warmup
    :param apex_mixed_precision: Whether to use NVIDIA apex mixed-precision training
    :param seed: ...
    :param device: ...
    :param train_loader: ...
    :param clf_class: ...
    :return: model, optimizer -- PyTorch model and optimizer
    """
    path_to_pretrained_model = Path(path_to_pretrained_model)
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        str(path_to_pretrained_model / 'bert_model.ckpt'),
        str(path_to_pretrained_model / 'bert_config.json'),
        str(path_to_pretrained_model / 'pytorch_model.bin'))
    seed_everything(seed)
    model = clf_class.from_pretrained(path_to_pretrained_model,
                                      lin_dim=lin_dim,
                                      lin_dropout_prob=lin_dropout_prob,
                                      cache_dir=None,
                                      num_labels=1)
    model.zero_grad()
    model = model.to(device)
    return setup_bert_optimizer_for_model(model, epochs, lrate, lrate_clf,
                                          batch_size, accum_steps, warmup,
                                          apex_mixed_precision, train_loader)
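# setup_bert_optimizer_for_model is referenced above but not shown. A minimal
# sketch of what such a helper could look like, assuming the usual
# pytorch_pretrained_bert BertAdam recipe plus optional apex; the 'bert.' /
# classifier-head parameter split and all names here are assumptions, not the
# original code:
def setup_bert_optimizer_for_model(model, epochs, lrate, lrate_clf, batch_size,
                                   accum_steps, warmup, apex_mixed_precision,
                                   train_loader):
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    params = list(model.named_parameters())
    grouped = [
        # BERT encoder parameters, with and without weight decay
        {'params': [p for n, p in params
                    if n.startswith('bert.') and not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in params
                    if n.startswith('bert.') and any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
        # classifier head gets its own learning rate (lrate_clf)
        {'params': [p for n, p in params if not n.startswith('bert.')],
         'lr': lrate_clf, 'weight_decay': 0.01},
    ]
    t_total = int(epochs * len(train_loader) / accum_steps)
    optimizer = BertAdam(grouped, lr=lrate, warmup=warmup, t_total=t_total)
    if apex_mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1',
                                          verbosity=0)
    return model, optimizer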
def __call__(self, base_dir, output_dir):
    config_file = os.path.join(base_dir, 'bert_config.json')
    vocab_file = os.path.join(base_dir, 'vocab.txt')
    config_file_dest = os.path.join(output_dir, 'bert_config.json')
    vocab_file_dest = os.path.join(output_dir, 'vocab.txt')
    # Convert the TF checkpoint
    convert_tf_checkpoint_to_pytorch(
        os.path.join(base_dir, 'model.ckpt-1000000'),
        config_file,
        os.path.join(output_dir, 'pytorch_model.bin'))
    # Copy the config file
    copyfile(config_file, config_file_dest)
    # Copy the vocab file
    copyfile(vocab_file, vocab_file_dest)
num_to_load = 1700000  # Train size to match time limit
valid_size = 100000    # Validation size
TOXICITY_COLUMN = 'target'

# Add the BERT PyTorch repo to the PATH
# using files from: https://github.com/huggingface/pytorch-pretrained-BERT
package_dir_a = "../input/ppbert/pytorch-pretrained-bert/pytorch-pretrained-BERT"
sys.path.insert(0, package_dir_a)

from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertAdam

# Translate the model from TensorFlow to PyTorch
BERT_MODEL_PATH = '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/'
convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
    BERT_MODEL_PATH + 'bert_model.ckpt',
    BERT_MODEL_PATH + 'bert_config.json',
    WORK_DIR + 'pytorch_model.bin')
shutil.copyfile(BERT_MODEL_PATH + 'bert_config.json',
                WORK_DIR + 'bert_config.json')
os.listdir("../working")

# This is the BERT configuration file
from pytorch_pretrained_bert import BertConfig
bert_config = BertConfig(
    '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/'
    + 'bert_config.json')
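# Hypothetical follow-up (not part of the original snippet): once the weights
# are converted, the classifier can be instantiated from the working directory;
# num_labels=1 matches the single 'target' column above, and the tokenizer
# still reads vocab.txt from the original TF release directory.
model = BertForSequenceClassification.from_pretrained(WORK_DIR, cache_dir=None,
                                                      num_labels=1)
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None,
                                          do_lower_case=True)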
def train(self):
    if self.debug_mode:
        self.epochs = 1
    # Build the dataloaders
    train_loader, valid_loader = self.create_dataloader()
    # Training
    self.seed_everything()
    lr = 2e-5
    accumulation_steps = math.ceil(self.batch_size / self.base_batch_size)
    # Convert the pretrained TF BERT checkpoint to PyTorch
    if not os.path.exists(self.bert_model_path + "pytorch_model.bin"):
        convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
            self.bert_model_path + 'bert_model.ckpt',
            self.bert_model_path + 'bert_config.json',
            self.bert_model_path + 'pytorch_model.bin')
    # Load the pretrained model
    model = BertNeuralNet.from_pretrained(self.bert_model_path, cache_dir=None)
    model.zero_grad()
    model = model.to(self.device)
    # Use a different weight_decay for each parameter group
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    num_train_optimization_steps = int(self.epochs * self.train_len /
                                       self.base_batch_size / accumulation_steps)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=lr,
                         warmup=0.05,
                         t_total=num_train_optimization_steps)
    # Decaying learning-rate schedule (unused)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.6 ** epoch)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    # Start training
    for epoch in range(self.epochs):
        start_time = time.time()
        model.train()
        optimizer.zero_grad()
        # Load and train on each batch
        for i, batch_data in enumerate(train_loader):
            x_batch = batch_data[0]
            y_batch = batch_data[1]
            target_weight_batch = batch_data[2]
            aux_weight_batch = batch_data[3]
            identity_weight_batch = batch_data[4]
            y_pred = model(x_batch.to(self.device),
                           attention_mask=(x_batch > 0).to(self.device),
                           labels=None)
            target_loss, aux_loss, identity_loss = self.custom_loss(
                y_pred, y_batch, epoch, target_weight_batch, aux_weight_batch,
                identity_weight_batch)
            loss = target_loss + aux_loss + identity_loss
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            if (i + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
        # Evaluate on the validation set
        model.eval()
        y_pred = np.zeros((len(self.train_df) - self.train_len))
        for i, batch_data in enumerate(valid_loader):
            x_batch = batch_data[0]
            batch_y_pred = self.sigmoid(
                model(x_batch.to(self.device),
                      attention_mask=(x_batch > 0).to(self.device),
                      labels=None).detach().cpu().numpy())[:, 0]
            y_pred[i * self.base_batch_size:(i + 1) * self.base_batch_size] = batch_y_pred
        # Compute the score
        auc_score = self.evaluator.get_final_metric(y_pred)
        print("epoch: %d duration: %d min auc_score: %.4f" %
              (epoch, int((time.time() - start_time) / 60), auc_score))
        if not self.debug_mode:
            state_dict = model.state_dict()
            torch.save(state_dict,
                       os.path.join(self.data_dir,
                                    "model/model_%d_%s_%d_%.5f" %
                                    (self.seed, self.model_name, epoch, auc_score)))
    # Release the training inputs and the model
    training_history = [train_loader, valid_loader, model, optimizer,
                        param_optimizer, optimizer_grouped_parameters]
    for variable in training_history:
        del variable
    gc.collect()
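# self.custom_loss is not shown in this snippet. A hypothetical illustration of
# a per-sample-weighted multi-task loss in this style; the column layout of
# y_pred/y_batch and the function body are assumptions, not the original code
# (the epoch argument is accepted because the original conditions on it, but
# it is ignored here):
import torch.nn.functional as F

def custom_loss_sketch(y_pred, y_batch, epoch, target_w, aux_w, identity_w):
    # Assumed layout: column 0 = main toxicity target, columns 1..5 =
    # auxiliary toxicity types, remaining columns = identity labels.
    target_loss = F.binary_cross_entropy_with_logits(
        y_pred[:, 0], y_batch[:, 0], weight=target_w)
    aux_loss = F.binary_cross_entropy_with_logits(
        y_pred[:, 1:6], y_batch[:, 1:6], weight=aux_w.unsqueeze(1))
    identity_loss = F.binary_cross_entropy_with_logits(
        y_pred[:, 6:], y_batch[:, 6:], weight=identity_w.unsqueeze(1))
    return target_loss, aux_loss, identity_loss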
def __init__(self, data_dir, model_name, epochs=4, batch_size=64, part=1.,
             seed=1234, debug_mode=False):
    self.device = torch.device('cuda')
    self.data_dir = "../input/jigsaw-unintended-bias-in-toxicity-classification"
    self.bert_model_path = '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/'
    self.input_dir = "../input"
    self.word_dir = "../working/"
    self.debug_mode = debug_mode
    self.model_name = model_name
    self.seed = seed
    self.identity_list = ['male', 'female', 'homosexual_gay_or_lesbian',
                          'christian', 'jewish', 'muslim', 'black', 'white',
                          'psychiatric_or_mental_illness']
    self.toxicity_type_list = ['severe_toxicity', 'obscene', 'identity_attack',
                               'insult', 'threat']
    self.weight_dict = {"severe_toxicity": 1000, "obscene": 234,
                        "identity_attack": 235, "insult": 21, "threat": 645,
                        "male": 44, "female": 34,
                        "homosexual_gay_or_lesbian": 175, "christian": 49,
                        "jewish": 248, "muslim": 90, "black": 129, "white": 74,
                        "psychiatric_or_mental_illness": 441, "np": 12, "pn": 15}
    self.stopwords = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n“”’\'∞θ÷α•à−β∅³π‘₹´°£€\×™√²—'
    self.seed_everything()
    self.max_len = 220
    self.epochs = epochs
    self.base_batch_size = 32
    self.batch_size = batch_size
    self.split_ratio = 0.95
    self.sample_num = 1804874
    if not self.debug_mode:
        self.train_df = pd.read_csv(os.path.join(self.data_dir, "train.csv")).sample(
            int(self.sample_num * part), random_state=1234).fillna(0.)
        self.test_df = pd.read_csv(os.path.join(self.data_dir, "test.csv"))
    else:
        self.train_df = pd.read_csv(os.path.join(self.data_dir, "train.csv")).head(1000).fillna(0.)
        self.test_df = pd.read_csv(os.path.join(self.data_dir, "test.csv")).head(1000)
    self.train_len = int(len(self.train_df) * self.split_ratio)
    self.evaluator = self.init_evaluator()
    self.bert_config = BertConfig(
        os.path.join(self.data_dir, "uncased_L-12_H-768_A-12/bert_config.json"))
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        self.bert_model_path + 'bert_model.ckpt',
        self.bert_model_path + 'bert_config.json',
        self.word_dir + 'pytorch_model.bin')
    shutil.copyfile(self.bert_model_path + 'bert_config.json',
                    self.word_dir + 'bert_config.json')
cache_dir = '/home/dpappas/bert_cache/'
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

tokenizer = BertTokenizer.from_pretrained(
    # pretrained_model_name='bert-large-uncased',
    pretrained_model_name='/home/dpappas/bert_cache/bert-large-uncased-vocab.txt',
    cache_dir=cache_dir)

(test_data, test_docs, dev_data, dev_docs,
 train_data, train_docs, bioasq6_data) = load_all_data(dataloc=dataloc)

if not os.path.exists(os.path.join(init_checkpoint_pt, 'pytorch_model.bin')):
    convert_tf_checkpoint_to_pytorch(
        os.path.join(init_checkpoint_pt, 'bert_model.ckpt'),
        os.path.join(init_checkpoint_pt, 'bert_config.json'),
        os.path.join(init_checkpoint_pt, 'pytorch_model.bin'))

elmo = Elmo(options_file, weight_file, 1, dropout=0)
model = BertModel.from_pretrained(init_checkpoint_pt, cache_dir=cache_dir)
# model = model.cuda()
model.eval()

nof_threads = 16
#######################################################
# test_data['queries'] = Parallel(n_jobs=nof_threads, verbose=0, backend="threading")(map(delayed(work), tqdm(test_data['queries'])))
# with open(dataloc + 'bioasq_bm25_top100_bert_elmo.test.pkl', 'wb') as f:
#     pickle.dump(test_data, f)
#     f.close()
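# Hypothetical sketch (not in the original script) of how the frozen BertModel
# above could embed a query; the sentence is illustrative, and
# pytorch_pretrained_bert's BertModel returns (encoded_layers, pooled_output):
import torch

tokens = tokenizer.tokenize("is erenumab effective for migraine prevention")
ids = torch.tensor([tokenizer.convert_tokens_to_ids(['[CLS]'] + tokens + ['[SEP]'])])
with torch.no_grad():
    encoded_layers, pooled = model(ids, output_all_encoded_layers=False)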
def train(self):
    if self.debug_mode:
        self.epochs = 1
    # Build the dataloaders
    train_loader, valid_loader = self.create_dataloader()
    # Training
    self.seed_everything()
    lr = 7e-6
    accumulation_steps = math.ceil(self.batch_size / self.base_batch_size)
    # Convert the pretrained TF BERT checkpoint to PyTorch
    if not os.path.exists(self.bert_model_path + "pytorch_model.bin"):
        convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
            self.bert_model_path + 'bert_model.ckpt',
            self.bert_model_path + 'bert_config.json',
            self.bert_model_path + 'pytorch_model.bin')
    # Load the pretrained model
    model = BertNeuralNet.from_pretrained(self.bert_model_path, cache_dir=None)
    # model.load_state_dict(torch.load("/root/nb/data/model/model[bert][1234][2][17][train2_simple_target][0.9419].bin"))
    model.load_state_dict(
        torch.load("/root/nb/data/model/model[bert][1234][1][20][train2_simple_target][0.9395].bin"))
    model.zero_grad()
    model = model.to(self.device)
    # Use a different weight_decay for each parameter group
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    epoch_steps = int(self.train_len / self.base_batch_size / accumulation_steps)
    num_train_optimization_steps = int(self.epochs * epoch_steps)
    valid_every = math.floor(epoch_steps / 10)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=lr,
                         warmup=0.05,
                         t_total=num_train_optimization_steps)
    # Decaying learning-rate schedule (unused)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.6 ** epoch)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    # Start training
    for epoch in range(self.epochs):
        train_start_time = time.time()
        model.train()
        optimizer.zero_grad()
        # Load and train on each batch
        for i, batch_data in enumerate(train_loader):
            x_batch = batch_data[0]
            y_batch = batch_data[1]
            target_weight_batch = batch_data[2]
            aux_weight_batch = batch_data[3]
            identity_weight_batch = batch_data[4]
            x_mask = batch_data[5]
            y_pred = model(x_batch, attention_mask=x_mask, labels=None)
            target_loss = self.custom_loss(y_pred, y_batch, epoch,
                                           target_weight_batch,
                                           aux_weight_batch,
                                           identity_weight_batch)
            loss = target_loss
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            if (i + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            # Validation
            if (i + 1) % valid_every == 0:
                valid_start_time = time.time()
                model.eval()
                y_pred = np.zeros((len(self.train_df) - self.train_len))
                for j, valid_batch_data in enumerate(valid_loader):
                    x_batch = valid_batch_data[0]
                    x_mask = valid_batch_data[2]
                    batch_y_pred = self.sigmoid(
                        model(x_batch, attention_mask=x_mask,
                              labels=None).detach().cpu().numpy())[:, 0]
                    y_pred[j * self.base_batch_size:(j + 1) * self.base_batch_size] = batch_y_pred
                # Compute the score
                auc_score = self.evaluator.get_final_metric(y_pred)
                print("epoch: %d duration: %d min auc_score: %.4f" %
                      (epoch, int((time.time() - train_start_time) / 60), auc_score))
                if not self.debug_mode:
                    state_dict = model.state_dict()
                    stage = int((i + 1) / valid_every)
                    train_duration = int((time.time() - train_start_time) / 60)
                    valid_duration = int((time.time() - valid_start_time) / 60)
                    if epoch == 0 and stage == 1:
                        # model[bert][seed][epoch][stage][model_name][stage_train_duration][valid_duration][score].bin
                        model_name = "model2/model_bert_%d_%d_%d_%s_%dmin_%dmin_%.4f.bin" % (
                            self.seed, epoch + 1, stage, self.model_name,
                            train_duration, valid_duration, auc_score)
                    else:
                        # model[bert][seed][epoch][stage][model_name][score].bin
                        model_name = "model2/model_bert_%d_%d_%d_%s_%.4f.bin" % (
                            self.seed, epoch + 1, stage, self.model_name, auc_score)
                    torch.save(state_dict, os.path.join(self.data_dir, model_name))
                model.train()
    # Release the training inputs and the model
    training_history = [train_loader, valid_loader, model, optimizer,
                        param_optimizer, optimizer_grouped_parameters]
    for variable in training_history:
        del variable
    gc.collect()
import os
from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch

model_path = "/root/nb/data/nl2sql_data/chinese_L-12_H-768_A-12/"
if not os.path.exists(model_path + "pytorch_model.bin"):
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        model_path + 'bert_model.ckpt',
        model_path + 'bert_config.json',
        model_path + 'pytorch_model.bin')

"""
get /root/nb/data/nl2sql_data/chinese_L-12_H-768_A-12/pytorch_model.bin /Users/hedongfeng/Desktop/
put /Users/hedongfeng/PycharmProjects/unintended_bias/data/nl2sql_data.zip /root/nb/data/
"""
MAX_SEQUENCE_LENGTH = 300
SEED = 1234
EPOCHS = 10
data_dir = './data/text_data.csv'
working_dir = './'
valid_size = 100000
num_to_load = 100000
target_column = 'target'

from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertAdam

BERT_MODEL_PATH = './bert_model/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/'
convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
    BERT_MODEL_PATH + 'bert_model.ckpt',
    BERT_MODEL_PATH + 'bert_config.json',
    working_dir + 'pytorch_model.bin')
shutil.copyfile(BERT_MODEL_PATH + 'bert_config.json',
                working_dir + 'bert_config.json')

# BERT config file (read from the local model directory, matching
# BERT_MODEL_PATH above)
from pytorch_pretrained_bert import BertConfig
bert_config = BertConfig(BERT_MODEL_PATH + 'bert_config.json')

# Convert text to BERT input format
def convert_lines(example, max_seq_length, tokenizer):
import torch
from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM

BERT_MODEL_PATH = '../models/rubert_cased_deeppavlov/'
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH, cache_dir=None,
                                          do_lower_case=False)
convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
    BERT_MODEL_PATH + 'bert_model.ckpt',
    BERT_MODEL_PATH + 'bert_config.json',
    '../models/rubert_cased_torch/pytorch_model.bin')
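# Hypothetical follow-up (not in the original snippet): from_pretrained expects
# bert_config.json (and vocab.txt for the tokenizer) next to pytorch_model.bin,
# so, as the other conversion scripts in this collection do, the config would
# typically be copied into the target directory as well:
import shutil

shutil.copyfile(BERT_MODEL_PATH + 'bert_config.json',
                '../models/rubert_cased_torch/bert_config.json')
model = BertModel.from_pretrained('../models/rubert_cased_torch/', cache_dir=None)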
import os
from pytorch_pretrained_bert.convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch

# in_dir = '/media/dpappas/dpappas_data/biobert/pubmed_pmc_470k/'
in_dir = '/media/dpappas/dpappas_data/biobert/biobert_pubmed/'
out_bin_path = os.path.join(in_dir, 'pytorch_model.bin')
if not os.path.exists(out_bin_path):
    convert_tf_checkpoint_to_pytorch(os.path.join(in_dir, 'biobert_model.ckpt'),
                                     os.path.join(in_dir, 'bert_config.json'),
                                     out_bin_path)
BERT_MODEL_PATH = Path('uncased_L-12_H-768_A-12/')
# BERT_MODEL_PATH = Path('/content/BERT')

!pip install pytorch-pretrained-bert

# Add the BERT PyTorch repo to the PATH using files from:
# https://github.com/huggingface/pytorch-pretrained-BERT
package_dir_a = "../input/ppbert/pytorch-pretrained-bert/pytorch-pretrained-BERT"
sys.path.insert(0, package_dir_a)

from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertAdam, BertConfig

# Translate the model from TensorFlow to PyTorch
convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
    str(BERT_MODEL_PATH / 'bert_model.ckpt'),
    str(BERT_MODEL_PATH / 'bert_config.json'),
    str(WORK_DIR / 'pytorch_model.bin')
)
shutil.copyfile(BERT_MODEL_PATH / 'bert_config.json',
                WORK_DIR / 'bert_config.json')
bert_config = BertConfig(str(BERT_MODEL_PATH / 'bert_config.json'))

# Converting the lines to BERT format
def convert_lines(example, max_seq_length, tokenizer):
    max_seq_length -= 2
    all_tokens = []
    longer = 0
    for text in tqdm_notebook(example):
        tokens_a = tokenizer.tokenize(text)
        if len(tokens_a) > max_seq_length:
            tokens_a = tokens_a[:max_seq_length]
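            # The original snippet cuts off at this point. A plausible
            # completion, following the widely used Jigsaw BERT kernel pattern
            # ([CLS]/[SEP] wrapping, zero right-padding); this is an
            # assumption, not the verbatim original:
            longer += 1
        one_token = tokenizer.convert_tokens_to_ids(
            ["[CLS]"] + tokens_a + ["[SEP]"]
        ) + [0] * (max_seq_length - len(tokens_a))
        all_tokens.append(one_token)
    return np.array(all_tokens)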
# encoding:utf-8
import os
from pybert.config.basic_config import configs as config
from pytorch_pretrained_bert.convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch

if __name__ == "__main__":
    os.system('cp {config} {save_path}'.format(
        config=config['pretrained']['bert']['bert_config_file'],
        save_path=config['pretrained']['bert']['bert_model_dir']))
    convert_tf_checkpoint_to_pytorch(
        config['pretrained']['bert']['tf_checkpoint_path'],
        config['pretrained']['bert']['bert_config_file'],
        config['pretrained']['bert']['pytorch_model_path'])

# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert BERT checkpoint."""
tokenizer = BertTokenizer.from_pretrained(args['bert_model'],
                                          do_lower_case=args['do_lower_case'])

train_examples = None
num_train_steps = None
if args['do_train']:
    train_examples = processor.get_train_examples(args['full_data_dir'],
                                                  size=args['train_size'])
    # train_examples = processor.get_train_examples(args['data_dir'], size=args['train_size'])
    num_train_steps = int(
        len(train_examples) / args['train_batch_size'] /
        args['gradient_accumulation_steps'] * args['num_train_epochs'])

convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
    '/scratch/cc5048/bert/uncased_L-12_H-768_A-12/bert_model.ckpt',
    '/scratch/cc5048/bert/uncased_L-12_H-768_A-12/bert_config.json',
    '/scratch/cc5048/bert/uncased_L-12_H-768_A-12/pytorch_model.bin')

# Prepare model
def get_model():
    # pdb.set_trace()
    if model_state_dict:
        model = BertForMultiLabelSequenceClassification.from_pretrained(
            args['bert_model'], num_labels=num_labels,
            state_dict=model_state_dict)
    else:
        model = BertForMultiLabelSequenceClassification.from_pretrained(
            args['bert_model'], num_labels=num_labels)
    return model

model = get_model()
import torch
import os, sys
import pandas as pd  # needed for pd.read_csv below
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertForSequenceClassification

if sys.argv[2] == 'cased':
    VOCAB = './bert_cased/vocab.txt'
    MODEL = './bert_cased'
else:
    VOCAB = './bert_uncased/vocab.txt'
    MODEL = './bert_uncased'

# Convert the TF checkpoint to pytorch_model.bin if it is not there yet
if not os.path.exists(MODEL + '/pytorch_model.bin'):
    from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        MODEL + '/bert_model.ckpt',
        MODEL + '/bert_config.json',
        MODEL + '/pytorch_model.bin')

# Load data
train_data = pd.read_csv('./project2_data/olid-training-v1.0.tsv', sep='\t',
                         index_col='id')
if sys.argv[1] == 'A':
    test = pd.read_csv('./project2_data/testset-levela.tsv', sep='\t',
                       index_col='id')
    train = pd.DataFrame({'tweet': train_data['tweet'],
                          'label': train_data['subtask_a']})
elif sys.argv[1] == 'B':
def train(self):
    if self.debug_mode:
        self.epochs = 1
    # Build the dataloaders
    train_loader, valid_loader = self.create_dataloader()
    # Training
    self.seed_everything()
    lr = 2e-5
    accumulation_steps = math.ceil(self.batch_size / self.base_batch_size)
    # Convert the pretrained TF BERT checkpoint to PyTorch
    if not os.path.exists(self.work_dir + 'pytorch_model.bin'):
        print("Convert pre-trained model")
        convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
            self.bert_model_path + 'bert_model.ckpt',
            self.bert_model_path + 'bert_config.json',
            self.work_dir + 'pytorch_model.bin')
        shutil.copyfile(self.bert_model_path + 'bert_config.json',
                        self.work_dir + 'bert_config.json')
    # Load the pretrained model
    print("Load pre-trained model")
    model = BertNeuralNet.from_pretrained(self.work_dir, cache_dir=None)
    model.zero_grad()
    model = model.to(self.device)
    # Use a different weight_decay for each parameter group
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    epoch_steps = int(self.train_len * 0.5 / self.base_batch_size / accumulation_steps)
    num_train_optimization_steps = int(self.epochs * epoch_steps)
    valid_every = math.floor(epoch_steps * accumulation_steps / 5)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=lr,
                         warmup=0.05,
                         t_total=num_train_optimization_steps)
    # Decaying learning-rate schedule (unused)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.6 ** epoch)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    # Start training
    print("Train")
    best_auc_score_1 = 0
    best_auc_score_2 = 0
    best_auc_score_3 = 0
    best_auc_score_4 = 0
    f_log = open("train_log.txt", "w")
    for epoch in range(self.epochs):
        model.train()
        optimizer.zero_grad()
        # Load and train on each batch
        train_start_time = time.time()
        for i, batch_data in enumerate(train_loader):
            x_batch = batch_data[0]
            y_batch = batch_data[1]
            target_weight_batch = batch_data[2]
            aux_weight_batch = batch_data[3]
            identity_weight_batch = batch_data[4]
            np_weight_batch = batch_data[5]
            np_identity_weight_batch = batch_data[6]
            y_pred = model(x_batch.to(self.device),
                           attention_mask=(x_batch > 0).to(self.device),
                           labels=None)
            target_loss, aux_loss, identity_loss, np_loss = self.custom_loss(
                y_pred, y_batch, epoch, target_weight_batch, aux_weight_batch,
                identity_weight_batch, np_weight_batch)
            loss = target_loss + aux_loss + identity_loss + np_loss
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            if (i + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            # Validation
            if (i + 1) % valid_every == 0:
                model.eval()
                stage = int((i + 1) / valid_every)
                train_stage_duration = int((time.time() - train_start_time) / 60)
                valid_start_time = time.time()
                y_pred = np.zeros((len(self.train_df) - self.train_len))
                for j, valid_batch_data in enumerate(valid_loader):
                    x_batch = valid_batch_data[0]
                    batch_y_pred = self.sigmoid(
                        model(x_batch.to(self.device),
                              attention_mask=(x_batch > 0).to(self.device),
                              labels=None).detach().cpu().numpy())[:, 0]
                    y_pred[j * self.base_batch_size:(j + 1) * self.base_batch_size] = batch_y_pred
                # Compute the score
                auc_score = self.evaluator.get_final_metric(y_pred)
                valid_duration = int((time.time() - valid_start_time) / 60)
                train_start_time = time.time()
                f_log.write("epoch: %d stage: %d train_stage_duration: %dmin valid_duration: %dmin auc_score: %.4f\n" %
                            (epoch, stage, train_stage_duration, valid_duration, auc_score))
                print("epoch: %d stage: %d train_stage_duration: %dmin valid_duration: %dmin auc_score: %.4f" %
                      (epoch, stage, train_stage_duration, valid_duration, auc_score))
                # Keep the four best checkpoints
                if auc_score > best_auc_score_4:
                    state_dict = model.state_dict()
                    if auc_score > best_auc_score_1:
                        best_auc_score_1 = auc_score
                        torch.save(state_dict, "model1.bin")
                    elif auc_score > best_auc_score_2:
                        best_auc_score_2 = auc_score
                        torch.save(state_dict, "model2.bin")
                    elif auc_score > best_auc_score_3:
                        best_auc_score_3 = auc_score
                        torch.save(state_dict, "model3.bin")
                    else:
                        best_auc_score_4 = auc_score
                        torch.save(state_dict, "model4.bin")
                    with open("model_score.txt", "w") as f:
                        f.write("model1: %.4f model2: %.4f model3: %.4f model4: %.4f" %
                                (best_auc_score_1, best_auc_score_2,
                                 best_auc_score_3, best_auc_score_4))
                    print("model1: %.4f model2: %.4f model3: %.4f model4: %.4f" %
                          (best_auc_score_1, best_auc_score_2,
                           best_auc_score_3, best_auc_score_4))
                model.train()
    if self.last is True:
        state_dict = model.state_dict()
        torch.save(state_dict, "model_last.bin")
    # Release the training inputs and the model
    training_history = [train_loader, valid_loader, model, optimizer,
                        param_optimizer, optimizer_grouped_parameters]
    for variable in training_history:
        del variable
    gc.collect()
#!/usr/bin/python
# coding: utf-8
import shutil

from pytorch_pretrained_bert import convert_tf_checkpoint_to_pytorch

BERT_MODEL_PATH = "models/chinese_L-12_H-768_A-12/"

if __name__ == "__main__":
    convert_tf_checkpoint_to_pytorch.convert_tf_checkpoint_to_pytorch(
        BERT_MODEL_PATH + "bert_model.ckpt",
        BERT_MODEL_PATH + "bert_config.json",
        "models/pytorch_pretrain/pytorch_model.bin",
    )
    shutil.copyfile(
        BERT_MODEL_PATH + "bert_config.json",
        "models/pytorch_pretrain/bert_config.json",
    )
    shutil.copyfile(BERT_MODEL_PATH + "vocab.txt",
                    "models/pytorch_pretrain/vocab.txt")
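# Hypothetical sanity check (not in the original script): after conversion the
# target directory holds pytorch_model.bin, bert_config.json, and vocab.txt,
# so the converted Chinese BERT can be loaded end to end:
from pytorch_pretrained_bert import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained("models/pytorch_pretrain/")
model = BertModel.from_pretrained("models/pytorch_pretrain/")
model.eval()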