def do_infer(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    config = BertConfig.from_pretrained(args.name)
    tokenizer = BertTokenizer.from_pretrained(args.name)
    cpe = ChineseAndPunctuationExtractor()
    label_map, id2label = json.load(
        open("../origin_data/labels2idx.json", encoding="utf8"))
    config.num_labels = len(label_map)

    features = from_file("../origin_data/duie_test1.json", tokenizer, label_map,
                         cpe, args.max_length)

    bert = BertForTokenClassification.from_pretrained(
        args.name, config=config).to(device)
    # Load the fine-tuned weights saved by do_train (a state_dict, not a checkpoint dir)
    bert.load_state_dict(torch.load("./model_2.pt", map_location=device))
    bert.eval()

    infer_generator = batch_generator(features, args.batch_size, False)
    predict_logits = []
    for step, batch in enumerate(infer_generator):
        batch_input_ids = batch[0].to(device=device)
        batch_input_mask = batch[1].to(device=device)
        batch_type_ids = batch[2].to(device=device)
        outputs = bert(batch_input_ids, batch_input_mask, batch_type_ids)
        batch_predict = torch.sigmoid(outputs.logits).cpu().detach().numpy()
        predict_logits.append(batch_predict)

    predict_logits = np.concatenate(predict_logits, axis=0)
    assert predict_logits.shape[0] == len(features)
    # Binarize the sigmoid scores with a 0.3 threshold
    predict_logits[predict_logits >= 0.3] = 1
    predict_logits[predict_logits < 0.3] = 0
    predict_logits = predict_logits.astype(int).tolist()

    result = decoding(features, predict_logits, id2label)
    with open("result.json", "w", encoding="utf8") as outp:
        for line in result:
            outp.write(json.dumps(line, ensure_ascii=False) + "\n")
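# The batch_generator helper used by do_infer and do_train is not shown in this
# excerpt. Below is a minimal, hypothetical sketch of what it is assumed to do:
# yield tuples of stacked tensors (input_ids, input_mask, token_type_ids, label_ids)
# from already-padded features, shuffling only when asked. The attribute names on
# the feature objects are assumptions, not the repository's actual interface.
import random
import torch

def batch_generator(features, batch_size, shuffle=True):
    """Yield mini-batches of stacked feature tensors (assumed interface)."""
    indices = list(range(len(features)))
    if shuffle:
        random.shuffle(indices)
    for start in range(0, len(indices), batch_size):
        chunk = [features[i] for i in indices[start:start + batch_size]]
        yield (torch.tensor([f.input_ids for f in chunk], dtype=torch.long),
               torch.tensor([f.input_mask for f in chunk], dtype=torch.long),
               torch.tensor([f.token_type_ids for f in chunk], dtype=torch.long),
               # float labels, since training logs a BCE-style loss
               torch.tensor([f.label_ids for f in chunk], dtype=torch.float))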
# 查看现在使用的设备 print('current device:', torch.cuda.current_device()) n_gpu = 1 params.n_gpu = n_gpu # Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) params.seed = args.seed if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) # Set the logger utils.set_logger(save=True, log_path=os.path.join(params.params_path, 'train.log')) logging.info("Model type: ") logging.info("device: {}".format(params.device)) logging.info('Init pre-train model...') bert_config = BertConfig.from_json_file(os.path.join(params.bert_model_dir, 'bert_config.json')) model = BertForTokenClassification(config=bert_config, params=params) nezha_utils.torch_init_model(model, os.path.join(params.bert_model_dir, 'pytorch_model.bin')) # 保存bert config model.to(params.device) if params.n_gpu > 1 and args.multi_gpu: model = torch.nn.DataParallel(model) logging.info('-done') # Train and evaluate the model logging.info("Starting training for {} epoch(s)".format(args.epoch_num)) train_and_evaluate(model, params, args.restore_file)
# 查看现在使用的设备 print('current device:', torch.cuda.current_device()) n_gpu = 1 params.n_gpu = n_gpu # Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) params.seed = args.seed if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) # Set the logger utils.set_logger(save=True, log_path=os.path.join(params.params_path, 'train.log')) logging.info("Model type: ") logging.info("device: {}".format(params.device)) logging.info('Init pre-train model...') model = BertForTokenClassification.from_pretrained(params.bert_model_dir, params=params) # 保存bert config model.to(params.device) if params.n_gpu > 1 and args.multi_gpu: model = torch.nn.DataParallel(model) logging.info('-done') # Train and evaluate the model logging.info("Starting training for {} epoch(s)".format(args.epoch_num)) train_and_evaluate(model, params, args.restore_file)
params.seed = args.seed
if n_gpu > 0:
    torch.cuda.manual_seed_all(args.seed)

# Set the logger
utils.set_logger(save=True, log_path=os.path.join(params.params_path, 'train.log'))
logging.info(f"Model type: {params.pre_model_type}_{params.ds_encoder_type}_CRF")
logging.info("device: {}".format(params.device))

logging.info('Init pre-train model...')
if params.pre_model_type == 'NEZHA':
    bert_config = NEZHAConfig.from_json_file(
        os.path.join(params.bert_model_dir, 'bert_config.json'))
    model = BertForTokenClassification(config=bert_config, params=params)
    # NEZHA init
    torch_init_model(model, os.path.join(params.bert_model_dir, 'pytorch_model.bin'))
elif params.pre_model_type == 'RoBERTa':
    bert_config = BertConfig.from_json_file(
        os.path.join(params.bert_model_dir, 'bert_config.json'))
    model = BertForTokenClassification.from_pretrained(
        config=bert_config,
        pretrained_model_name_or_path=params.bert_model_dir,
        params=params)
else:
    raise ValueError('Pre-train Model type must be NEZHA or ELECTRA or RoBERTa!')
logging.info('-done')
def do_train(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = BertTokenizer.from_pretrained(args.name)
    cpe = ChineseAndPunctuationExtractor()
    label_map, _ = json.load(
        open("../origin_data/labels2idx.json", encoding="utf8"))

    train_features = from_file("../origin_data/duie_train.json", tokenizer,
                               label_map, cpe, args.max_length)
    dev_features = from_file("../origin_data/duie_dev.json", tokenizer,
                             label_map, cpe, args.max_length)
    counts = len(train_features)
    logger.info(
        f"Train dataset size: {counts}, Dev dataset size: {len(dev_features)}")

    if len(train_features) % args.batch_size == 0:
        one_epoch_steps = len(train_features) // args.batch_size
    else:
        one_epoch_steps = len(train_features) // args.batch_size + 1
    total_steps = one_epoch_steps * args.epochs
    logger.info(f"Training steps: {total_steps}")

    bert = BertForTokenClassification.from_pretrained(
        args.name, num_labels=len(label_map)).to(device)
    optimizer = AdamW(params=bert.parameters(), lr=args.lr)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(total_steps * 0.1),
        num_training_steps=total_steps)

    best_eval_f1 = 0
    min_eval_loss = float("inf")
    for epoch in range(args.epochs):
        train_generator = batch_generator(train_features, args.batch_size)
        valid_generator = batch_generator(dev_features, args.batch_size, False)
        logger.info(f"======== Epoch {epoch + 1} / {args.epochs} ========")
        logger.info("Training...")
        bert.train()
        start_train = time.time()
        total_train_loss = 0
        for step, batch in enumerate(train_generator):
            batch_input_ids = batch[0].to(device=device)
            batch_input_mask = batch[1].to(device=device)
            batch_type_ids = batch[2].to(device=device)
            batch_labels = batch[3].to(device=device)
            outputs = bert(batch_input_ids, batch_input_mask, batch_type_ids,
                           labels=batch_labels)
            bert.zero_grad()
            outputs.loss.backward()
            torch.nn.utils.clip_grad_norm_(bert.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            total_train_loss += outputs.loss.item()
            if step % 100 == 0:
                logger.info(
                    f"  Step: {step + 1:>5}/{one_epoch_steps:>5}, current loss: {outputs.loss.item():.6f}")
        average_train_loss = total_train_loss / (step + 1)
        training_time = time.time() - start_train
        logger.info(
            f"  Average training BCELoss: {average_train_loss:.6f}; Take time: {training_time:.3f}")

        logger.info("Running Validation...")
        bert.eval()
        start_eval = time.time()
        total_eval_loss = 0
        total_eval_f1 = 0
        for step, batch in enumerate(valid_generator):
            batch_input_ids = batch[0].to(device=device)
            batch_input_mask = batch[1].to(device=device)
            batch_type_ids = batch[2].to(device=device)
            batch_labels = batch[3].to(device=device)
            with torch.no_grad():
                outputs = bert(batch_input_ids, batch_input_mask, batch_type_ids,
                               labels=batch_labels)
            total_eval_loss += outputs.loss.item()
            # total_eval_f1 += metric(outputs.logits, batch_labels)
        average_eval_loss = total_eval_loss / (step + 1)
        # average_eval_f1 = total_eval_f1 / (step + 1)
        validation_time = time.time() - start_eval
        logger.info(
            f"  Average eval BCELoss: {average_eval_loss:.6f}; Take time: {validation_time:.3f}")

        # if average_eval_f1 > best_eval_f1:
        #     best_eval_f1 = average_eval_f1
        #     logger.info("  Save model...")
        #     torch.save(bert.state_dict(), f"model_{epoch}.pt")
        if average_eval_loss < min_eval_loss:
            min_eval_loss = average_eval_loss
            logger.info("  Save model...")
            torch.save(bert.state_dict(), f"model_{epoch}.pt")
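# A minimal, hypothetical entry point wiring up the do_train/do_infer functions
# above. The flag names (--name, --max_length, --batch_size, --epochs, --lr,
# --do_train) are inferred from how `args` is used in those functions; they are
# assumptions, not the repository's actual CLI.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--name", default="bert-base-chinese",
                        help="pretrained model name or local path")
    parser.add_argument("--max_length", type=int, default=128)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--do_train", action="store_true",
                        help="train if set, otherwise run inference")
    args = parser.parse_args()

    if args.do_train:
        do_train(args)
    else:
        do_infer(args)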
def train(train_iter, test_iter, config):
    """Train the token-classification model and evaluate it on the dev set."""
    # Prepare model
    # Reload weights from restore_file if specified
    if config.pretrainning_model == 'nezha':  # NEZHA model
        Bert_config = BertConfig.from_json_file(config.bert_config_file)
        model = BertForTokenClassification(config=Bert_config, params=config)
        nezha_utils.torch_init_model(model, config.bert_file)
    elif config.pretrainning_model == 'albert':
        Bert_config = AlbertConfig.from_pretrained(config.model_path)
        model = BertForTokenClassification.from_pretrained(config.model_path,
                                                           config=Bert_config)
    else:
        Bert_config = RobertaConfig.from_pretrained(config.bert_config_file,
                                                    output_hidden_states=True)
        model = BertForTokenClassification.from_pretrained(
            config=Bert_config,
            params=config,
            pretrained_model_name_or_path=config.model_path)
    Bert_config.output_hidden_states = True  # expose the output of every layer
    model.to(device)

    # Multi-GPU training
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer for fine-tuning
    # Collect model parameters
    param_optimizer = list(model.named_parameters())
    # Pre-trained model parameters (NEZHA parameters are also named 'bert')
    param_pre = [(n, p) for n, p in param_optimizer
                 if 'bert' in n or 'electra' in n]
    # Intermediate (task-specific) parameters
    param_middle = [
        (n, p) for n, p in param_optimizer
        if not any([s in n for s in ('bert', 'crf', 'electra', 'albert')])
        or 'dym_weight' in n
    ]
    # crf param
    # Weights that should not receive weight decay
    no_decay = ['bias', 'LayerNorm', 'dym_weight', 'layer_norm']
    # Group the parameters
    optimizer_grouped_parameters = [
        # Pre-trained parameters, with weight decay
        {
            'params': [p for n, p in param_pre if not any(nd in n for nd in no_decay)],
            'weight_decay': config.decay_rate,
            'lr': config.embed_learning_rate
        },
        # Pre-trained parameters, without weight decay
        {
            'params': [p for n, p in param_pre if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
            'lr': config.embed_learning_rate
        },
        # Intermediate parameters, with weight decay
        {
            'params': [p for n, p in param_middle if not any(nd in n for nd in no_decay)],
            'weight_decay': config.decay_rate,
            'lr': config.learning_rate
        },
        # Intermediate parameters, without weight decay
        {
            'params': [p for n, p in param_middle if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
            'lr': config.learning_rate
        },
    ]
    num_train_optimization_steps = (train_iter.num_records //
                                    config.gradient_accumulation_steps * config.train_epoch)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         warmup=config.warmup_proportion,
                         schedule="warmup_cosine",
                         t_total=num_train_optimization_steps)

    logger.info("***** Running training *****")
    logger.info(" Batch size = %d", config.batch_size)
    logger.info(" Num epochs = %d", config.train_epoch)
    logger.info(" Learning rate = %f", config.learning_rate)

    cum_step = 0
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(
        os.path.join(config.save_model, "runs_" + str(gpu_id), timestamp))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    print("Writing to {}\n".format(out_dir))

    draw_step_list = []
    draw_loss_list = []
    for i in range(config.train_epoch):
        model.train()
        for input_ids_list, input_mask_list, segment_ids_list, label_ids_list, tokens_list in tqdm(train_iter):
            # Convert the Python lists to tensors on the target device
            loss = model(input_ids=list2ts2device(input_ids_list),
                         token_type_ids=list2ts2device(segment_ids_list),
                         attention_mask=list2ts2device(input_mask_list),
                         labels=list2ts2device(label_ids_list))
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu
            # Gradient accumulation
            if config.gradient_accumulation_steps > 1:
                loss = loss / config.gradient_accumulation_steps
            if cum_step % 10 == 0:
                draw_step_list.append(cum_step)
                draw_loss_list.append(loss.item())  # store the scalar, not the tensor
            if cum_step % 100 == 0:
                format_str = 'step {}, loss {:.4f} lr {:.5f}'
                print(format_str.format(cum_step, loss, config.learning_rate))
            loss.backward()  # back-propagate to obtain the gradients
            if (cum_step + 1) % config.gradient_accumulation_steps == 0:
                # perform updates using calculated gradients
                optimizer.step()
                model.zero_grad()
            cum_step += 1

        p, r, f1 = set_test(model, test_iter)
        # lr_scheduler learning-rate decay step
        print('dev set : step_{}, precision_{}, recall_{}, F1_{}'.format(
            cum_step, p, r, f1))
        # Save the model
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Only save the model itself
        output_model_file = os.path.join(
            out_dir, 'model_{:.4f}_{:.4f}_{:.4f}_{}.bin'.format(p, r, f1, str(cum_step)))
        torch.save(model_to_save, output_model_file)

    with open(Config().processed_data + 'step_loss_data.pickle', 'wb') as mf:
        draw_dict = {'step': draw_step_list, 'loss': draw_loss_list}
        pickle.dump(draw_dict, mf)
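# The list2ts2device helper that the training loop relies on is not shown in this
# excerpt. A minimal sketch of what it is assumed to do is given below: convert a
# batch of (padded) Python lists into a LongTensor and move it to the module-level
# `device`. The exact dtype handling in the real helper may differ.
import torch

def list2ts2device(target_list):
    """Convert a nested Python list to a tensor on the training device (assumed)."""
    return torch.tensor(target_list, dtype=torch.long).to(device)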
else:
    # Set the GPU used by the model
    torch.cuda.set_device(args.device_id)
    # Show the device currently in use
    print('current device:', torch.cuda.current_device())
    n_gpu = 1
    params.n_gpu = n_gpu

# Set the random seed for reproducible experiments
random.seed(args.seed)
torch.manual_seed(args.seed)
params.seed = args.seed
if n_gpu > 0:
    torch.cuda.manual_seed_all(args.seed)

# Set the logger
utils.set_logger(save=True, log_path=os.path.join(params.params_path, 'train.log'))
logging.info("Model type: ")
logging.info("device: {}".format(params.device))

logging.info('Init pre-train model...')
bert_config = NEZHAConfig.from_json_file(os.path.join(params.bert_model_dir, 'bert_config.json'))
model = BertForTokenClassification(config=bert_config, params=params)
# NEZHA init
torch_init_model(model, os.path.join(params.bert_model_dir, 'pytorch_model.bin'))
logging.info('-done')

# Train and evaluate the model
logging.info("Starting training for {} epoch(s)".format(args.epoch_num))
train_and_evaluate(model, params, args.restore_file)