def do_predict():
    device = torch.device(args.device)
    logger = log("test")

    # Reads label_map.
    label_map_path = os.path.join(args.data_path, "predicate2id.json")
    if not (os.path.exists(label_map_path) and os.path.isfile(label_map_path)):
        sys.exit("{} does not exist or is not a file.".format(label_map_path))
    with open(label_map_path, 'r', encoding='utf8') as fp:
        label_map = json.load(fp)
    num_classes = (len(label_map.keys()) - 2) * 2 + 2

    # Loads pretrained model ERNIE.
    logger.info("Loading the model and tokenizer...")
    try:
        model = ErnieForTokenClassification(num_classes=num_classes)
    except Exception as e:
        logger.error(e)
        raise RuntimeError("Loading model error: {}".format(e))
    tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-1.0")
    criterion = BCELossForDuIE()
    logger.info("Finished loading.")

    # Loads dataset.
    test_dataset = DuIEDataset.from_file(args.predict_data_file, tokenizer,
                                         args.max_seq_length, True)
    collator = DataCollator()
    test_data_loader = DataLoader(dataset=test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  collate_fn=collator)

    # Loads model parameters.
    if not (os.path.exists(args.init_checkpoint)
            and os.path.isfile(args.init_checkpoint)):
        sys.exit("wrong path: init checkpoint {} does not exist".format(
            args.init_checkpoint))
    state_dict = torch.load(args.init_checkpoint, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)

    # Does predictions.
    logger.info("\n=====start predicting=====")
    evaluate(model, criterion, test_data_loader, args.predict_data_file,
             "predict", logger)
    logger.info("=====predicting complete=====")
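# Why num_classes = (len(label_map) - 2) * 2 + 2: a minimal worked example,
# assuming the DuIE label map holds two special tags ("O" and "I") plus N
# predicates, each contributing one subject tag and one object tag. The map
# below is hypothetical, for illustration only.
def _example_num_classes():
    example_label_map = {"O": 0, "I": 1, "birthplace": 2, "author": 3}
    n_predicates = len(example_label_map) - 2  # -> 2 predicates
    return n_predicates * 2 + 2                # -> 6 classes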
def do_predict():
    paddle.set_device(args.device)

    # Reads label_map.
    label_map_path = os.path.join(args.data_path, "predicate2id.json")
    if not (os.path.exists(label_map_path) and os.path.isfile(label_map_path)):
        sys.exit("{} does not exist or is not a file.".format(label_map_path))
    with open(label_map_path, 'r', encoding='utf8') as fp:
        label_map = json.load(fp)
    num_classes = (len(label_map.keys()) - 2) * 2 + 2

    # Loads pretrained model ERNIE.
    model = ErnieForTokenClassification.from_pretrained(
        "ernie-1.0", num_classes=num_classes)
    tokenizer = ErnieTokenizer.from_pretrained("ernie-1.0")
    criterion = BCELossForDuIE()

    # Loads dataset.
    test_dataset = DuIEDataset.from_file(args.predict_data_file, tokenizer,
                                         args.max_seq_length, True)
    collator = DataCollator()
    # Note: drop_last=True discards a partial final batch at prediction time.
    test_batch_sampler = paddle.io.BatchSampler(test_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=False,
                                                drop_last=True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  batch_sampler=test_batch_sampler,
                                  collate_fn=collator,
                                  return_list=True)

    # Loads model parameters.
    if not (os.path.exists(args.init_checkpoint)
            and os.path.isfile(args.init_checkpoint)):
        sys.exit("wrong path: init checkpoint {} does not exist".format(
            args.init_checkpoint))
    state_dict = paddle.load(args.init_checkpoint)
    model.set_dict(state_dict)

    # Does predictions.
    print("\n=====start predicting=====")
    evaluate(model, criterion, test_data_loader, args.predict_data_file,
             "predict")
    print("=====predicting complete=====")
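# A toy sketch of what drop_last=True costs at prediction time, with
# hypothetical sizes: the tail examples that do not fill a batch are dropped.
def _dropped_tail(num_examples=1000, batch_size=64):
    # 1000 // 64 = 15 full batches -> 960 predictions, 40 examples dropped.
    return num_examples - (num_examples // batch_size) * batch_size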
def do_train():
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    # Reads label_map.
    label_map_path = os.path.join(args.data_path, "predicate2id.json")
    if not (os.path.exists(label_map_path) and os.path.isfile(label_map_path)):
        sys.exit("{} does not exist or is not a file.".format(label_map_path))
    with open(label_map_path, 'r', encoding='utf8') as fp:
        label_map = json.load(fp)
    num_classes = (len(label_map.keys()) - 2) * 2 + 2

    # Loads pretrained model ERNIE.
    model = ErnieForTokenClassification.from_pretrained(
        "ernie-1.0", num_classes=num_classes)
    model = paddle.DataParallel(model)
    tokenizer = ErnieTokenizer.from_pretrained("ernie-1.0")
    criterion = BCELossForDuIE()

    # Loads dataset.
    train_dataset = DuIEDataset.from_file(
        os.path.join(args.data_path, 'train_data.json'), tokenizer,
        args.max_seq_length, True)
    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    collator = DataCollator()
    train_data_loader = DataLoader(dataset=train_dataset,
                                   batch_sampler=train_batch_sampler,
                                   collate_fn=collator,
                                   return_list=True)
    eval_file_path = os.path.join(args.data_path, 'dev_data.json')
    test_dataset = DuIEDataset.from_file(eval_file_path, tokenizer,
                                         args.max_seq_length, True)
    test_batch_sampler = paddle.io.BatchSampler(test_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=False,
                                                drop_last=True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  batch_sampler=test_batch_sampler,
                                  collate_fn=collator,
                                  return_list=True)

    # Defines learning rate strategy.
    steps_by_epoch = len(train_data_loader)
    num_training_steps = steps_by_epoch * args.num_train_epochs
    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps,
                                         args.warmup_ratio)
    # Applies weight decay to all parameters except bias and norm layers.
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in [
            p.name for n, p in model.named_parameters()
            if not any(nd in n for nd in ["bias", "norm"])
        ])
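    # A hedged note: Paddle's AdamW calls apply_decay_param_fun with each
    # parameter's name and applies weight decay only where it returns True,
    # so the comprehension above whitelists every name containing neither
    # "bias" nor "norm". A tiny self-check on hypothetical names:
    _names = ["linear.weight", "linear.bias", "layer_norm.weight"]
    _decayed = [n for n in _names
                if not any(nd in n for nd in ["bias", "norm"])]
    assert _decayed == ["linear.weight"]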
    # Starts training.
    global_step = 0
    logging_steps = 50
    save_steps = 10000
    tic_train = time.time()
    for epoch in range(args.num_train_epochs):
        print("\n=====start training of epoch %d=====" % epoch)
        tic_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_data_loader):
            input_ids, seq_lens, tok_to_orig_start_index, \
                tok_to_orig_end_index, labels = batch
            logits = model(input_ids=input_ids)
            # Masks out the special tokens [PAD] (0), [CLS] (1) and [SEP] (2).
            mask = (input_ids != 0).logical_and(input_ids != 1).logical_and(
                input_ids != 2)
            loss = criterion(logits, labels, mask)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()
            loss_item = loss.numpy().item()
            if (global_step % logging_steps == 0
                    and paddle.distributed.get_rank() == 0):
                print(
                    "epoch: %d / %d, steps: %d / %d, loss: %f, speed: %.2f step/s"
                    % (epoch, args.num_train_epochs, step, steps_by_epoch,
                       loss_item, logging_steps / (time.time() - tic_train)))
                tic_train = time.time()
            if (global_step % save_steps == 0 and global_step != 0
                    and paddle.distributed.get_rank() == 0):
                print("\n=====start evaluating ckpt of %d steps=====" %
                      global_step)
                precision, recall, f1 = evaluate(model, criterion,
                                                 test_data_loader,
                                                 eval_file_path, "eval")
                print("precision: %.2f\t recall: %.2f\t f1: %.2f\t" %
                      (100 * precision, 100 * recall, 100 * f1))
                print("saving checkpoint model_%d.pdparams to %s " %
                      (global_step, args.output_dir))
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.output_dir,
                                 "model_%d.pdparams" % global_step))
                model.train()  # back to train mode
            global_step += 1
        tic_epoch = time.time() - tic_epoch
        print("epoch time footprint: %d hour %d min %d sec" %
              (tic_epoch // 3600, (tic_epoch % 3600) // 60, tic_epoch % 60))

    # Does final evaluation.
    if paddle.distributed.get_rank() == 0:
        print("\n=====start evaluating last ckpt of %d steps=====" %
              global_step)
        precision, recall, f1 = evaluate(model, criterion, test_data_loader,
                                         eval_file_path, "eval")
        print("precision: %.2f\t recall: %.2f\t f1: %.2f\t" %
              (100 * precision, 100 * recall, 100 * f1))
        paddle.save(
            model.state_dict(),
            os.path.join(args.output_dir, "model_%d.pdparams" % global_step))
        print("\n=====training complete=====")
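# A toy illustration of the special-token mask used in the training loops,
# assuming the ERNIE vocabulary ids [PAD]=0, [CLS]=1, [SEP]=2.
def _mask_example():
    import numpy as np
    ids = np.array([[1, 647, 789, 2, 0, 0]])  # [CLS] tok tok [SEP] [PAD] [PAD]
    mask = (ids != 0) & (ids != 1) & (ids != 2)
    return mask  # -> [[False, True, True, False, False, False]]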
def do_train():
    device = torch.device(args.device)
    logger = log("train")

    # Reads label_map.
    label_map_path = os.path.join(args.data_path, "predicate2id.json")
    if not (os.path.exists(label_map_path) and os.path.isfile(label_map_path)):
        sys.exit("{} does not exist or is not a file.".format(label_map_path))
    with open(label_map_path, 'r', encoding='utf8') as fp:
        label_map = json.load(fp)
    num_classes = (len(label_map.keys()) - 2) * 2 + 2

    # Loads pretrained model ERNIE.
    logger.info("Loading the model and tokenizer...")
    try:
        model = ErnieForTokenClassification(num_classes=num_classes)
    except Exception as e:
        logger.error(e)
        raise RuntimeError("Loading model error: {}".format(e))
    tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-1.0")
    logger.info("Finished loading ERNIE model and tokenizer.")
    criterion = BCELossForDuIE()

    # Loads dataset.
    logger.info("Loading the train and dev datasets...")
    train_dataset = DuIEDataset.from_file(
        os.path.join(args.data_path, 'train_data.json'), tokenizer,
        args.max_seq_length, True)
    collator = DataCollator()
    train_data_loader = DataLoader(dataset=train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   collate_fn=collator)
    eval_file_path = os.path.join(args.data_path, 'dev_data.json')
    test_dataset = DuIEDataset.from_file(eval_file_path, tokenizer,
                                         args.max_seq_length, True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  collate_fn=collator)
    logger.info("Finished loading datasets.")
    steps_by_epoch = len(train_data_loader)

    # Generates parameter groups for weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params, no_decay_params = [], []
    for name, param in model.named_parameters():
        if "bias" in name or "norm" in name:
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    optimizer = torch.optim.AdamW(params=[{
        'params': decay_params,
        'weight_decay': args.weight_decay
    }, {
        'params': no_decay_params,
        'weight_decay': 0.0
    }],
                                  lr=args.learning_rate)
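    # A hedged sanity check: every parameter should land in exactly one of
    # the two AdamW groups. Note the substring match above is case-sensitive,
    # so names like "LayerNorm.weight" would need "Norm" in the check too.
    assert len(decay_params) + len(no_decay_params) == len(
        list(model.parameters()))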
    # Starts training.
    model.to(device)
    global_step = 0
    logging_steps = 50
    save_steps = 10000
    tic_train = time.time()
    for epoch in range(args.num_train_epochs):
        logger.info("\n=====start training of epoch %d=====" % epoch)
        tic_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_data_loader):
            input_ids, _, _, _, labels = batch
            # Moves tensors to the target device; wrapping in the deprecated
            # torch.autograd.Variable is no longer needed.
            input_ids = input_ids.to(device)
            labels = labels.to(device)
            logits = model(input_ids=input_ids)
            # Masks out the special tokens [PAD] (0), [CLS] (1) and [SEP] (2).
            mask = (input_ids != 0) & (input_ids != 1) & (input_ids != 2)
            loss = criterion(logits, labels, mask)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_item = loss.item()
            global_step += 1
            if global_step % logging_steps == 0:
                logger.info(
                    "epoch: %d / %d, steps: %d / %d, loss: %f, speed: %.2f step/s"
                    % (epoch, args.num_train_epochs, step, steps_by_epoch,
                       loss_item, logging_steps / (time.time() - tic_train)))
                tic_train = time.time()
            if global_step % save_steps == 0:
                logger.info("\n=====start evaluating ckpt of %d steps=====" %
                            global_step)
                precision, recall, f1 = evaluate(model, criterion,
                                                 test_data_loader,
                                                 eval_file_path, "eval",
                                                 logger)
                logger.info("precision: %.2f\t recall: %.2f\t f1: %.2f\t" %
                            (100 * precision, 100 * recall, 100 * f1))
                logger.info("saving checkpoint model_%d.pt to %s " %
                            (global_step, args.output_dir))
                os.makedirs(args.output_dir, exist_ok=True)
                torch.save(
                    model.state_dict(),
                    os.path.join(args.output_dir,
                                 "model_%d.pt" % global_step))
                model.train()  # back to train mode
        tic_epoch = time.time() - tic_epoch
        logger.info("epoch time footprint: %d hour %d min %d sec" %
                    (tic_epoch // 3600, (tic_epoch % 3600) // 60,
                     tic_epoch % 60))

    # Does final evaluation.
    logger.info("\n=====start evaluating last ckpt of %d steps=====" %
                global_step)
    precision, recall, f1 = evaluate(model, criterion, test_data_loader,
                                     eval_file_path, "eval", logger)
    logger.info("precision: %.2f\t recall: %.2f\t f1: %.2f\t" %
                (100 * precision, 100 * recall, 100 * f1))
    torch.save(model.state_dict(),
               os.path.join(args.output_dir, "model_%d.pt" % global_step))
    logger.info("\n=====training complete=====")
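# A hypothetical entry point wiring the two modes together; the do_train /
# do_predict flags are assumptions inferred from the args.* attributes used
# above, not confirmed by the original script.
if __name__ == "__main__":
    if args.do_train:
        do_train()
    elif args.do_predict:
        do_predict()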