dataset = hub.dataset.GLUE(
    "STS-B", tokenizer=tokenizer, max_seq_len=args.max_seq_len)

# Construct transfer learning network.
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]

# Select the fine-tune strategy and set up the training config.
strategy = hub.AdamWeightDecayStrategy(
    warmup_proportion=args.warmup_proportion,
    weight_decay=args.weight_decay,
    learning_rate=args.learning_rate)

# Set up RunConfig for the PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_data_parallel=args.use_data_parallel,
    use_cuda=args.use_gpu,
    num_epoch=args.num_epoch,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=strategy)

# Define a regression fine-tune task with PaddleHub's API.
reg_task = hub.RegressionTask(
    dataset=dataset,
    feature=pooled_output,
    config=config)

# Fine-tune and evaluate with PaddleHub's API; training, evaluation,
# testing and model saving are all handled automatically.
reg_task.finetune_and_eval()
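
# The snippet above reads every hyperparameter from an ``args`` namespace
# that the excerpt does not define. A minimal argparse sketch that would
# supply those fields follows; the flag names and defaults here are
# assumptions for illustration, not part of the original demo script.
import argparse
import ast

parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of fine-tune epochs.")
parser.add_argument("--batch_size", type=int, default=32, help="Batch size for training.")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Peak learning rate.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="AdamW weight decay.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warm-up fraction of total training steps.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Maximum sequence length after tokenization.")
parser.add_argument("--checkpoint_dir", type=str, default="ckpt_stsb", help="Directory to save checkpoints.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether to fine-tune on GPU.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether to use data parallelism.")
args = parser.parse_args()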
feed_list = [
    inputs["input_ids"].name,
    inputs["position_ids"].name,
    inputs["segment_ids"].name,
    inputs["input_mask"].name,
]
if args.use_taskid:
    feed_list.append(inputs["task_ids"].name)

# Select the fine-tune strategy and set up the training config.
strategy = hub.AdamWeightDecayStrategy(
    weight_decay=args.weight_decay,
    learning_rate=args.learning_rate,
    lr_scheduler="linear_decay")

# Set up the running config for the PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_data_parallel=args.use_data_parallel,
    use_pyreader=args.use_pyreader,
    use_cuda=args.use_gpu,
    num_epoch=args.num_epoch,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=strategy)

# Define a regression fine-tune task with PaddleHub's API.
reg_task = hub.RegressionTask(
    data_reader=reader,
    feature=pooled_output,
    feed_list=feed_list,
    config=config)

# Fine-tune and evaluate with PaddleHub's API; training, evaluation,
# testing and model saving are all handled automatically.
reg_task.finetune_and_eval()
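
# This older feed_list/data_reader variant assumes a ``module``, its
# ``inputs``/``outputs``, and a ``reader`` that the excerpt leaves out.
# A sketch of how they are typically created with the PaddleHub 1.x API
# follows; the reader class name is an assumption based on that API and
# should be checked against the demo before use.
import paddlehub as hub

module = hub.Module(name="ernie")
inputs, outputs, program = module.context(
    trainable=True, max_seq_len=args.max_seq_len)
pooled_output = outputs["pooled_output"]

dataset = hub.dataset.GLUE("STS-B")
reader = hub.reader.RegressionReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    max_seq_len=args.max_seq_len)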
# Construct transfer learning network.
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]

# Set up RunConfig for the PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_data_parallel=False,
    use_cuda=args.use_gpu,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=hub.AdamWeightDecayStrategy())

# Define a regression fine-tune task with PaddleHub's API.
reg_task = hub.RegressionTask(
    feature=pooled_output,
    config=config)

# STS-B already provides the predict data, and the dataset has processed it.
# To process customized data instead, see predict.py in the
# text_classification demo.
# Use the appropriate tokenizer to preprocess the data. For ernie_tiny the
# tokenizer also performs word segmentation to obtain subwords; for details
# see https://www.jiqizhixin.com/articles/2019-11-06-9
if module.name == "ernie_tiny":
    tokenizer = hub.ErnieTinyTokenizer(
        vocab_file=module.get_vocab_path(),
        spm_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())
else:
    tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())

dataset = hub.dataset.GLUE(
    "STS-B", tokenizer=tokenizer, max_seq_len=args.max_seq_len)
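
# With the dataset attached to the task, prediction over the dataset's
# built-in predict split would typically look like the sketch below.
# ``load_best_model`` and ``return_result`` are assumed from the
# PaddleHub 1.x task API; the exact keywords vary between releases, so
# check the regression demo's predict.py before relying on them.
reg_task = hub.RegressionTask(
    dataset=dataset, feature=pooled_output, config=config)
results = reg_task.predict(load_best_model=True, return_result=True)
print(results)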