```python
import paddlehub as hub

# `module`, `inputs`, `outputs`, `reader`, `dataset`, and `args` are assumed to be
# defined in the earlier steps of the demo (module loading, context setup,
# data reader, and argument parsing).

# Construct the transfer learning network.
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]

# Set up the feed list for the data feeder.
# Every tensor the module needs must be fed.
feed_list = [
    inputs["input_ids"].name,
    inputs["position_ids"].name,
    inputs["segment_ids"].name,
    inputs["input_mask"].name,
]

# Select the fine-tune strategy and set up its config.
strategy = hub.ULMFiTStrategy(
    learning_rate=args.learning_rate,
    params_layer=module.get_params_layer())

# Set up the running config for the PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_data_parallel=args.use_data_parallel,
    use_cuda=args.use_gpu,
    num_epoch=args.num_epoch,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=strategy,
    eval_interval=100)

# Define a classification fine-tune task with PaddleHub's API.
cls_task = hub.TextClassifierTask(
    data_reader=reader,
    feature=pooled_output,
    feed_list=feed_list,
    num_classes=dataset.num_labels,
    config=config)  # `config=config` assumed; the original snippet is truncated here
```
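With the task defined, fine-tuning is launched through the PaddleHub Fine-tune API. A minimal sketch, assuming `cls_task` was constructed as above; `finetune_and_eval` trains the task and periodically evaluates on the dev set at the `eval_interval` set in `RunConfig`:

```python
# Fine-tune the classification task, evaluating every `eval_interval` steps.
cls_task.finetune_and_eval()
```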
The same Fine-tune API also drives sequence-level tasks; the snippet below configures a text generation task on the Couplet dataset with the full set of ULMFiT options.

```python
# Use the appropriate tokenizer to preprocess the dataset.
# For ernie_tiny, the BertTokenizer is used as well.
tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
dataset = hub.dataset.Couplet(
    tokenizer=tokenizer, max_seq_len=args.max_seq_len)

# Construct the transfer learning network.
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
sequence_output = outputs["sequence_output"]

# Select the fine-tune strategy and set up its config.
strategy = hub.ULMFiTStrategy(
    learning_rate=args.learning_rate,
    optimizer_name="adam",
    cut_fraction=args.cut_fraction,
    dis_params_layer=module.get_params_layer(),
    frz_params_layer=module.get_params_layer())

# Set up the RunConfig for the PaddleHub Fine-tune API.
config = hub.RunConfig(
    use_data_parallel=args.use_data_parallel,
    use_cuda=args.use_gpu,
    num_epoch=args.num_epoch,
    batch_size=args.batch_size,
    checkpoint_dir=args.checkpoint_dir,
    strategy=strategy)

# Define a text generation fine-tune task with PaddleHub's API.
gen_task = hub.TextGenerationTask(
    dataset=dataset,
    # The original snippet is truncated after `dataset=dataset,`; the arguments
    # below are assumed, following the standard PaddleHub text-generation demo.
    feature=pooled_output,
    token_feature=sequence_output,
    max_seq_len=args.max_seq_len,
    num_classes=dataset.num_labels,
    config=config)
```
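The `ULMFiTStrategy` here bundles the three ULMFiT techniques: `cut_fraction` controls the slanted triangular learning-rate schedule, `dis_params_layer` enables discriminative fine-tuning (deeper layers receive larger learning rates than shallower ones), and `frz_params_layer` enables gradual unfreezing. As with the classification task, training is started with `finetune_and_eval`; a minimal sketch, assuming `gen_task` was built as above:

```python
# Fine-tune the text generation task with the ULMFiT strategy configured above.
gen_task.finetune_and_eval()
```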