Example No. 1
    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]

    # Setup feed list for data feeder
    # All tensors required by the module must be fed
    feed_list = [
        inputs["input_ids"].name,
        inputs["position_ids"].name,
        inputs["segment_ids"].name,
        inputs["input_mask"].name,
    ]

    # Select fine-tune strategy, setup config and fine-tune
    strategy = hub.ULMFiTStrategy(learning_rate=args.learning_rate,
                                  params_layer=module.get_params_layer())

    # Setup running config for PaddleHub Fine-tune API
    config = hub.RunConfig(use_data_parallel=args.use_data_parallel,
                           use_cuda=args.use_gpu,
                           num_epoch=args.num_epoch,
                           batch_size=args.batch_size,
                           checkpoint_dir=args.checkpoint_dir,
                           strategy=strategy,
                           eval_interval=100)

    # Define a text classification fine-tune task by PaddleHub's API
    cls_task = hub.TextClassifierTask(data_reader=reader,
                                      feature=pooled_output,
                                      feed_list=feed_list,
                                      num_classes=dataset.num_labels,
                                      config=config)
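
    # A minimal follow-up sketch: with the task and RunConfig above, training is
    # typically launched through the PaddleHub 1.x Fine-tune API, which fine-tunes
    # the network and evaluates at the configured eval_interval.
    cls_task.finetune_and_eval()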
Example No. 2
    # Use the appropriate tokenizer to preprocess the data set
    # For ernie_tiny, the BertTokenizer is used as well.
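    # The tokenizer encodes each raw text example into the id sequences the
    # module expects, padding or truncating every example to max_seq_len.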
    tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.Couplet(
        tokenizer=tokenizer, max_seq_len=args.max_seq_len)

    # Construct transfer learning network
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_output" for token-level output.
    pooled_output = outputs["pooled_output"]
    sequence_output = outputs["sequence_output"]

    # Select fine-tune strategy, setup config and fine-tune
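    # ULMFiT combines a slanted triangular learning rate (cut_fraction sets the
    # warm-up fraction of the schedule), discriminative fine-tuning
    # (dis_params_layer) and gradual unfreezing (frz_params_layer).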
    strategy = hub.ULMFiTStrategy(
        learning_rate=args.learning_rate,
        optimizer_name="adam",
        cut_fraction=args.cut_fraction,
        dis_params_layer=module.get_params_layer(),
        frz_params_layer=module.get_params_layer())

    # Setup RunConfig for PaddleHub Fine-tune API
    config = hub.RunConfig(
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Define a text generation fine-tune task by PaddleHub's API
    gen_task = hub.TextGenerationTask(
        dataset=dataset,