示例#1
0
def predict_cls(args, ext_results):
    # load dict
    model_name = "skep_ernie_1.0_large_ch"
    cls_label2id, cls_id2label = load_dict(args.cls_label_path)

    tokenizer = SkepTokenizer.from_pretrained(model_name)
    test_ds = MapDataset(ext_results)
    trans_func = partial(convert_example_to_feature_cls,
                         tokenizer=tokenizer,
                         label2id=cls_label2id,
                         max_seq_len=args.cls_max_seq_len,
                         is_test=True)
    test_ds = test_ds.map(trans_func, lazy=False)

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id), Stack(dtype="int64")
    ): fn(samples)

    # set shuffle is False
    test_batch_sampler = paddle.io.BatchSampler(test_ds,
                                                batch_size=args.batch_size,
                                                shuffle=False)
    test_loader = paddle.io.DataLoader(test_ds,
                                       batch_sampler=test_batch_sampler,
                                       collate_fn=batchify_fn)
    print("test data loaded.")

    # load cls model
    cls_state_dict = paddle.load(args.cls_model_path)
    cls_model = SkepForSequenceClassification.from_pretrained(
        model_name, num_classes=len(cls_label2id))
    cls_model.load_dict(cls_state_dict)
    print("classification model loaded.")

    cls_model.eval()

    results = []
    for bid, batch_data in enumerate(test_loader):
        input_ids, token_type_ids, seq_lens = batch_data
        logits = cls_model(input_ids, token_type_ids=token_type_ids)

        predictions = logits.argmax(axis=1).numpy().tolist()
        results.extend(predictions)

    results = [cls_id2label[pred_id] for pred_id in results]
    return results
示例#2
0
if __name__ == "__main__":
    # yapf: disable
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", type=str, default="extraction", choices=["extraction", "classification", "pp_minilm"], help="The model type that you wanna export.")
    parser.add_argument("--base_model_name", type=str, default="skep_ernie_1.0_large_ch", help="The base model of experiment, skep or ppminilm")
    parser.add_argument("--model_path", type=str, default=None, help="The path of model that you want to load.")
    parser.add_argument("--save_path", type=str, default=None, help="The path of the exported static model.")
    args = parser.parse_args()
    # yapf: enbale

    # load model with saved state_dict
    if args.model_type == "extraction":
        model = SkepForTokenClassification.from_pretrained(args.base_model_name, num_classes=5)
    elif args.model_type == "classification":
        model = SkepForSequenceClassification.from_pretrained(args.base_model_name, num_classes=2)
    else:
        model = PPMiniLMForSequenceClassification.from_pretrained(args.base_model_name, num_classes=2)

    loaded_state_dict = paddle.load(args.model_path)
    model.load_dict(loaded_state_dict)
    print(f"Loaded parameters from {args.model_path}")

    model.eval()
    # convert to static graph with specific input description
    model = paddle.jit.to_static(
        model,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64"),  # input_ids
            paddle.static.InputSpec(
    return results


if __name__ == "__main__":
    paddle.set_device(args.device)

    # These data samples is in Chinese.
    # If you use the english model, you should change the test data in English.
    data = [
        '这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般',
        '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片',
        '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。',
    ]
    label_map = {0: 'negative', 1: 'positive'}

    model = SkepForSequenceClassification.from_pretrained(
        args.model_name, num_classes=len(label_map))
    tokenizer = SkepTokenizer.from_pretrained(args.model_name)

    if args.params_path and os.path.isfile(args.params_path):
        state_dict = paddle.load(args.params_path)
        model.set_dict(state_dict)
        print("Loaded parameters from %s" % args.params_path)

    results = predict(model,
                      data,
                      tokenizer,
                      label_map,
                      batch_size=args.batch_size)
    for idx, text in enumerate(data):
        print('Data: {} \t Label: {}'.format(text, results[idx]))
示例#4
0
    parser.add_argument("--ext_model_path", type=str, default=None, help="The path of extraction model path that you want to load.")
    parser.add_argument("--cls_model_path", type=str, default=None, help="The path of classification model path that you want to load.")
    parser.add_argument("--ext_label_path", type=str, default=None, help="The path of extraction label dict.")
    parser.add_argument("--cls_label_path", type=str, default=None, help="The path of classification label dict.")
    parser.add_argument("--ext_max_seq_len", type=int, default=512, help="The maximum total input sequence length after tokenization for extraction model.")
    parser.add_argument("--cls_max_seq_len", type=int, default=512, help="The maximum total input sequence length after tokenization for classification model.")
    args = parser.parse_args()
    # yapf: enbale

    # load dict
    model_name = "skep_ernie_1.0_large_ch"
    ext_label2id, ext_id2label = load_dict(args.ext_label_path)
    cls_label2id, cls_id2label = load_dict(args.cls_label_path)
    tokenizer = SkepTokenizer.from_pretrained(model_name)
    print("label dict loaded.")

    # load ext model
    ext_state_dict = paddle.load(args.ext_model_path)
    ext_model = SkepForTokenClassification.from_pretrained(model_name, num_classes=len(ext_label2id))
    ext_model.load_dict(ext_state_dict)
    print("extraction model loaded.")

    # load cls model
    cls_state_dict = paddle.load(args.cls_model_path)
    cls_model = SkepForSequenceClassification.from_pretrained(model_name, num_classes=len(cls_label2id))
    cls_model.load_dict(cls_state_dict)
    print("classification model loaded.")

    # do predict
    predict(args, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label)
示例#5
0
    return paddle.io.DataLoader(dataset=dataset,
                                batch_sampler=batch_sampler,
                                collate_fn=batchify_fn,
                                return_list=True)


if __name__ == "__main__":
    set_seed(args.seed)
    paddle.set_device(args.device)
    rank = paddle.distributed.get_rank()
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    train_ds = load_dataset("seabsa16", "phns", splits=["train"])

    model = SkepForSequenceClassification.from_pretrained(
        'skep_ernie_1.0_large_ch', num_classes=len(train_ds.label_list))
    tokenizer = SkepTokenizer.from_pretrained('skep_ernie_1.0_large_ch')

    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         max_seq_length=args.max_seq_length)
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type_ids
        Stack(dtype="int64")  # labels
    ): [data for data in fn(samples)]

    train_data_loader = create_dataloader(train_ds,
                                          mode='train',
                                          batch_size=args.batch_size,
                                          batchify_fn=batchify_fn,