Exemplo n.º 1
0
def do_predict(args):
    """Run the "wordtag" text2knowledge task on two sample sentences and print the output.

    Args:
        args: Object exposing ``device``, ``batch_size`` and ``max_seq_len``
            attributes; they select the Paddle device and configure the
            Taskflow instance.
    """
    paddle.set_device(args.device)

    tagger = Taskflow(
        "text2knowledge",
        model="wordtag",
        batch_size=args.batch_size,
        max_seq_length=args.max_seq_len,
    )

    # Fixed demo inputs; both sentences are sent in a single call.
    samples = [
        "《孤女》是2010年九州出版社出版的小说,作者是余兼羽。",
        "热梅茶是一道以梅子为主要原料制作的茶饮",
    ]
    predictions = tagger(samples)
    print(predictions)
Exemplo n.º 2
0
    # Draw a bar chart with one bar per entry of `cnt`, which is read through
    # len(), .values() and .keys() (presumably a label -> count mapping —
    # confirm against the enclosing function, which is not visible here).
    plt.figure(figsize=(10, 5))
    plt.bar(range(len(cnt)), cnt.values(),
            tick_label=list(cnt.keys()))  # , orientation="horizontal"
    # Disabled code: would write each bar's value as text just above the bar.
    # for a, b in zip(x_list, y_list):
    #     plt.text(a, b + 0.05, '%.0f' % b, ha='center', va='bottom', fontsize=10)
    # Rotate the x tick labels by 45 degrees.
    plt.xticks(rotation=45)
    # Write the figure to ./img.png, then display it.
    plt.savefig('./img.png')
    plt.show()


if __name__ == '__main__':
    # The model directory must contain both the model and params files.
    ocr = PaddleOCR(
        det_model_dir='./PaddleOCR/output/ch_db_mv3_inference/inference',
        use_angle_cls=True,
    )
    lac = Taskflow("pos_tagging")

    enti_list = []
    pdfFolder = './ResearchReport'
    for p in os.listdir(pdfFolder):
        # Only sub-folders are processed; any other directory entry is skipped.
        if not os.path.isdir(os.path.join(pdfFolder, p)):
            continue

        print('Processing folder:', p)
        imgPath = pdfFolder + '/' + p

        # Run OCR on every entry of the sub-folder, in listing order.
        res_list = [
            ocr.ocr(os.path.join(imgPath, i), cls=True)
            for i in os.listdir(imgPath)
        ]

        # Pass the folder's OCR results through LAC and append whatever it
        # returns to the running list.
        enti_list.extend(LAC(lac, res_list))
Exemplo n.º 3
0
 def __init__(self, model="csc-ernie-1.0"):
     """Create the text-correction Taskflow held by this object.

     Args:
         model: Model name forwarded to the "text_correction" Taskflow;
             defaults to "csc-ernie-1.0".
     """
     corrector = Taskflow("text_correction", model=model)
     self.text_correction = corrector
Exemplo n.º 4
0

import paddlenlp
from paddlenlp import Taskflow
# from paddlenlp.transformers import SkepTokenizer,SkepForTokenClassification
#
# modle=SkepForTokenClassification.from_pretrained(pretrained_model_name_or_path="skep_ernie_1.0_large_ch")
#
#
# tokenizer = paddlenlp.transformers.SkepTokenizer.from_pretrained(
#     "skep_ernie_1.0_large_ch")
# Build a sentiment-analysis Taskflow using the "skep_ernie_1.0_large_ch" model.
senta = Taskflow(
    "sentiment_analysis",
    model="skep_ernie_1.0_large_ch",
)

# Run it on one sample sentence and print the raw result.
data = senta("每一次的失败,都要总结原因,才能赚钱")
print(data)
Exemplo n.º 5
0
    # Build the list of candidate audio paths: args.audio_file may name either
    # a single file or a directory.
    if os.path.isfile(args.audio_file):
        audios = [args.audio_file]
    elif os.path.isdir(args.audio_file):
        # Only the directory's direct entries are listed (no recursion); each
        # entry name is joined back onto the directory path.
        audios = [x for x in os.listdir(args.audio_file)]
        audios = [os.path.join(args.audio_file, x) for x in audios]
    else:
        raise Exception('%s is neither valid path nor file!' % args.audio_file)

    # Keep only paths whose name ends with the exact suffix '.wav'
    # (str.endswith is case-sensitive, so e.g. '.WAV' is dropped).
    audios = [x for x in audios if x.endswith('.wav')]
    if len(audios) == 0:
        raise Exception('No valid .wav file! Please check %s.' %
                        args.audio_file)

    # Create the information-extraction Taskflow; task_path is passed only
    # when a custom model location was supplied via args.uie_model.
    if args.uie_model is None:
        parser = Taskflow('information_extraction', schema=args.schema)
    else:
        parser = Taskflow('information_extraction',
                          schema=args.schema,
                          task_path=args.uie_model)

    # Mode 'w' truncates any existing args.save_file.
    # NOTE(review): no encoding is passed to open(), so the platform default
    # is used, while json.dumps(..., ensure_ascii=False) below can emit
    # non-ASCII characters — consider encoding='utf-8'.
    with open(args.save_file, 'w') as fp:
        for audio_file in tqdm(audios):
            # automatic speech recognition
            # (mandarin_asr_api is defined elsewhere; its result is concatenated
            # with '\n' below, so it is presumably a str — confirm)
            text = mandarin_asr_api(args.api_key, args.secret_key, audio_file)
            # extract entities according to schema
            result = parser(text)
            # Per audio file: one line of recognized text, one line of JSON,
            # then a blank separator line.
            fp.write(text + '\n')
            fp.write(json.dumps(result, ensure_ascii=False) + '\n\n')
            # Echo the same information to stdout.
            print(text)
            pprint.pprint(result)