def do_predict(args):
    """Tag two built-in sample sentences with the WordTag Taskflow and print the result.

    Args:
        args: Object providing ``device`` (passed to ``paddle.set_device``),
            ``batch_size`` and ``max_seq_len`` (both forwarded to ``Taskflow``).

    Returns:
        None. The tagging output is only printed to stdout.
    """
    paddle.set_device(args.device)

    tagger = Taskflow(
        "text2knowledge",
        model="wordtag",
        batch_size=args.batch_size,
        max_seq_length=args.max_seq_len,
    )
    samples = [
        "《孤女》是2010年九州出版社出版的小说,作者是余兼羽。",
        "热梅茶是一道以梅子为主要原料制作的茶饮",
    ]
    print(tagger(samples))
plt.figure(figsize=(10, 5)), cnt.values(), tick_label=list(cnt.keys())) # , orientation="horizontal" # for a, b in zip(x_list, y_list): # plt.text(a, b + 0.05, '%.0f' % b, ha='center', va='bottom', fontsize=10) plt.xticks(rotation=45) plt.savefig('./img.png') if __name__ == '__main__': # 模型路径下必须含有model和params文件 ocr = PaddleOCR( det_model_dir='./PaddleOCR/output/ch_db_mv3_inference/inference', use_angle_cls=True) lac = Taskflow("pos_tagging") enti_list = [] pdfFolder = './ResearchReport' for p in os.listdir(pdfFolder): if os.path.isdir(os.path.join(pdfFolder, p)): print('Processing folder:', p) imgPath = pdfFolder + '/' + p res_list = [] for i in os.listdir(imgPath): img_path = os.path.join(imgPath, i) result = ocr.ocr(img_path, cls=True) res_list.append(result) enti = LAC(lac, res_list) enti_list += enti
def __init__(self, model="csc-ernie-1.0"):
    """Create the text-correction Taskflow and store it on the instance.

    Args:
        model: Model name forwarded to ``Taskflow``; defaults to
            ``"csc-ernie-1.0"``.
    """
    corrector = Taskflow("text_correction", model=model)
    self.text_correction = corrector
import paddlenlp
from paddlenlp import Taskflow

# Sentiment-analysis demo: build a Taskflow backed by the
# "skep_ernie_1.0_large_ch" model, classify one sentence, print the output.
#
# Disabled alternative, kept for reference: load the SKEP model and its
# tokenizer directly instead of going through Taskflow.
# from paddlenlp.transformers import SkepTokenizer, SkepForTokenClassification
# model = SkepForTokenClassification.from_pretrained(
#     pretrained_model_name_or_path="skep_ernie_1.0_large_ch")
# tokenizer = paddlenlp.transformers.SkepTokenizer.from_pretrained(
#     "skep_ernie_1.0_large_ch")

senta = Taskflow(
    "sentiment_analysis",
    model="skep_ernie_1.0_large_ch",
)

# Single sample sentence fed to the sentiment Taskflow.
sample_text = "每一次的失败,都要总结原因,才能赚钱"
data = senta(sample_text)
print(data)
# Resolve the input into a list of candidate audio paths: either the single
# file given, or every entry of the given directory (joined with its path).
if os.path.isfile(args.audio_file):
    audios = [args.audio_file]
elif os.path.isdir(args.audio_file):
    audios = [
        os.path.join(args.audio_file, x) for x in os.listdir(args.audio_file)
    ]
else:
    raise Exception('%s is neither valid path nor file!' % args.audio_file)

# Only paths ending in ".wav" are processed; fail early if none remain.
audios = [x for x in audios if x.endswith('.wav')]
if not audios:
    raise Exception('No valid .wav file! Please check %s.' % args.audio_file)

# Build the information-extraction Taskflow. ``task_path`` is passed only
# when a custom UIE model location was supplied via ``args.uie_model``.
taskflow_kwargs = {'schema': args.schema}
if args.uie_model is not None:
    taskflow_kwargs['task_path'] = args.uie_model
parser = Taskflow('information_extraction', **taskflow_kwargs)

# Open the output explicitly as UTF-8: json.dumps(..., ensure_ascii=False)
# emits non-ASCII characters verbatim, and the platform's default encoding
# is not guaranteed to be able to represent them.
with open(args.save_file, 'w', encoding='utf-8') as fp:
    for audio_file in tqdm(audios):
        # automatic speech recognition
        text = mandarin_asr_api(args.api_key, args.secret_key, audio_file)
        # extract entities according to schema
        result = parser(text)
        # One record per audio: the recognized text, then the extraction
        # result as JSON, followed by a blank separator line.
        fp.write(text + '\n')
        fp.write(json.dumps(result, ensure_ascii=False) + '\n\n')
        print(text)
        pprint.pprint(result)