#!/usr/bin/python
# -*- coding:utf-8 -*-
# Demo script: tag a whitespace-segmented Chinese sentence with the deepnlp
# NER tagger after loading the "zh_o2o" dictionary, then print each
# word/tag pair on its own line.
from __future__ import unicode_literals  # compatible with python3 unicode

import deepnlp
deepnlp.download('ner')  # download the NER pretrained models from github if installed from pip

from deepnlp import ner_tagger
tagger = ner_tagger.load_model(name = 'zh')  # Base LSTM Based Model
tagger.load_dict("zh_o2o")  # Change to other dict

text = "北京 望京 最好吃 的 黑椒 牛排 在哪里"
words = text.split(" ")

# Use the prefix dict and merging function to combine separated words
tagging = tagger._predict_ner_tags_dict(words, merge = True)
print ("DEBUG: NER tagger zh_o2o dictionary")
for (w,t) in tagging:
    pair = w + "/" + t
    print (pair)

# Expected output:
#北京/city
#望京/area
#最好吃/nt
#的/nt
#黑椒牛排/dish
#在哪里/nt

# Word Sense Disambiguation
#coding:utf-8
# Demo script: POS-tag a whitespace-split English sentence with deepnlp and
# print each word/tag pair, UTF-8 encoded, on its own line.
from __future__ import unicode_literals

import deepnlp
deepnlp.download( 'pos' )  # download the POS pretrained models from github if installed from pip

from deepnlp import pos_tagger
tagger = pos_tagger.load_model( lang='en')  # Loading English model, lang code 'en'

#Segmentation
text = "I want to see a funny movie"
words = text.split(" ")
# NOTE(review): printing the encoded value shows a b'...' repr under
# Python 3; kept as-is so the script's output does not change.
print(" ".join(words).encode('utf-8'))

#POS Tagging
tagging = tagger.predict(words)
for (w, t) in tagging:
    # Named `pair` rather than `str` so the builtin `str` is not shadowed
    # for the remainder of the module.
    pair = w + "/" + t
    print(pair.encode('utf-8'))

#Results
#I/nn
#want/vb
#to/to
#see/vb
#a/at
#funny/jj
#movie/nn
#coding:utf-8
# Script: download the segment/pos/ner models, load the 'zh' pipeline, and
# read the lines of docs_test.txt (located next to this file) into `docs`.
from __future__ import unicode_literals # compatible with python3 unicode

import sys,os
import codecs
import deepnlp
deepnlp.download('segment')   # download all the required pretrained models from github if installed from pip
deepnlp.download('pos')
deepnlp.download('ner')

from deepnlp import pipeline
p = pipeline.load_model('zh')


# concatenate tuples into one string "w1/t1 w2/t2 ..."
def _concat_tuples(tagging):
    """Join (word, tag) pairs into a single space-separated string.

    Args:
        tagging: iterable of 2-tuples ``(word, tag)`` of strings.

    Returns:
        A string of the form ``"w1/t1 w2/t2 ..."``; empty if ``tagging``
        yields nothing.
    """
    TOKEN_BLANK = " "
    return TOKEN_BLANK.join(x + "/" + y for (x, y) in tagging)


# input file
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
docs = []
# The context manager guarantees the handle is closed even if reading fails;
# the name `file` is kept so any later reference to it still resolves.
with codecs.open(os.path.join(BASE_DIR, 'docs_test.txt'), 'r', encoding='utf-8') as file:
    for line in file:
        # Strip line terminators only; other whitespace is preserved.
        line = line.replace("\n", "").replace("\r", "")
        docs.append(line)
#coding=utf-8
# Test script: exercise deepnlp.download() with and without explicit model
# names, then try loading several segmenter models.
from __future__ import unicode_literals

import tensorflow as tf
import deepnlp

# Download module and domain-specific model
for module_name, model_name in (
    ('segment', 'zh_entertainment'),
    ('pos', 'en'),
    ('ner', 'zh_o2o'),
):
    deepnlp.download(module=module_name, name=model_name)

# Download module
for module_name in ('segment', 'pos', 'ner', 'parse'):
    deepnlp.download(module_name)
# deepnlp.download()

## Test load model
from deepnlp import segmenter
try:
    # Models are loaded in order; the first failure aborts the remaining
    # loads and is reported below.
    for model_name in ('zh', 'zh_o2o', 'zh_entertainment'):
        tokenizer = segmenter.load_model(name=model_name)
except Exception as e:
    print("DEBUG: ERROR Found...")
    print(e)

## pos
from deepnlp import pos_tagger
# Script: segment each line of ./data/person_resume.txt, run the NER tagger
# over the resulting words, and write every "word tag" pair to
# ./data/person_resume02.txt.
import deepnlp
deepnlp.download('ner')

from deepnlp import segmenter
from deepnlp import ner_tagger

tokenizer = segmenter.load_model(name='zh')
tagger = ner_tagger.load_model(name='zh')

# Both handles are managed by `with` so they are closed even when
# segmentation or tagging raises part-way through the input.
with open('./data/person_resume.txt', 'r') as file_r, \
        open('./data/person_resume02.txt', 'w') as file_w:
    for line in file_r:
        text = line.rstrip()
        words = tokenizer.seg(text)
        tagging = tagger.predict(words)
        for (w, t) in tagging:
            pair = w + ' ' + t
            # NOTE(review): pairs are written back-to-back with no
            # separator or newline between them -- confirm whether a
            # delimiter is intended before changing the output format.
            file_w.write(pair)
"""
.::::. .::::::::. ::::::::::: ..:::::::::::' '::::::::::::' .:::::::::: '::::::::::::::.. ..::::::::::::. ``:::::::::::::::: ::::``:::::::::' .:::. ::::' ':::::' .::::::::. .::::' :::: .:::::::'::::. .:::' ::::: .:::::::::' ':::::. .::' :::::.:::::::::' ':::::. .::' ::::::::::::::' ``::::. ...::: ::::::::::::' ``::. ```` ':. ':::::::::' ::::.. '.:::::' ':'````..
"""
# NOTE(review): the opening triple-quote above was restored; the visible text
# only contained the closing delimiter, which left the banner as bare
# (invalid) source. Confirm against the original file header.
#coding=utf-8
# Script: download all deepnlp modules, then segment one Chinese sentence
# with the 'zh_entertainment' segmenter and join the tokens with spaces.
import deepnlp
# Download all the modules
deepnlp.download()

#coding=utf-8
from deepnlp import segmenter

tokenizer = segmenter.load_model(name = 'zh_entertainment')
text = "我刚刚在浙江卫视看了电视剧老九门,觉得陈伟霆很帅"
segList = tokenizer.seg(text)
text_seg = " ".join(segList)
#coding:utf-8
# Demo script: POS-tag a whitespace-split English sentence with deepnlp and
# print each word/tag pair on its own line.
from __future__ import unicode_literals

import deepnlp
# download the POS pretrained models from github if installed from pip
deepnlp.download(module='pos', name='en')

from deepnlp import pos_tagger
# Loading English model, lang code 'en'
tagger = pos_tagger.load_model(name='en')

# Segmentation
text = "I want to see a funny movie"
words = text.split(" ")
print(" ".join(words))

# POS Tagging
tagging = tagger.predict(words)
for token, tag in tagging:
    print("/".join((token, tag)))

# Results
#I/nn
#want/vb
#to/to
#see/vb
#a/at
#funny/jj
#movie/nn
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Script: make sure the "zh_finance" segment and pos models are downloaded
# (registering them first if the initial download fails), then try to load
# each one, printing any error instead of aborting.
import deepnlp
from deepnlp import segmenter


def _download_or_register(module, name):
    """Download a model; on failure print the error, register it, and retry.

    Args:
        module: module identifier passed to deepnlp (e.g. "segment", "pos").
        name: model name passed to deepnlp (e.g. "zh_finance").
    """
    try:
        deepnlp.download(module, name)
    except Exception as err:
        print (err)
        deepnlp.register_model(module, name)
        deepnlp.download(module, name)


_download_or_register("segment", "zh_finance")
try:
    seg_tagger = segmenter.load_model("zh_finance")
except Exception as err:
    print (err)

from deepnlp import pos_tagger

_download_or_register("pos", "zh_finance")
try:
    pos_tagger.load_model("zh_finance")
except Exception as err:
    print (err)