Пример #1
0
 def __init__(self, name):
     """Initialize the NLP pipeline: open a TF session and load the models.

     Args:
         name: model name / language code forwarded to each loader
               (e.g. 'zh') -- exact semantics depend on deepnlp's
               load_model; confirm against the deepnlp docs.
     """
     print("Starting new Tensorflow session...")
     # NOTE(review): tf.Session() is TF1.x API; the session is held for the
     # object's lifetime -- no close() is visible in this chunk.
     self.session = tf.Session()
     print("Loading pipeline modules...")
     self.tokenizer = segmenter.load_model(name)    # word segmenter
     self.tagger_pos = pos_tagger.load_model(name)  # class tagger_pos
     self.tagger_ner = ner_tagger.load_model(name)  # class tagger_ner
Пример #2
0
# -*- coding:utf-8 -*-
# Demo: run the English deepnlp NER tagger over a jieba-tokenized sentence.

import jieba
from deepnlp import ner_tagger

if __name__ == '__main__':

    tagger = ner_tagger.load_model(lang='en')

    test = 'UK London and United States are both big city.'

    # Tokenize with jieba, then split on spaces to get a flat word list.
    wordList = ' '.join(jieba.cut(test)).split()
    print(wordList)

    # predict() yields (word, tag) pairs.
    taggedZip = tagger.predict(wordList)
    # Single join instead of quadratic += concatenation in a loop;
    # the trailing space matches the original output byte-for-byte.
    itemReStr = ''.join(w + '/' + t + ' ' for (w, t) in taggedZip)
    print(itemReStr)
Пример #3
0
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Demo: Chinese NER with the zh_o2o domain dictionary and word merging.
# (Fixed: stray "testtesttest" text fused onto the shebang line.)

from __future__ import unicode_literals # compatible with python3 unicode

import deepnlp
deepnlp.download('ner')  # download the NER pretrained models from github if installed from pip

from deepnlp import ner_tagger
tagger = ner_tagger.load_model(name = 'zh')    # Base LSTM Based Model
tagger.load_dict("zh_o2o")                     # Change to other dict

text = "北京 望京 最好吃 的 黑椒 牛排 在哪里"
words = text.split(" ")

# Use the prefix dict and merging function to combine separated words.
# NOTE(review): _predict_ner_tags_dict is a private deepnlp API (leading
# underscore); prefer the public predict() if it exposes the same behaviour.
tagging = tagger._predict_ner_tags_dict(words, merge = True)
print ("DEBUG: NER tagger zh_o2o dictionary")
for (w,t) in tagging:
    pair = w + "/" + t
    print (pair)

# Expected output:
#北京/city
#望京/area
#最好吃/nt
#的/nt
#黑椒牛排/dish
#在哪里/nt


# Word Sense Disambiguation
Пример #4
0
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Demo: Chinese NER restricted to a custom tagset via the zh_o2o model.

from __future__ import unicode_literals  # compatible with python3 unicode

import deepnlp

# Fetch the pretrained NER models from github when installed via pip.
deepnlp.download(
    'ner'
)

from deepnlp import ner_tagger

# Example 1. Change to other dict
# Base LSTM model combined with the zh_o2o domain dictionary.
tagger = ner_tagger.load_model(name='zh_o2o')
text = "北京 望京 最好吃 的 小龙虾 在 哪里"
words = text.split(" ")
# Only tags from the given tagset are produced; everything else is 'nt'.
tagging = tagger.predict(words, tagset=['city', 'area', 'dish'])
for word, tag in tagging:
    print(word + "/" + tag)

#Result
#北京/city
#望京/area
#最好吃/nt
#的/nt
#小/nt
#龙虾/dish
#在/nt
#哪里/nt
#coding:utf-8
from __future__ import unicode_literals

import sys,os
import codecs

from deepnlp import segmenter
from deepnlp import pos_tagger # module: pos_tagger
from deepnlp import ner_tagger # module: ner_tagger

# Create new tagger instances (loaded once at module import time).
tagger_pos = pos_tagger.load_model(lang = 'zh')  # Chinese POS tagger
tagger_ner = ner_tagger.load_model(lang = 'zh')  # Chinese NER tagger

# Directory containing this script; used to locate the input data file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# concatenate tuples into one string "w1/t1 w2/t2 ..."
def _concat_tuples(tagging):
  TOKEN_BLANK = " "
  wl = [] # wordlist
  for (x, y) in tagging:
    wl.append(x + "/" + y)
  concat_str = TOKEN_BLANK.join(wl)
  return concat_str

# Read the input file into a list of lines, with line endings stripped.
docs = []
# Context manager guarantees the handle is closed even on error; the
# original left the file open and shadowed the builtin name 'file'.
with codecs.open(os.path.join(BASE_DIR, 'docs_test.txt'), 'r', encoding='utf-8') as fin:
    for line in fin:
        docs.append(line.replace("\n", "").replace("\r", ""))
Пример #6
0
    print(e)

## pos
# Smoke-test loading the English and Chinese POS models; failures are
# printed rather than raised (best-effort check).
from deepnlp import pos_tagger
try:
    tagger = pos_tagger.load_model(
        name='en')  # Loading English model, lang code 'en'
    tagger = pos_tagger.load_model(
        name='zh')  # Loading Chinese model, lang code 'zh'
except Exception as e:
    print("DEBUG: ERROR Found...")
    print(e)

## ner
# Smoke-test each available Chinese NER dictionary variant.
from deepnlp import ner_tagger
try:
    my_tagger = ner_tagger.load_model(name='zh')
    my_tagger = ner_tagger.load_model(name='zh_o2o')
    my_tagger = ner_tagger.load_model(name='zh_entertainment')
except Exception as e:
    print("DEBUG: ERROR Found...")
    print(e)

## parse
# Smoke-test the Chinese dependency parser model.
from deepnlp import nn_parser
try:
    parser = nn_parser.load_model(name='zh')
except Exception as e:
    print("DEBUG: ERROR Found...")
    print(e)
Пример #7
0
# Segment and NER-tag each line of a resume file, writing "word tag"
# pairs to an output file.
import deepnlp
deepnlp.download('ner')

from deepnlp import segmenter
from deepnlp import ner_tagger

# Load the Chinese segmenter and NER tagger once, up front.
tokenizer = segmenter.load_model(name='zh')
tagger = ner_tagger.load_model(name='zh')

# Context managers guarantee both files are closed even if tagging
# raises; the original only closed them on the success path.
with open('./data/person_resume.txt', 'r') as file_r, \
        open('./data/person_resume02.txt', 'w') as file_w:
    for line in file_r:
        text = line.rstrip()
        words = tokenizer.seg(text)
        tagging = tagger.predict(words)
        for (w, t) in tagging:
            # NOTE(review): pairs are written back-to-back with no
            # separator or newline between them -- confirm intended.
            file_w.write(w + ' ' + t)
Пример #8
0
 def __init__(self, lang):
     """Build a POS + NER tagging pipeline for the given language code.

     Args:
         lang: language code forwarded to both model loaders (e.g. 'zh').
     """
     print("Starting new Tensorflow session...")
     # NOTE(review): TF1.x-style session, held for the object's lifetime;
     # no close() is visible in this chunk.
     self.session = tf.Session()
     print("Loading pipeline modules...")
     self.tagger_pos = pos_tagger.load_model(lang)  # class tagger_pos
     self.tagger_ner = ner_tagger.load_model(lang)  # class tagger_ner
Пример #9
0
# Best-effort download/load of the zh_finance domain models; errors are
# printed, not raised.
deepnlp.download("pos", "zh_finance")
try:
    pos_tagger.load_model("zh_finance")
except Exception as e:
    print (e)

from deepnlp import ner_tagger
try:
    deepnlp.download("ner", "zh_finance")
except Exception as e:
    print (e)

# NOTE(review): the pos section above skips register_model while ner and
# parse call it before downloading -- confirm whether pos needs it too.
deepnlp.register_model("ner", "zh_finance")
deepnlp.download("ner", "zh_finance")
try:
    ner_tagger.load_model("zh_finance")
except Exception as e:
    print (e)

from deepnlp import nn_parser
try:
    deepnlp.download("parse", "zh_finance")
except Exception as e:
    print (e)

# Register then download the finance parser model, then try loading it.
deepnlp.register_model("parse", "zh_finance")
deepnlp.download("parse", "zh_finance")
try:
    nn_parser.load_model("zh_finance")
except Exception as e:
    print (e)
# -*- coding:utf-8 -*-
# Demo: download and load the Chinese NER model; tagging section below
# is currently disabled.

import deepnlp
import jieba

deepnlp.download('ner')
# deepnlp.download()

from deepnlp import ner_tagger
tagger = ner_tagger.load_model(lang='zh')  # Loading Chinese NER model
# NOTE(review): exit(0) halts the script here -- everything below this
# line is dead code (likely a debugging leftover).
exit(0)
text = "习近平的妻子是彭丽媛"
words = jieba.cut(text)  # jieba.cut returns a generator of tokens
print(" ".join(words).encode('utf-8'))

print('---------------------------------')

# tagging = tagger.predict(words)
# for (w,t) in tagging:
#     str = w + "/" + t
#     print (str.encode('utf-8'))