示例#1
0
 def test_ner(self):
     """Check character-level NER labels for a short weather question.

     Runs ``jiagu.ner`` on the raw sentence, prints each character paired
     with its label, and asserts that one label is produced per character
     and that only the leading two characters are tagged as a location.
     """
     sentence = '厦门明天会不会下雨'
     labels = jiagu.ner(sentence)  # named entity recognition

     # Pair every character with the label predicted for it.
     char_label_pairs = list(zip(sentence, labels))
     print('NER result:', char_label_pairs)

     self.assertEqual(len(labels), len(sentence))

     expected_labels = ['B-LOC', 'I-LOC'] + ['O'] * 7
     self.assertEqual(labels, expected_labels)
def nercontent(chapter_id):
    """Return the stored NER tags for every word segment of a chapter.

    The word segments come from ``getContentSeg(chapter_id)`` as a sequence
    of groups. If no ``NerWordSeg`` row exists for the very first word
    segment, tags are produced with ``jiagu.ner`` for every group and added
    to the database session. The tag of each word segment is then read back
    through ``NerWordSeg`` queries.

    Args:
        chapter_id: Identifier passed through to ``getContentSeg``.

    Returns:
        A ``(response, 200)`` tuple whose response is
        ``jsonify({'ners': ...})`` holding one list of tags per group.
    """
    wordsegss = getContentSeg(chapter_id)

    # NOTE(review): indexing [0][0] assumes at least one non-empty group.
    first_word_id = wordsegss[0][0].id
    existing_rows = NerWordSeg.query.filter_by(word_id=first_word_id).all()

    if existing_rows == []:
        for wordsegs in wordsegss:
            tokens = [wordseg.wordseg for wordseg in wordsegs]
            predicted = jiagu.ner(tokens)  # named entity recognition
            for position, label in enumerate(predicted):
                row = NerWordSeg(nertag=ner_dict[label],
                                 word_id=wordsegs[position].id)
                # NOTE(review): no commit is issued in this function;
                # presumably the session is committed elsewhere - confirm.
                db.session.add(row)

    # Read the tag of every word segment back, preserving the grouping.
    ners = []
    for wordsegs in wordsegss:
        group_tags = []
        for wordseg in wordsegs:
            stored = NerWordSeg.query.filter_by(word_id=wordseg.id).first()
            group_tags.append(stored.nertag)
        ners.append(group_tags)

    payload = {'ners': ners}
    return jsonify(payload), 200
示例#3
0
def read_txt(file_path):
    """Run NER over each line of a text file, then print a knowledge demo.

    Every line of ``file_path`` (read as UTF-8) is passed to ``jiagu.ner``.
    Labels other than ``'O'`` are collected per line, and lines that yield
    at least one such label are kept in a local list. Afterwards
    ``jiagu.knowledge`` is applied to a fixed sample paragraph and the
    result is printed.

    Args:
        file_path: Path of the text file to read.

    Returns:
        None.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Iterate the file object directly instead of materializing
        # readlines() first; the stripped lines are the same.
        all_data = [line.strip('\n') for line in f]

    # NOTE(review): `result` is built but never returned or used in the
    # visible code - confirm whether the caller was meant to receive it.
    result = []
    for data in all_data:
        # Compare by value: `is not 'O'` tested object identity, which only
        # works by accident of string interning and is a SyntaxWarning on
        # Python 3.8+.
        one = [ner for ner in jiagu.ner(data) if ner != 'O']
        if one:
            result.append(one)

    text = '''
4. 香农的信息定义
假定事物状态可以用一个以经典集合论为基础的概率模型来描述,则信息就是用来消除不确定性的东西,或信息是事物运动状态或存在方式的不确定性描述。
但在实际中要寻找一个合适的概率模型往往是非常困难的,有时是否存在这样一种模型还值得探讨。此外,信息有很强的主观性和实用性,但该定义没有考虑信息接收者的主观特性和主观意义,不顾信息的具体含义、具体用途、重要程度和可能引起的后果等因素,这就与实际情况不完全一致。
'''
    keywords = jiagu.knowledge(text)  # extraction over the sample paragraph
    print(keywords)
示例#4
0
import jiagu

# jiagu.init()  # optional manual initialization; otherwise jiagu initializes on demand

# Demo: segment a sentence two ways, then tag the resulting words.
text = '苏州的天气不错'

words = jiagu.seg(text)  # word segmentation
print(words)

words = jiagu.cut(text)  # word segmentation
print(words)

pos = jiagu.pos(words)  # part-of-speech tagging
print(pos)

ner = jiagu.ner(words)  # named entity recognition
print(ner)

# Dictionary-mode segmentation
text = '思知机器人挺好用的'
words = jiagu.seg(text)
print(words)

# jiagu.load_userdict('dict/user.dict')  # load a custom dictionary; a dictionary path or a list of words is accepted
jiagu.load_userdict(['思知机器人'])

# Segment the same text again now that the custom word is registered.
words = jiagu.seg(text)
print(words)

text = '''
该研究主持者之一、波士顿大学地球与环境科学系博士陈池(音)表示,“尽管中国和印度国土面积仅占全球陆地的9%,但两国为这一绿化过程贡献超过三分之一。考虑到人口过多的国家一般存在对土地过度利用的问题,这个发现令人吃惊。”
示例#5
0
文件: test.py 项目: glenlivet/jiagu
import jiagu

# jiagu.init()  # optional manual initialization; otherwise jiagu initializes on demand

# Demo: segment one sentence with two methods, then run tagging on it.
text = '厦门明天会不会下雨'

words = jiagu.seg(text)  # word segmentation
print(words)

words = jiagu.cws(text, model="mmseg")  # mmseg segmentation
print(words)

# POS tagging consumes the mmseg word list produced just above.
pos = jiagu.pos(words)  # part-of-speech tagging
print(pos)

# NER is given the raw sentence here, not the segmented words.
ner = jiagu.ner(text)  # named entity recognition
print(ner)
示例#6
0
import jiagu

# Register the custom dictionary before any segmentation is performed.
jiagu.load_userdict('dict/user.dict')

# Read the whole contract. The context manager closes the handle even if
# read() raises; the original left the file open for the life of the process.
# NOTE(review): open() uses the platform default encoding here; pass
# encoding=... explicitly if contract.txt has a known encoding - confirm.
with open("contract.txt", "r") as contract_file:
    contract_contents = contract_file.read()

segs = jiagu.seg(contract_contents)
ner = jiagu.ner(segs)
print(segs)
print(ner)

# Print each segment beside the NER label at the same index; newline
# segments produce an empty output line instead.
content_arr = list(segs)
for i, token in enumerate(content_arr):
    if token != '\n':
        print('index: ' + str(i) + ':' + token + ' ' + ner[i])
    else:
        print()