def _strip_to_chinese(text):
    """Trim non-Chinese characters from both ends of *text*.

    Characters are dropped from the end first, then from the start, until
    the boundary character lies in U+4E00..U+9FFF or only one character is
    left (a single remaining character is kept even if it is not Chinese).
    """
    while len(text) > 1 and not (u'\u4e00' <= text[-1] <= u'\u9fff'):
        text = text[:-1]
    while len(text) > 1 and not (u'\u4e00' <= text[0] <= u'\u9fff'):
        text = text[1:]
    return text


def _clean_line(line):
    """Drop spaces, newlines and emoji from *line*, then trim it to Chinese text."""
    return _strip_to_chinese(remove_emoji(line.replace(" ", "").replace("\n", "")))


def task(param):
    """Score post/response pairs and split them into QA and non-QA lists.

    Args:
        param: dict with the keys ``'id'`` (used in the knowledge-base file
            name and in progress messages), ``'qlines'`` (post lines) and
            ``'alines'`` (response lines). The two line sequences are walked
            in lockstep with ``zip``.

    Returns:
        dict with ``'qaResult'`` (pairs scoring above 0.7), ``'noqaResult'``
        (the remaining scored pairs), ``'sentence_number'`` (pairs that
        passed the filters, including those whose scoring raised) and
        ``'knowledge_number'`` (number of entries in ``'qaResult'``).

    Relies on the module-level names ``minlen`` (minimum cleaned length) and
    ``dos`` (length-difference limit), which are defined outside this block.
    """
    print('加载模型中...')
    LtpModel = ltpModel()
    W2vModel = w2vModel(LtpModel)
    TripleModel = tripleModel(LtpModel)
    QAModel = qaModel('qa_data\\weibokb' + str(param['id']) + '.json',
                      LtpModel, TripleModel, W2vModel)

    qaResult = []
    noqaResult = []
    qlines = param['qlines']
    alines = param['alines']
    sentence_number = 0
    knowledge_number = 0
    for postLine, responseLine in zip(qlines, alines):
        question = _clean_line(postLine)
        if len(question) < minlen:
            continue
        answer = _clean_line(responseLine)
        # Skip pairs that are too short, too different in length, or that
        # contain user mentions / links.
        if (len(answer) < minlen
                or abs(len(question) - len(answer)) >= dos
                or '@' in question or '@' in answer
                or 'http' in question or 'http' in answer):
            continue
        try:
            score, _reason = QAModel.getMatchScore(question, answer)
            if score > 0.7:
                knowledge_number += 1
                qaResult.append(question + '\t' + answer + '\n')
            else:
                noqaResult.append(question + '\t' + answer + '\n')
        except Exception:
            # Best effort: one failing pair must not abort the batch, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            traceback.print_exc()
        sentence_number += 1
        if sentence_number % 100 == 0:
            try:
                print('process:' + str(param['id']) + ' done' + str(sentence_number))
            except Exception:
                # A failing progress print must not stop processing.
                traceback.print_exc()
    QAModel.saveKB()
    return {
        'qaResult': qaResult,
        'noqaResult': noqaResult,
        'sentence_number': sentence_number,
        'knowledge_number': knowledge_number,
    }
示例#2
0
def task(param):
    """Score tab-separated question/answer lines and split them by match score.

    Args:
        param: dict with the keys ``'id'`` (used in the knowledge-base file
            name and in progress messages; the progress messages format it
            with ``%d``) and ``'lines'`` (an iterable of lines, each expected
            to hold a question and an answer separated by one tab).

    Returns:
        dict with ``'qaResult'`` (pairs scoring above 0.7), ``'noqaResult'``
        (the remaining scored pairs), ``'sentence_number'`` (lines counted,
        including those whose processing raised; lines that do not split
        into exactly two fields are not counted) and ``'knowledge_number'``
        (number of entries in ``'qaResult'``).
    """
    print('加载模型中...')
    NlpModel = nlpModel()
    W2vModel = w2vModel()
    TripleModel = tripleModel(NlpModel)
    QAModel = qaModel('qa_data\\rkb' + str(param['id']) + '.json', TripleModel,
                      W2vModel)

    qaResult = []
    noqaResult = []
    lines = param['lines']
    sentence_number = 0
    knowledge_number = 0
    for line in lines:
        try:
            qaList = line.strip().split('\t')
            if len(qaList) != 2:
                continue
            question, answer = qaList
            score, _reason = QAModel.getMatchScore(question, answer)
            if score > 0.7:
                knowledge_number += 1
                qaResult.append(question + '\t' + answer + '\n')
            else:
                noqaResult.append(question + '\t' + answer + '\n')
        except Exception:
            # Best effort: report the offending line and carry on, while
            # letting KeyboardInterrupt/SystemExit propagate. str() keeps
            # the handler itself from raising when the line is not a str.
            print('process:' + str(param['id']) + ' error ' + str(line))
            traceback.print_exc()
        sentence_number += 1
        if sentence_number % 100 == 0:
            print(
                'process:%d done%d qa%d noqa%d' %
                (param['id'], sentence_number, len(qaResult), len(noqaResult)))
    print('process:%d finish%d qa%d noqa%d' %
          (param['id'], sentence_number, len(qaResult), len(noqaResult)))
    QAModel.saveKB()
    return {
        'qaResult': qaResult,
        'noqaResult': noqaResult,
        'sentence_number': sentence_number,
        'knowledge_number': knowledge_number,
    }
示例#3
0
# -*- coding: utf-8 -*-
import sys
import traceback
from nlpModel import nlpModel
from qaModel import qaModel
from w2vModel import w2vModel
from tripleModel import tripleModel

# Tag of the model being evaluated; it is embedded in the name of the score
# file that is opened for writing further down.
modelname = 'beam1.gm'

# Build the model stack once at start-up (the printed message means
# "Loading models...").
print('加载模型中...')
NlpModel = nlpModel()
W2vModel = w2vModel()
TripleModel = tripleModel(NlpModel)
QAModel = qaModel('qa_data/kb.json', TripleModel, W2vModel)

# The printed message means "Start reading files".
print('开始读取文件')

# Score accumulators: eleven zero-initialised counters and a running total.
# NOTE(review): presumably a histogram of scores in 0.1-wide buckets plus the
# sum for an average; their use is not visible in this excerpt, so confirm.
scoreNums = [0] * 11
scoreSum = 0

with open('Twitter.100w.test.key', 'r') as keyFile, open('Twitter.100w.test.beam1.gm.output', 'r') as valueFile,\
open('Twitter.100w.rule.'+modelname+'.score','w') as out_file:

    id = 1

    for key, value in zip(keyFile, valueFile):

        key = key.strip()
        value = value.strip()
        try:
示例#4
0
 def __init__(self, model_dir, tokenize):
     """Initialise the instance's model components.

     Loads the pretrained RNN from ``model_dir``, wraps it in a
     ``Trained_oie`` that receives ``tokenize`` as a keyword argument, and
     creates a ``w2vModel``.
     """
     pretrained = load_pretrained_rnn(model_dir)
     self.model = pretrained
     self.oie = Trained_oie(pretrained, tokenize=tokenize)
     self.w2v = w2vModel()
示例#5
0
import traceback
from ltpModel import ltpModel
from qaModel import qaModel
from w2vModel import w2vModel
from tripleModel import tripleModel
import myLog

# The Windows 10 version 1709 console has a bug; this package is imported to
# keep print from failing unexpectedly.
import win_unicode_console
win_unicode_console.enable()

logger = myLog.getLogging('baiduqalog.txt')  # log file

# Build the model stack once at start-up (the logged message means
# "Loading models...").
logger.info('加载模型中...')
LtpModel = ltpModel()
W2vModel = w2vModel(LtpModel)
TripleModel = tripleModel(LtpModel)
QAModel = qaModel('qa_data\\baiduqakb.json', LtpModel, TripleModel, W2vModel)

# The logged message means "Start reading files".
logger.info('开始读取文件')
# Input and output file paths; these defaults can be overridden from the
# command line below.
in_file_name = "test_file\\baiduqa.txt"
out_file_name = "test_file\\baiduqa_output.txt"
# The first command-line argument, when given, replaces the input path.
# NOTE(review): `sys` is not in this script's import list above
# (traceback, ltpModel, qaModel, w2vModel, tripleModel, myLog); confirm it
# is imported before this point.
if len(sys.argv) > 1:
    in_file_name = sys.argv[1]

# The second command-line argument, when given, replaces the output path.
if len(sys.argv) > 2:
    out_file_name = sys.argv[2]

# By default each line holds one question and one answer, separated by a tab
with open(in_file_name, 'r',