def train():
    """Build the training corpus and hand it to ``trainModel``.

    Loads "training.data" through ``ReadFile.QAData``, runs the three
    ``ProD`` preprocessing steps on it in order, and then calls
    ``trainModel`` with the processed data and the file name
    "trainData.model".
    """
    model_path = "trainData.model"

    corpus = ReadFile.QAData("training.data")
    corpus.readFile()

    # Each step receives the same QAData object and its return value is
    # ignored, so the steps presumably modify the object in place.
    for preprocess in (
        ProD.wordSeg,
        ProD.delHighFre_useless,
        ProD.delHighFre_psg,
    ):
        preprocess(corpus)

    trainModel(corpus, model_path)
def train():
    """Build the training corpus and hand it to ``trainModel``.

    Loads "training.data" through ``ReadFile.QAData``, runs the three
    ``ProD`` preprocessing steps on it in order, and then calls
    ``trainModel`` with the processed data and the file name
    "trainData.model".
    """
    model_path = "trainData.model"

    corpus = ReadFile.QAData("training.data")
    corpus.readFile()

    # The return values are ignored, so the steps presumably modify the
    # QAData object in place.
    ProD.wordSeg(corpus)
    ProD.delHighFre_useless(corpus)
    ProD.delHighFre_psg(corpus)

    trainModel(corpus, model_path)


if __name__ == "__main__":
    # The "# '''" lines appear to be toggle markers: removing the leading "#"
    # from a pair would turn the statements between them into an inert string.

    # '''
    # Same pipeline as train(), plus a calFre() call before training.
    fileName = "trainData.model"
    test = ReadFile.QAData("training.data")
    test.readFile()
    ProD.wordSeg(test)
    ProD.delHighFre_useless(test)
    ProD.delHighFre_psg(test)
    test.calFre()
    trainModel(test, fileName)
    # '''

    # Disabled follow-up steps (similarity lookup and score writing):
    # '''
    # print(model.most_similar("性格"))
    # writeScore1(test, "score.data", "trainData.model")
    # writeScore2(test,"score.data")
    # '''
# NOTE(review): this span begins inside a routine whose `def` line is not
# visible here, and its line breaks/indentation have been lost, so the code is
# left exactly as found.
#
# Visible fragment: reads scoreDic[qIndex][corrAns] as `highscore` (corrAns is
# presumably the index of the correct answer), then scans scoreDic[qIndex]
# from index 1. On the first entry whose score is strictly higher it sets
# flag to 0, increments showNum, and prints the question, the corrAns answer
# with its score, and the higher-scoring answer with its score. The
# triple-quoted text is a disabled variant that would print every answer and
# score for the question when flag == 0. Once showNum >= Num the enclosing
# loop is left via `break`.
#
# Script entry point: loads "develop.data" via ReadFile.QAData, reads scores
# from "score.data" with readScores, and calls showErr with a limit of 100.
corrAns = aIndex break highscore = float(scoreDic[qIndex][corrAns]) flag = 1 for i in range(1, len(scoreDic[qIndex])): if highscore < float(scoreDic[qIndex][i]): flag = 0 showNum += 1 print("\n\n=========================================") print(qAData.questionList[qIndex]) print(qAData.qAnswersDic[qIndex][corrAns], "\t", scoreDic[qIndex][corrAns]) print(qAData.qAnswersDic[qIndex][i], "\t", scoreDic[qIndex][i]) break ''' if flag==0: print("\n\n=========================================") print(qAData.questionList[qIndex]) for aIndex in range(len(scoreDic[qIndex])): print(qAData.qAnswersDic[qIndex][aIndex], "\t", scoreDic[qIndex][aIndex]) ''' if showNum >= Num: break if __name__ == "__main__": test = ReadFile.QAData("develop.data") test.readFile() scoreDic = readScores(test, "score.data") showErr(test, scoreDic, 100)
python QA_main.py fileName scoreFile 參數說明: fileName:問答句測試集文件名 scoreFile:得分輸出文件名 ''' import ReadFile import ProcessData as ProD import WordVec import QueTypeWay import sys if __name__ == "__main__": assert sys.argv.__len__() == 3 fileName = sys.argv[1] scoreFile = sys.argv[2] WordVec.train() test = ReadFile.QAData(fileName) test.readFile() ProD.wordSeg(test) ProD.delHighFre_useless(test) ProD.delHighFre_psg(test) QueTypeWay.digitToSeg(test) test.calFre() WordVec.writeScore1(test, scoreFile, "trainData.model") #WordVec.writeScore2(test, scoreFile) #WordVec.writeScore3(test, scoreFile, "trainData.model") print("OK, DONE.")
def calQueSegFre(qAData, fileName):
    """Count the items found in all questions and write the frequent ones.

    Every element obtained by iterating ``qAData.questionList[i]`` is counted
    (presumably word segments when called after segmentation). The output
    file is cleared with ``delFile`` and then, in descending count order, one
    record ``[item, count, count / total]`` is written with ``writeFile`` for
    each item that passes the threshold.

    Args:
        qAData: object exposing ``questionList``, an indexable collection of
            iterables.
        fileName: path passed to ``delFile`` and ``writeFile``.
    """
    counts = {}
    total = 0
    for qIndex in range(len(qAData.questionList)):
        for seg in qAData.questionList[qIndex]:
            total += 1
            counts[seg] = counts.get(seg, 0) + 1

    delFile(fileName)

    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for seg, count in ranked:
        # NOTE(review): the threshold test divides by the number of distinct
        # items, while the ratio written out divides by the total number of
        # occurrences -- confirm this asymmetry is intended.
        per_distinct = float(count) / len(counts)
        if per_distinct > (freThres / 10):
            writeFile(fileName, [seg, count, float(count) / total])


if __name__ == "__main__":
    dataFile = "training.data"
    test = ReadFile.QAData(dataFile)
    test.readFile()

    # Statistics on the raw data, before segmentation.
    calWordFre(test, "calWordFre.data")

    # Statistics after ProD.wordSeg has processed the data.
    ProD.wordSeg(test)
    calSegFre(test, "calSegFre.data")

    # Statistics after ProD.delHighFre_useless has processed the data.
    ProD.delHighFre_useless(test)
    calQueSegFre(test, "queSegFre.data")
    statQueAns(test, "statQueAns.data")

    print(len(test.questionList))