def testSaveLinksClassifier():
    '''
    Train the hybrid links classifier on cross-test fold 1.

    Builds the word2vec, train-links and test-links paths below
    ROOT_PATH.auto_config_root() and hands them to
    MedLinksPredict.trainHybirdLinksClassifier_file together with
    v_ratio=0.15 and a storeFilePath target.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Active split: cross_test fold 1.  Splits used at other times
    # (all under model_cache/relation_learning/):
    #   plain : shicai2bingzheng_train_links1-1200.txt
    #           shicai2bingzheng_test_links1201-1500.txt
    #   fold 2: cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt
    #           cross_test/2/shicai2bingzheng_test_links301-600.txt
    #   fold 3: cross_test/3/shicai2bingzheng_train_links1-600,901-1500.txt
    #           cross_test/3/shicai2bingzheng_test_links601-900.txt
    #   fold 4: cross_test/4/shicai2bingzheng_train_links1-900,1201-1500.txt
    #           cross_test/4/shicai2bingzheng_test_links901-1200.txt
    #   fold 5: cross_test/5/shicai2bingzheng_train_links1-1200.txt
    #           cross_test/5/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_train_links301-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_test_links1-300.txt'

    # Store target; the name used for the other variant was
    # u'model/keras/links(sc2bz)_classifiecr_cnnlstmT'.
    modelStorePath = root + u'model/keras/links(sc2bz)_classifiecr_cnnsT'

    # "cnns + lstm part" (a separate "cnns part" section followed this
    # call in the original but was empty).
    predictor.trainHybirdLinksClassifier_file(
        w2vPath,
        trainPath,
        testPath,
        v_ratio=0.15,
        storeFilePath=modelStorePath)
def testLoadPreEmbedingMat():
    '''
    Print the x_train / x_test data produced from the two links files.

    Loads the train (1-1200) and test (1201-1500) links files in one call,
    builds the pre-trained embedding inputs and the padded data through
    NeuralLayerClassifier, then prints only the x parts of both splits.
    '''
    predictor = MedLinksPredict()
    layerClassifier = NeuralLayerClassifier()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'
    linksPaths = [
        root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt',
        root + u'model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt',
    ]

    sequences, texts, boundary, labelLists = predictor.loadDetachedLinksReps(
        linksPaths, testWithLabel=True)

    # The embedding dimension and matrix come back as well, but only the
    # word count is used further down.
    wordCount, _embeddingDim, _embeddingMatrix = layerClassifier.prodPreWordEmbedingMat(
        w2vPath, sequences)
    _maxSequenceLength, padded = layerClassifier.prodPadData(texts, wordCount)

    trainX, _trainY = layerClassifier.prodTrainTestData(
        padded, boundary, labelLists[0])
    # NOTE(review): the second split passes boundary minus the total number
    # of sequences -- presumably a negative offset selecting the tail;
    # confirm against prodTrainTestData.
    testX, _testY = layerClassifier.prodTrainTestData(
        padded, boundary - len(sequences), labelLists[1])

    print('x_train:---------------------------')
    print(trainX)
    print('x_test:----------------------------')
    print(testX)
def testEvaluateLinksClassify():
    '''
    Train the hybrid links classifier and print its evaluation score.

    Uses links 1-1200 as the train file and links 1201-1500 as the test
    file; training is called with testWithLabel=True and v_ratio=0.15.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'
    trainPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    testPath = root + u'model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt'

    trainedModel = predictor.trainHybirdLinksClassifier_file(
        w2vPath,
        trainPath,
        testPath,
        testWithLabel=True,
        v_ratio=0.15)

    evaluation = predictor.evalLayerLinksClassifier_file(
        trainedModel, w2vPath, trainPath, testPath)
    print(evaluation)
def testSVMLinksTrainTest():
    '''
    Train the SVM links classifier and print its accuracy and recall.

    Both the train path and the test path point at the links 1-1200 file;
    the separate test file (shicai2bingzheng_test_links1201-1500.txt) was
    switched off in the original code.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    trainPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    # Same file as trainPath on purpose (see docstring).
    evalPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'

    svmEstimator = predictor.trainSVMLinksClassifier_file(trainPath, evalPath)
    accuracy, recall = predictor.evalEstimatorlinksClasses_file(
        svmEstimator, trainPath, evalPath)

    print('accuracy: ' + str(accuracy))
    print('recall: ' + str(recall))
def writePredictResIntoFile(classes):
    '''
    Append the predicted class to every line of the predict-links file and
    write the result to the res-links file.

    For the i-th input line the value str(classes[i][0]) is appended after
    the line's text (its trailing newline removed) and a newline is added.

    :param classes: indexable sequence with at least one entry per input
        line; element [0] of each entry is written out.
    :raises IndexError: if classes has fewer entries than the input file
        has lines.
    '''
    root = ROOT_PATH.auto_config_root()
    originalDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_predict_links1501-2834.txt'
    predictResFilePath = root + u'model_cache/relation_learning/shicai2bingzheng_res_links1501-2834.txt'

    # Context managers close the handles even if reading/indexing fails.
    with open(originalDataPath, 'r') as orgFile:
        lines = orgFile.readlines()

    newLines = []
    for i, line in enumerate(lines):
        # Remove only a real line terminator: a final line without '\n'
        # must not lose its last character.
        text = line[:-1] if line.endswith('\n') else line
        newLines.append(text + str(classes[i][0]) + '\n')

    with open(predictResFilePath, 'w') as resFile:
        resFile.write(''.join(newLines))
def testPredictNewLinks():
    '''
    Load the stored cnn-lstm links classifier and classify the unlabeled
    links 1501-2834 file.

    :return: the (classes, proba) pair produced by
        MedLinksPredict.testLayerLinksClasses_file with testWithLabel=False.
    '''
    # Instantiate the predictor: the original bound the class itself
    # (missing parentheses), so the method below was invoked without an
    # instance, unlike every other caller in this file.
    linksPredictObj = MedLinksPredict()
    layerObj = NeuralLayerClassifier()

    root = ROOT_PATH.auto_config_root()
    gensimModelPath = root + u'model/word2vec/zongheword2vecModel.vector'
    trainLinksDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    predictLinksDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_predict_links1501-2834.txt'
    storeFilePath = root + u'model/keras/links(sc2bz)_classifier_cnnlstm'

    layerModel = layerObj.loadStoredModel(storeFilePath, recompile=False)

    classes, proba = linksPredictObj.testLayerLinksClasses_file(
        layerModel,
        gensimModelPath,
        trainLinksDataPath,
        predictLinksDataPath,
        testWithLabel=False)
    return classes, proba
def writePredictResIntoFile(classes):
    '''
    Append the predicted class to every line of the predict-links file and
    write the result to the res-links file.

    For the i-th input line the value str(classes[i][0]) is appended after
    the line's text (its trailing newline removed) and a newline is added.

    :param classes: indexable sequence with at least one entry per input
        line; element [0] of each entry is written out.
    :raises IndexError: if classes has fewer entries than the input file
        has lines.
    '''
    root = ROOT_PATH.auto_config_root()
    originalDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_predict_links1501-2834.txt'
    predictResFilePath = root + u'model_cache/relation_learning/shicai2bingzheng_res_links1501-2834.txt'

    # Context managers close the handles even if reading/indexing fails.
    with open(originalDataPath, 'r') as orgFile:
        lines = orgFile.readlines()

    newLines = []
    for i, line in enumerate(lines):
        # Remove only a real line terminator: a final line without '\n'
        # must not lose its last character.
        text = line[:-1] if line.endswith('\n') else line
        newLines.append(text + str(classes[i][0]) + '\n')

    with open(predictResFilePath, 'w') as resFile:
        resFile.write(''.join(newLines))
def testClassifyLinks():
    '''
    Train the hybrid links classifier on cross-test fold 2 and classify
    that fold's test file.

    :return: the (classes, proba) pair produced by
        MedLinksPredict.testLayerLinksClasses_file.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Fold 2 of the cross test.  The plain split used at other times was
    #   model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt
    #   model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/2/shicai2bingzheng_test_links301-600.txt'

    trainedModel = predictor.trainHybirdLinksClassifier_file(
        w2vPath, trainPath, testPath)
    predicted, probabilities = predictor.testLayerLinksClasses_file(
        trainedModel, w2vPath, trainPath, testPath)

    # To inspect the results, print str(predicted[i]) + ': ' +
    # str(probabilities[i]) for every index i.
    return predicted, probabilities
def testSVMLinksTrainTest():
    '''
    Train the SVM links classifier and print its accuracy and recall.

    Both the train path and the test path point at the links 1-1200 file;
    the separate test file (shicai2bingzheng_test_links1201-1500.txt) was
    switched off in the original code.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    trainPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    # Same file as trainPath on purpose (see docstring).
    evalPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'

    svmEstimator = predictor.trainSVMLinksClassifier_file(trainPath, evalPath)
    accuracy, recall = predictor.evalEstimatorlinksClasses_file(
        svmEstimator, trainPath, evalPath)

    print('accuracy: ' + str(accuracy))
    print('recall: ' + str(recall))
def testGetBasicRecForbResFromBZ(bzList):
    '''
    Print the de-duplicated "recommended" and "forbidden" entries cached
    for the given bz keys, plus the time the lookup took.

    The cache is the first line of bzRecForbDics.json, decoded as JSON into
    a dict of the form {bz: {'0': [...], '1': [...]}}; list '0' feeds the
    recommended output and list '1' the forbidden output.

    :param bzList: iterable of bz keys; per the original note these must be
        the keys of the dict confBZRes.  Keys missing from the cache are
        ignored.
    '''
    rfCacheFilePath = ROOT_PATH.auto_config_root() + u'model_cache/find_cache/bzRecForbDics.json'

    # time.clock() was removed in Python 3.8; use it only on interpreters
    # that do not provide time.perf_counter().
    timer = getattr(time, 'perf_counter', None) or time.clock

    recommend_start = timer()
    # The context manager closes the cache file even if reading fails.
    with open(rfCacheFilePath, 'r') as rfCacheFile:
        line = rfCacheFile.readline()
    bzRecForbDics = JSONDecoder().decode(line)

    recSCList = []
    forbSCList = []
    for bz in bzList:
        if bz in bzRecForbDics:
            recSCList.extend(bzRecForbDics[bz]['0'])
            forbSCList.extend(bzRecForbDics[bz]['1'])
    recSCList = list(set(recSCList))  # de-duplication, same below
    forbSCList = list(set(forbSCList))
    recommend_end = timer()

    print('recommend foods run time: %f s' % (recommend_end - recommend_start))
    print('推荐食材(基础):')
    for sc in recSCList:
        print(sc + ', '),
    print('\n----------------------------------------------------------')
    print('禁忌食材(基础):')
    for sc in forbSCList:
        print(sc + ', '),
    print('\n----------------------------------------------------------')
def testEvaluateLinksClassify():
    '''
    Train the hybrid links classifier and print its evaluation score.

    Uses links 1-1200 as the train file and links 1201-1500 as the test
    file; training is called with testWithLabel=True and v_ratio=0.15.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'
    trainPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    testPath = root + u'model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt'

    trainedModel = predictor.trainHybirdLinksClassifier_file(
        w2vPath,
        trainPath,
        testPath,
        testWithLabel=True,
        v_ratio=0.15)

    evaluation = predictor.evalLayerLinksClassifier_file(
        trainedModel, w2vPath, trainPath, testPath)
    print(evaluation)
def testLoadPreEmbedingMat():
    '''
    Print the x_train / x_test data produced from the two links files.

    Loads the train (1-1200) and test (1201-1500) links files in one call,
    builds the pre-trained embedding inputs and the padded data through
    NeuralLayerClassifier, then prints only the x parts of both splits.
    '''
    predictor = MedLinksPredict()
    layerClassifier = NeuralLayerClassifier()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'
    linksPaths = [
        root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt',
        root + u'model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt',
    ]

    sequences, texts, boundary, labelLists = predictor.loadDetachedLinksReps(
        linksPaths, testWithLabel=True)

    # The embedding dimension and matrix come back as well, but only the
    # word count is used further down.
    wordCount, _embeddingDim, _embeddingMatrix = layerClassifier.prodPreWordEmbedingMat(
        w2vPath, sequences)
    _maxSequenceLength, padded = layerClassifier.prodPadData(texts, wordCount)

    trainX, _trainY = layerClassifier.prodTrainTestData(
        padded, boundary, labelLists[0])
    # NOTE(review): the second split passes boundary minus the total number
    # of sequences -- presumably a negative offset selecting the tail;
    # confirm against prodTrainTestData.
    testX, _testY = layerClassifier.prodTrainTestData(
        padded, boundary - len(sequences), labelLists[1])

    print('x_train:---------------------------')
    print(trainX)
    print('x_test:----------------------------')
    print(testX)
def testClassifyLinks():
    '''
    Train the hybrid links classifier on cross-test fold 2 and classify
    that fold's test file.

    :return: the (classes, proba) pair produced by
        MedLinksPredict.testLayerLinksClasses_file.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Fold 2 of the cross test.  The plain split used at other times was
    #   model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt
    #   model_cache/relation_learning/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/2/shicai2bingzheng_test_links301-600.txt'

    trainedModel = predictor.trainHybirdLinksClassifier_file(
        w2vPath, trainPath, testPath)
    predicted, probabilities = predictor.testLayerLinksClasses_file(
        trainedModel, w2vPath, trainPath, testPath)

    # To inspect the results, print str(predicted[i]) + ': ' +
    # str(probabilities[i]) for every index i.
    return predicted, probabilities
def testLoadLinksReps():
    '''
    Load the links 1-1000 training file through
    MedLinksPredict.loadSingleLinksReps and print what comes back: every
    word list joined by spaces, the number of word lists, the maximum text
    length and the label list.
    '''
    predictor = MedLinksPredict()
    linksPath = ROOT_PATH.auto_config_root() + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1000.txt'

    wordsPerLink, longestText, labels = predictor.loadSingleLinksReps(linksPath)

    for words in wordsPerLink:
        print(' '.join(words))
    print(len(wordsPerLink))
    print(longestText)
    print(labels)
def testLoadLinksClassifier():
    '''
    Load the stored links classifier (recompile=True) and print the score
    returned by evaluating it on cross-test fold 1.
    '''
    predictor = MedLinksPredict()
    layerClassifier = NeuralLayerClassifier()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Active split: cross_test fold 1.  Splits used at other times
    # (all under model_cache/relation_learning/):
    #   plain : shicai2bingzheng_train_links1-1200.txt
    #           shicai2bingzheng_test_links1201-1500.txt
    #   fold 2: cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt
    #           cross_test/2/shicai2bingzheng_test_links301-600.txt
    #   fold 3: cross_test/3/shicai2bingzheng_train_links1-600,901-1500.txt
    #           cross_test/3/shicai2bingzheng_test_links601-900.txt
    #   fold 4: cross_test/4/shicai2bingzheng_train_links1-900,1201-1500.txt
    #           cross_test/4/shicai2bingzheng_test_links901-1200.txt
    #   fold 5: cross_test/5/shicai2bingzheng_train_links1-1200.txt
    #           cross_test/5/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_train_links301-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_test_links1-300.txt'

    # Stored model to load; the name used for the other variant was
    # u'model/keras/links(sc2bz)_classifiecr_cnnlstmT'.
    modelStorePath = root + u'model/keras/links(sc2bz)_classifiecr_cnnsT'
    storedModel = layerClassifier.loadStoredModel(modelStorePath, recompile=True)

    # Earlier variants dumped storedModel.to_json() or returned the
    # (classes, proba) pair from testLayerLinksClasses_file instead of
    # printing a score.
    evaluation = predictor.evalLayerLinksClassifier_file(
        storedModel, w2vPath, trainPath, testPath)
    print(evaluation)
def testPredictNewLinks():
    '''
    Load the stored cnn-lstm links classifier and classify the unlabeled
    links 1501-2834 file.

    :return: the (classes, proba) pair produced by
        MedLinksPredict.testLayerLinksClasses_file with testWithLabel=False.
    '''
    # Instantiate the predictor: the original bound the class itself
    # (missing parentheses), so the method below was invoked without an
    # instance, unlike every other caller in this file.
    linksPredictObj = MedLinksPredict()
    layerObj = NeuralLayerClassifier()

    root = ROOT_PATH.auto_config_root()
    gensimModelPath = root + u'model/word2vec/zongheword2vecModel.vector'
    trainLinksDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1200.txt'
    predictLinksDataPath = root + u'model_cache/relation_learning/shicai2bingzheng_predict_links1501-2834.txt'
    storeFilePath = root + u'model/keras/links(sc2bz)_classifier_cnnlstm'

    layerModel = layerObj.loadStoredModel(storeFilePath, recompile=False)

    classes, proba = linksPredictObj.testLayerLinksClasses_file(
        layerModel,
        gensimModelPath,
        trainLinksDataPath,
        predictLinksDataPath,
        testWithLabel=False)
    return classes, proba
def loadSCforBZDicIntoFile():
    '''
    Convert the labeled links file into a JSON cache keyed by link end.

    Every non-blank input line is read as "start-->end{...}label": the text
    before the first '{' is split on '-->' into start and end, and the
    single character after the first '}' is the label.  The result has the
    form {end: {'0': [start, ...], '1': [start, ...]}}; label '0' goes to
    list '0', any other label goes to list '1'.  The dict is JSON-encoded
    and written to bzRecForbDics.json, and the elapsed time is printed.

    :raises IndexError: if a non-blank line has no '-->' before its '{'.
    '''
    root = ROOT_PATH.auto_config_root()
    labeledLinksFilePath = root + u'model_cache/shicai2bingzheng_res_links(all).txt'
    rfCacheFilePath = root + u'model_cache/find_cache/bzRecForbDics.json'

    # time.clock() was removed in Python 3.8; use it only on interpreters
    # that do not provide time.perf_counter().
    timer = getattr(time, 'perf_counter', None) or time.clock
    trans_start = timer()

    # linkTupleList looks like this:
    # [(start1, end1, label1), (start2, end2, label2), ...]
    linkTupleList = []
    # The original never closed this handle; the context manager does.
    with open(labeledLinksFilePath, 'r') as labeledLinksFile:
        for line in labeledLinksFile:
            if not line.strip():
                # A blank (e.g. trailing) line carries no link and would
                # otherwise fail on the '-->' split below.
                continue
            link = line[:line.find('{')]
            linkParts = link.split('-->')
            closeIdx = line.find('}')
            label = line[closeIdx + 1:closeIdx + 2]
            linkTupleList.append((linkParts[0], linkParts[1], label))

    bzRecForbDics = {}
    for start, end, label in linkTupleList:
        if end not in bzRecForbDics:
            bzRecForbDics[end] = {'0': [], '1': []}
        if label == '0':
            bzRecForbDics[end]['0'].append(start)
        else:
            bzRecForbDics[end]['1'].append(start)

    # Encode first so the cache file is not truncated if encoding fails.
    bzRFDicsJson = JSONEncoder().encode(bzRecForbDics)
    with open(rfCacheFilePath, 'w') as rfCacheFile:
        rfCacheFile.write(bzRFDicsJson)

    trans_end = timer()
    print('trans data into json run time: %f s' % (trans_end - trans_start))
def testLoadLinksReps():
    '''
    Load the links 1-1000 training file through
    MedLinksPredict.loadSingleLinksReps and print what comes back: every
    word list joined by spaces, the number of word lists, the maximum text
    length and the label list.
    '''
    predictor = MedLinksPredict()
    linksPath = ROOT_PATH.auto_config_root() + u'model_cache/relation_learning/shicai2bingzheng_train_links1-1000.txt'

    wordsPerLink, longestText, labels = predictor.loadSingleLinksReps(linksPath)

    for words in wordsPerLink:
        print(' '.join(words))
    print(len(wordsPerLink))
    print(longestText)
    print(labels)
def compMaxTextLength():
    '''
    Print the largest number of comma-separated items found between the
    first '{' and the first '}' of any line in shicai2bingzheng_links.txt.

    Prints 0 for an empty file.
    '''
    linksTextFilePath = ROOT_PATH.auto_config_root() + u'model_cache/shicai2bingzheng_links.txt'

    max_len = 0
    # Stream the file line by line; the context manager closes it even on
    # error, and the handle no longer shadows the builtin name `file`.
    with open(linksTextFilePath) as linksFile:
        for line in linksFile:
            textCnt = line[line.find('{') + 1:line.find('}')]
            max_len = max(max_len, len(textCnt.split(',')))

    print('max_length:'),
    print(max_len)
def testLoadLinksClassifier():
    '''
    Load the stored links classifier (recompile=True) and print the score
    returned by evaluating it on cross-test fold 1.
    '''
    predictor = MedLinksPredict()
    layerClassifier = NeuralLayerClassifier()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Active split: cross_test fold 1.  Splits used at other times
    # (all under model_cache/relation_learning/):
    #   plain : shicai2bingzheng_train_links1-1200.txt
    #           shicai2bingzheng_test_links1201-1500.txt
    #   fold 2: cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt
    #           cross_test/2/shicai2bingzheng_test_links301-600.txt
    #   fold 3: cross_test/3/shicai2bingzheng_train_links1-600,901-1500.txt
    #           cross_test/3/shicai2bingzheng_test_links601-900.txt
    #   fold 4: cross_test/4/shicai2bingzheng_train_links1-900,1201-1500.txt
    #           cross_test/4/shicai2bingzheng_test_links901-1200.txt
    #   fold 5: cross_test/5/shicai2bingzheng_train_links1-1200.txt
    #           cross_test/5/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_train_links301-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_test_links1-300.txt'

    # Stored model to load; the name used for the other variant was
    # u'model/keras/links(sc2bz)_classifiecr_cnnlstmT'.
    modelStorePath = root + u'model/keras/links(sc2bz)_classifiecr_cnnsT'
    storedModel = layerClassifier.loadStoredModel(modelStorePath, recompile=True)

    # Earlier variants dumped storedModel.to_json() or returned the
    # (classes, proba) pair from testLayerLinksClasses_file instead of
    # printing a score.
    evaluation = predictor.evalLayerLinksClassifier_file(
        storedModel, w2vPath, trainPath, testPath)
    print(evaluation)
def testSaveLinksClassifier():
    '''
    Train the hybrid links classifier on cross-test fold 1.

    Builds the word2vec, train-links and test-links paths below
    ROOT_PATH.auto_config_root() and hands them to
    MedLinksPredict.trainHybirdLinksClassifier_file together with
    v_ratio=0.15 and a storeFilePath target.
    '''
    predictor = MedLinksPredict()
    root = ROOT_PATH.auto_config_root()

    w2vPath = root + u'model/word2vec/zongheword2vecModel.vector'

    # Active split: cross_test fold 1.  Splits used at other times
    # (all under model_cache/relation_learning/):
    #   plain : shicai2bingzheng_train_links1-1200.txt
    #           shicai2bingzheng_test_links1201-1500.txt
    #   fold 2: cross_test/2/shicai2bingzheng_train_links1-300,601-1500.txt
    #           cross_test/2/shicai2bingzheng_test_links301-600.txt
    #   fold 3: cross_test/3/shicai2bingzheng_train_links1-600,901-1500.txt
    #           cross_test/3/shicai2bingzheng_test_links601-900.txt
    #   fold 4: cross_test/4/shicai2bingzheng_train_links1-900,1201-1500.txt
    #           cross_test/4/shicai2bingzheng_test_links901-1200.txt
    #   fold 5: cross_test/5/shicai2bingzheng_train_links1-1200.txt
    #           cross_test/5/shicai2bingzheng_test_links1201-1500.txt
    trainPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_train_links301-1500.txt'
    testPath = root + u'model_cache/relation_learning/cross_test/1/shicai2bingzheng_test_links1-300.txt'

    # Store target; the name used for the other variant was
    # u'model/keras/links(sc2bz)_classifiecr_cnnlstmT'.
    modelStorePath = root + u'model/keras/links(sc2bz)_classifiecr_cnnsT'

    # "cnns + lstm part" (a separate "cnns part" section followed this
    # call in the original but was empty).
    predictor.trainHybirdLinksClassifier_file(
        w2vPath,
        trainPath,
        testPath,
        v_ratio=0.15,
        storeFilePath=modelStorePath)
def compMaxTextLength():
    '''
    Print the largest number of comma-separated items found between the
    first '{' and the first '}' of any line in shicai2bingzheng_links.txt.

    Prints 0 for an empty file.
    '''
    linksTextFilePath = ROOT_PATH.auto_config_root() + u'model_cache/shicai2bingzheng_links.txt'

    max_len = 0
    # Stream the file line by line; the context manager closes it even on
    # error, and the handle no longer shadows the builtin name `file`.
    with open(linksTextFilePath) as linksFile:
        for line in linksFile:
            textCnt = line[line.find('{') + 1:line.find('}')]
            max_len = max(max_len, len(textCnt.split(',')))

    print('max_length:'),
    print(max_len)
# -*- coding: UTF-8 -*- ''' Created on 2016年8月16日 @author: hylovedd ''' import time from datastore.graph.neoDataAdvanceOpt import NeoDataAdvanceOpt from datastore.graph.neoDataGraphOpt import NeoDataGraphOpt from knowledge_graph import medGraphSupOpt from tools.cache import ROOT_PATH _medW2VModelPath = ROOT_PATH.auto_config_root( ) + u'model/word2vec/zongheword2vecModel.vector' # _scDictPath = ROOT_PATH.seg_dictwin64 + u'jieba_shicai.txt' # _bzDictPath = ROOT_PATH.seg_dictwin64 + u'jieba_yixuebaike.txt' _medBZEntities = [] _medSCEntities = [] class MedGraphMining(object): def __init__(self, medW2VModelPath=_medW2VModelPath): self.medW2VModelPath = medW2VModelPath self.wordVecOptObj, self.model = medGraphSupOpt.loadW2VModelFromDisk( medW2VModelPath) def initEntityDict(self): neoDataGraphObj = NeoDataGraphOpt()
''' Created on 2016年8月16日 @author: hylovedd ''' import time from datastore.graph.neoDataAdvanceOpt import NeoDataAdvanceOpt from datastore.graph.neoDataGraphOpt import NeoDataGraphOpt from knowledge_graph import medGraphSupOpt from tools.cache import ROOT_PATH _medW2VModelPath = ROOT_PATH.auto_config_root() + u'model/word2vec/zongheword2vecModel.vector' # _scDictPath = ROOT_PATH.seg_dictwin64 + u'jieba_shicai.txt' # _bzDictPath = ROOT_PATH.seg_dictwin64 + u'jieba_yixuebaike.txt' _medBZEntities = [] _medSCEntities = [] class MedGraphMining(object): def __init__(self, medW2VModelPath=_medW2VModelPath): self.medW2VModelPath = medW2VModelPath self.wordVecOptObj, self.model = medGraphSupOpt.loadW2VModelFromDisk(medW2VModelPath) def initEntityDict(self): neoDataGraphObj = NeoDataGraphOpt() neoDataAdvanceObj = NeoDataAdvanceOpt()