# Imports used by the functions in this section (add at the module top if not
# already present). Project-local helpers referenced below -- graphUtils,
# graphConstants, graphSeedNodes, MyCorpus, Bm25, Clarity, NER, readFromFile,
# get_init_R, normalize_edge_Weights, personalizedPageRank,
# printGraphRecommendedDocs, writeNewR, LDA_PASSES -- are assumed to be defined
# or imported elsewhere in this repository.
import operator
import os
from time import time

import networkx as nx
import numpy as np
from numpy import zeros
from gensim import corpora, models, similarities


def PPR():
    todayDate = graphUtils.getTodayDateFolder()
    lastRecommendationDate = graphUtils.loadSettings(graphConstants.LAST_GRAPH_RECOMM_DONE)
    if todayDate == lastRecommendationDate:
        graphUtils.logger.info("Simple graph recommendation PPR already done for today")
        return
    graphUtils.logger.info("Simple graph recommendation PPR last done for = " + str(lastRecommendationDate))

    # Load the current version of the stored graph
    graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR, graphConstants.TYPE_MAIN)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    G = nx.read_gexf(graph_file)

    list_nodes = {x: i for i, x in enumerate(G.nodes())}
    R = get_init_R(G, list_nodes)
    # Normalize edge transition weights
    M = normalize_edge_Weights(list_nodes, G)
    S, list_seednode_names = graphSeedNodes.findSeedNodes(G, list_nodes)
    for idx, node in enumerate(list_seednode_names):
        graphUtils.logger.info(str(idx) + " seed node for simple graph today = " + node)

    newR = personalizedPageRank(R, M, S)
    printGraphRecommendedDocs(G, list_nodes, newR)
    writeNewR(G, list_nodes, newR, graph_file)
    graphUtils.saveSettings(graphConstants.LAST_GRAPH_RECOMM_DONE, todayDate)
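# A minimal sketch of the power iteration that personalizedPageRank(R, M, S) is
# assumed to perform: R is the initial rank vector, M the normalized transition
# matrix, and S the personalization (seed) vector. The damping factor, tolerance,
# iteration cap, and the orientation of M are illustrative assumptions, not
# values taken from this repository.
def _ppr_sketch(R, M, S, alpha=0.85, tol=1e-8, max_iter=100):
    for _ in range(max_iter):
        # Propagate rank along edges, then mix back toward the seed distribution
        newR = alpha * M.dot(R) + (1 - alpha) * S
        if np.abs(newR - R).sum() < tol:
            return newR
        R = newR
    return R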
def Relevance():
    todayDate = graphUtils.getTodayDateFolder()
    lastRelevanceDate = graphUtils.loadSettings(graphConstants.LAST_GRAPH_RELEVANCE_DIR)
    lastSuggRelevanceDate = graphUtils.loadSettings(graphConstants.LAST_GRAPH_SUGG_RELEVANCE_DIR)
    if lastRelevanceDate:
        graphUtils.logger.info("Graph relevance last done for = " + lastRelevanceDate)
    else:
        graphUtils.logger.info("Graph relevance last done for None")
    if lastSuggRelevanceDate:
        graphUtils.logger.info("Graph suggestion relevance last done for = " + lastSuggRelevanceDate)
    else:
        graphUtils.logger.info("Graph suggestion relevance last done for None")
    if todayDate == lastRelevanceDate and todayDate == lastSuggRelevanceDate:
        graphUtils.logger.info("Graph relevance signal already done for today: " + todayDate)
        return True

    graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR, graphConstants.TYPE_MAIN)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    write_graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    G = nx.read_gexf(graph_file)

    # Build a tf-idf similarity index over the training (history) documents,
    # dropping words that occur only once in the whole corpus
    trainFiles, trainFileNames = graphUtils.findRecommTrainGraphFiles()
    trainCorpus = graphUtils.findCorpus(trainFiles)
    all_tokens = sum(trainCorpus, [])
    tokens_once = set(word for word in set(all_tokens) if all_tokens.count(word) == 1)
    texts = [[word for word in text if word not in tokens_once] for text in trainCorpus]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=dictionary, normalize=True)
    index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=len(dictionary))

    if todayDate != lastRelevanceDate:
        testFiles, testFileName = graphUtils.findRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        mini = 100
        maxi = -1
        count = 0
        for idx, text in enumerate(testCorpus):
            # Add this recommendation node if it is not in the graph yet
            recomm_nodename = testFileName[idx]
            if recomm_nodename not in G.nodes():
                G.add_node(recomm_nodename)
                G.node[recomm_nodename]['type'] = graphConstants.TYPE_GOOGLE
            vec = dictionary.doc2bow(text)
            sims = index[tfidf[vec]]
            for idxsim, prob in enumerate(sims):
                if prob < 0.1:
                    continue
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    G.add_edge(recomm_nodename, trainNode, weight=prob)
                    G.add_edge(trainNode, recomm_nodename, weight=prob)
                    count = count + 1
            text = readFromFile(testFiles[idx])
            #NERFunc(text, G, recomm_nodename)
        graphUtils.logger.info("Simple graph relevance completed for today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("Relevance count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_RELEVANCE_DIR, todayDate)

    # Same relevance pass for the suggested-Google documents
    if todayDate != lastSuggRelevanceDate:
        testFiles, testFileName = graphUtils.findSuggRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        mini = 100
        maxi = -1
        count = 0
        for idx, text in enumerate(testCorpus):
            # Add this recommendation node if it is not in the graph yet
            recomm_nodename = testFileName[idx]
            if recomm_nodename not in G.nodes():
                G.add_node(recomm_nodename)
                G.node[recomm_nodename]['type'] = graphConstants.TYPE_SUGG
            vec = dictionary.doc2bow(text)
            sims = index[tfidf[vec]]
            for idxsim, prob in enumerate(sims):
                if prob < 0.1:
                    continue
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    G.add_edge(recomm_nodename, trainNode, weight=prob)
                    G.add_edge(trainNode, recomm_nodename, weight=prob)
                    count = count + 1
            text = readFromFile(testFiles[idx])
            #NERFunc(text, G, recomm_nodename)
        graphUtils.logger.info("Simple graph relevance completed for suggestGoogle today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("Relevance count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_SUGG_RELEVANCE_DIR, todayDate)
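# Self-contained illustration of the gensim tf-idf similarity lookup used in
# Relevance() above; the toy documents and query are made up for demonstration
# only and are not part of this project's data.
def _tfidf_similarity_demo():
    train_texts = [["graph", "recommendation", "pagerank"],
                   ["lda", "topic", "model"],
                   ["graph", "topic", "edges"]]
    dictionary = corpora.Dictionary(train_texts)
    corpus = [dictionary.doc2bow(text) for text in train_texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=dictionary, normalize=True)
    index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=len(dictionary))
    query = dictionary.doc2bow(["graph", "pagerank"])
    # One cosine similarity per training document, in the same order as train_texts
    return list(index[tfidf[query]])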
def buildGraph():
    # Dates on which the graph, the text corpus, and the LDA model were last updated
    date_LAST_GRAPH_DONE = graphUtils.loadSettings(graphConstants.date_LAST_GRAPH_DONE)
    date_LAST_TEXTCORPUS_DONE = graphUtils.loadSettings(graphConstants.date_LAST_TEXTCORPUS_DONE)
    date_LAST_LDA_DONE = graphUtils.loadSettings(graphConstants.date_LAST_LDA_DONE)
    yesterdayFolder = graphUtils.getYesterdayDateFolder()
    if date_LAST_GRAPH_DONE == yesterdayFolder:
        graphUtils.logger.info("Simple graph already built till yesterday")
        return

    # Tokenize the new documents, dropping words that occur only once overall
    graphFiles, graphFileNames = graphUtils.findGraphFiles()
    graphCorpus = graphUtils.findCorpus(graphFiles)
    all_tokens = sum(graphCorpus, [])
    tokens_once = set(word for word in set(all_tokens) if all_tokens.count(word) == 1)
    new_texts = [[word for word in text if word not in tokens_once] for text in graphCorpus]

    # Create or update the persisted text corpus
    txtcorpus_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                                  graphConstants.TEXTCORPUS_DIR, graphConstants.TYPE_MAIN)
    txtcorpus_file = os.path.join(txtcorpus_path, graphConstants.TEXTCORPUS_FILE)
    if not os.path.exists(txtcorpus_path):
        os.makedirs(txtcorpus_path)
    if date_LAST_TEXTCORPUS_DONE is None:
        objtxt_corpus = MyCorpus(new_texts)
        objtxt_corpus.save(txtcorpus_file)
        graphUtils.saveSettings(graphConstants.date_LAST_TEXTCORPUS_DONE, yesterdayFolder)
    elif date_LAST_TEXTCORPUS_DONE != yesterdayFolder:
        objtxt_corpus = MyCorpus.load(txtcorpus_file)
        objtxt_corpus.update_corpus(new_texts)
        objtxt_corpus.save(txtcorpus_file)
        graphUtils.saveSettings(graphConstants.date_LAST_TEXTCORPUS_DONE, yesterdayFolder)
    else:
        objtxt_corpus = MyCorpus.load(txtcorpus_file)

    txt_dictionary = objtxt_corpus.dictionary
    corpus = [txt_dictionary.doc2bow(text) for text in objtxt_corpus.corpus]
    newtxt_corpus = [txt_dictionary.doc2bow(text) for text in new_texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=txt_dictionary, normalize=True)
    idf = models.tfidfmodel.precompute_idfs(tfidf.wglobal, txt_dictionary.dfs, len(corpus))

    if date_LAST_LDA_DONE is not None:
        graphUtils.logger.info("Simple graph nodes lda after = " + date_LAST_LDA_DONE + " starts")
    else:
        graphUtils.logger.info("Simple graph nodes lda after = None starts")
    t0 = time()

    # Create or update the LDA model
    lda_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                            graphConstants.LDA_DIR, graphConstants.TYPE_MAIN)
    lda_file = os.path.join(lda_path, graphConstants.LDA_FILE)
    if not os.path.exists(lda_path):
        os.makedirs(lda_path)
        date_LAST_LDA_DONE = None
    if date_LAST_LDA_DONE is None:
        lda = models.LdaModel(corpus=corpus, id2word=txt_dictionary, num_topics=50,
                              update_every=1, chunksize=10000, passes=LDA_PASSES)
        lda.save(lda_file)
        graphUtils.saveSettings(graphConstants.date_LAST_LDA_DONE, yesterdayFolder)
    elif date_LAST_LDA_DONE != yesterdayFolder:
        lda = models.LdaModel.load(lda_file)
        lda.update(newtxt_corpus)
        lda.save(lda_file)
        graphUtils.saveSettings(graphConstants.date_LAST_LDA_DONE, yesterdayFolder)
    else:
        lda = models.LdaModel.load(lda_file)
    t1 = time()
    graphUtils.logger.info("Simple graph nodes lda time = " + str(t1 - t0) + " seconds ends")

    # Develop the graph
    graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR, graphConstants.TYPE_MAIN)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    if date_LAST_GRAPH_DONE is None:
        G = nx.DiGraph()
    else:
        G = nx.read_gexf(graph_file)
    if date_LAST_GRAPH_DONE is not None:
        graphUtils.logger.info("Simple graph nodes addition after = " + date_LAST_GRAPH_DONE + " starts")
    else:
        graphUtils.logger.info("Simple graph nodes addition after = None")

    # Add document nodes and document <-> topic-word edges for the new corpus
    if date_LAST_GRAPH_DONE != yesterdayFolder:
        for index, document in enumerate(newtxt_corpus):
            node_name = graphFileNames[index]
            G.add_node(node_name)
            G.node[node_name]['type'] = graphConstants.TYPE_HISTORY
            topics = lda[document]
            for topicid, topicProb in topics:
                # Keep only topics that are sufficiently probable for this document
                if topicProb > 0.1:
                    # show_topic() is unpacked as (probability, word), matching older gensim releases
                    words = lda.show_topic(topicid, topn=10)
                    for wordProb, word in words:
                        wordId = txt_dictionary.doc2bow([word])[0][0]
                        idfWord = idf[wordId]
                        # Keep only reasonably rare (high-idf) topic words
                        if idfWord > 3.0:
                            word = word.lower()
                            # Add the topic word as a node if it does not exist yet
                            if word not in G.nodes():
                                G.add_node(word)
                                G.node[word]['type'] = graphConstants.TYPE_TOPICS
                            # Add or strengthen the edges between this document and the topic word
                            if G.has_edge(node_name, word) is False:
                                G.add_edge(node_name, word, weight=1)
                            else:
                                G[node_name][word]["weight"] = G[node_name][word]["weight"] + 1
                            if G.has_edge(word, node_name) is False:
                                G.add_edge(word, node_name, weight=1)
                            else:
                                G[word][node_name]["weight"] = G[word][node_name]["weight"] + 1
                            graphUtils.logger.info("word = " + word + " document = " + node_name)
        #G = NER.NERFunc(graphFiles, graphFileNames, G)
        nx.write_gexf(G, graph_file)
        graphUtils.saveSettings(graphConstants.date_LAST_GRAPH_DONE, yesterdayFolder)
        graphUtils.logger.info("Simple graph nodes addition after = " + str(date_LAST_GRAPH_DONE) + " ends")
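# Hypothetical sketch of the MyCorpus helper that buildGraph() relies on,
# reconstructed purely from how it is used above (a dictionary attribute, a
# corpus attribute, and update_corpus/save/load methods); the real
# implementation in this repository may differ.
import pickle

class _MyCorpusSketch(object):
    def __init__(self, texts):
        self.corpus = list(texts)                         # tokenized documents
        self.dictionary = corpora.Dictionary(self.corpus)

    def update_corpus(self, new_texts):
        # Append new documents and grow the dictionary accordingly
        self.corpus.extend(new_texts)
        self.dictionary.add_documents(new_texts)

    def save(self, path):
        with open(path, "wb") as f:
            pickle.dump(self, f)

    @staticmethod
    def load(path):
        with open(path, "rb") as f:
            return pickle.load(f)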
def Smoothness():
    todayDate = graphUtils.getTodayDateFolder()
    lastSmoothnessDate = graphUtils.loadSettings(graphConstants.LAST_GRAPHNER_SMOOTHNESS_DIR)
    lastSuggSmoothnessDate = graphUtils.loadSettings(graphConstants.LAST_GRAPHNER_SUGG_SMOOTHNESS_DIR)
    if lastSmoothnessDate:
        graphUtils.logger.info("NER graph smoothness last done for = " + lastSmoothnessDate)
    else:
        graphUtils.logger.info("NER graph smoothness last done for None")
    if lastSuggSmoothnessDate:
        graphUtils.logger.info("NER graph suggestion smoothness last done for = " + lastSuggSmoothnessDate)
    else:
        graphUtils.logger.info("NER graph suggestion smoothness last done for None")
    if todayDate == lastSmoothnessDate and todayDate == lastSuggSmoothnessDate:
        graphUtils.logger.info("NER graph smoothness signal already done for today: " + todayDate)
        return True

    graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR, graphConstants.TYPE_NER)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    write_graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    G = nx.read_gexf(graph_file)

    trainFiles, trainFileNames = graphUtils.findRecommTrainGraphNerFiles()
    trainCorpus = graphUtils.findCorpus(trainFiles)
    bm25obj = Bm25(trainCorpus)
    trainUniqueWords = [set(trainText) for trainText in trainCorpus]

    if todayDate != lastSmoothnessDate:
        testFiles, testFileName = graphUtils.findRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        testUniqueWords = [set(testText) for testText in testCorpus]
        mini = 100
        maxi = -1
        count = 0
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testDoc in range(len(testCorpus)):
            recomm_nodename = testFileName[testDoc]
            uniqueTest = testUniqueWords[testDoc]
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                # Score the words each document has that the other lacks against the corpus
                SminusD = [word for word in trainCorpus[trainDoc] if word not in uniqueTest]
                DminusS = [word for word in testCorpus[testDoc] if word not in uniqueTrain]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext)
            # Keep only the strongest smoothness connections for this test document
            dict_arr = {key: value for (key, value) in enumerate(smoothness[testDoc])}
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename]['type'] = graphConstants.TYPE_GOOGLE
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename, trainNode,
                                   weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode]['weight'] += prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode, recomm_nodename,
                                   weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename]['weight'] += prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    count = count + 1
        graphUtils.logger.info("NER graph smoothness completed for normalGoogle today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("Smoothness edges count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPHNER_SMOOTHNESS_DIR, todayDate)

    if todayDate != lastSuggSmoothnessDate:
        testFiles, testFileName = graphUtils.findSuggRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        testUniqueWords = [set(testText) for testText in testCorpus]
        mini = 100
        maxi = -1
        count = 0
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testDoc in range(len(testCorpus)):
            recomm_nodename = testFileName[testDoc]
            uniqueTest = testUniqueWords[testDoc]
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                SminusD = [word for word in trainCorpus[trainDoc] if word not in uniqueTest]
                DminusS = [word for word in testCorpus[testDoc] if word not in uniqueTrain]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext)
            dict_arr = {key: value for (key, value) in enumerate(smoothness[testDoc])}
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_SMOOTHNESS_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename]['type'] = graphConstants.TYPE_SUGG
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename, trainNode,
                                   weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode]['weight'] += prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode, recomm_nodename,
                                   weight=prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename]['weight'] += prob * graphConstants.SMOOTHNESS_EDGE_WEIGHT
                    count = count + 1
        graphUtils.logger.info("NER graph smoothness completed for suggestGoogle today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("Smoothness edges count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPHNER_SUGG_SMOOTHNESS_DIR, todayDate)
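# Hypothetical sketch of the Bm25 helper used by Smoothness(), inferred from its
# usage above (a bm25obj.N attribute and BM25Score(query) returning one score per
# training document). It is a plain Okapi BM25 with illustrative k1/b values; the
# repository's real implementation may differ.
class _Bm25Sketch(object):
    def __init__(self, corpus, k1=1.5, b=0.75):
        self.corpus = corpus
        self.N = len(corpus)
        self.k1, self.b = k1, b
        self.avgdl = sum(len(doc) for doc in corpus) / float(self.N)
        self.df = {}
        for doc in corpus:
            for word in set(doc):
                self.df[word] = self.df.get(word, 0) + 1

    def BM25Score(self, query):
        # One BM25 score per training document for the given query terms
        scores = zeros(self.N)
        for i, doc in enumerate(self.corpus):
            dl = len(doc)
            for word in query:
                if word not in self.df:
                    continue
                tf = doc.count(word)
                idf_w = np.log((self.N - self.df[word] + 0.5) / (self.df[word] + 0.5))
                scores[i] += idf_w * tf * (self.k1 + 1) / (tf + self.k1 * (1 - self.b + self.b * dl / self.avgdl))
        return scores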
def ConnectionClarity():
    todayDate = graphUtils.getTodayDateFolder()
    lastClarityDate = graphUtils.loadSettings(graphConstants.LAST_GRAPH_CLARITY_DIR)
    lastSuggClarityDate = graphUtils.loadSettings(graphConstants.LAST_GRAPH_SUGG_CLARITY_DIR)
    if lastClarityDate:
        graphUtils.logger.info("Graph Google clarity last done for = " + lastClarityDate)
    else:
        graphUtils.logger.info("Graph Google clarity last done for None")
    if lastSuggClarityDate:
        graphUtils.logger.info("Graph suggestion clarity last done for = " + lastSuggClarityDate)
    else:
        graphUtils.logger.info("Graph suggestion clarity last done for None")
    if todayDate == lastClarityDate and todayDate == lastSuggClarityDate:
        graphUtils.logger.info("Graph clarity signal already done for today = " + todayDate)
        return True

    graph_path = os.path.join(graphConstants.ROOT_FOLDER, graphConstants.GRAPH_DIR,
                              graphConstants.GRAPH_DIR, graphConstants.TYPE_MAIN)
    graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    write_graph_file = os.path.join(graph_path, graphConstants.GRAPH_FILE)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    G = nx.read_gexf(graph_file)

    trainFiles, trainFileNames = graphUtils.findRecommTrainGraphFiles()
    trainCorpus = graphUtils.findCorpus(trainFiles)

    if todayDate != lastClarityDate:
        testFiles, testFileName = graphUtils.findRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        clarityobj = Clarity(trainCorpus, testCorpus)
        clarityScore = clarityobj.ClarityScore()
        mini = 100
        maxi = -1
        count = 0
        for testidx, text in enumerate(testCorpus):
            recomm_nodename = testFileName[testidx]
            # Keep only the strongest clarity connections for this test document
            dict_arr = {key: value for (key, value) in enumerate(clarityScore[testidx])}
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_CLARITY_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if prob < 0.0:
                    break
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename]['type'] = graphConstants.TYPE_GOOGLE
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename, trainNode,
                                   weight=prob * graphConstants.CLARITY_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode]['weight'] += prob * graphConstants.CLARITY_EDGE_WEIGHT
                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode, recomm_nodename,
                                   weight=prob * graphConstants.CLARITY_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename]['weight'] += prob * graphConstants.CLARITY_EDGE_WEIGHT
                    count = count + 1
        graphUtils.logger.info("Simple graph clarity completed for googlenews today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("clarity edges count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_CLARITY_DIR, todayDate)

    if todayDate != lastSuggClarityDate:
        testFiles, testFileName = graphUtils.findSuggRecommFiles()
        testCorpus = graphUtils.findCorpus(testFiles)
        clarityobj = Clarity(trainCorpus, testCorpus)
        clarityScore = clarityobj.ClarityScore()
        mini = 100
        maxi = -1
        count = 0
        for testidx, text in enumerate(testCorpus):
            recomm_nodename = testFileName[testidx]
            dict_arr = {key: value for (key, value) in enumerate(clarityScore[testidx])}
            sorted_x = sorted(dict_arr.items(), key=operator.itemgetter(1))
            sorted_x.reverse()
            sorted_x = sorted_x[:graphConstants.MAX_CLARITY_EDGE]
            total = sum([pair[1] for pair in sorted_x])
            for (idxsim, val) in sorted_x:
                prob = val / total
                if prob < 0.0:
                    break
                if recomm_nodename not in G.nodes():
                    G.add_node(recomm_nodename)
                    G.node[recomm_nodename]['type'] = graphConstants.TYPE_SUGG
                trainNode = trainFileNames[idxsim]
                if trainNode in G.nodes():
                    if prob < mini:
                        mini = prob
                    if prob > maxi:
                        maxi = prob
                    if G.has_edge(recomm_nodename, trainNode) is False:
                        G.add_edge(recomm_nodename, trainNode,
                                   weight=prob * graphConstants.CLARITY_EDGE_WEIGHT)
                    else:
                        G[recomm_nodename][trainNode]['weight'] += prob * graphConstants.CLARITY_EDGE_WEIGHT
                    if G.has_edge(trainNode, recomm_nodename) is False:
                        G.add_edge(trainNode, recomm_nodename,
                                   weight=prob * graphConstants.CLARITY_EDGE_WEIGHT)
                    else:
                        G[trainNode][recomm_nodename]['weight'] += prob * graphConstants.CLARITY_EDGE_WEIGHT
                    count = count + 1
        graphUtils.logger.info("Simple graph clarity completed for suggestGoogle today. Stats follow")
        graphUtils.logger.info("mini = " + str(mini))
        graphUtils.logger.info("maxi = " + str(maxi))
        graphUtils.logger.info("clarity edges count = " + str(count))
        nx.write_gexf(G, write_graph_file)
        graphUtils.saveSettings(graphConstants.LAST_GRAPH_SUGG_CLARITY_DIR, todayDate)
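# The top-K selection used in Smoothness() and ConnectionClarity() (build a dict,
# sort, reverse, slice) can be written more directly with heapq.nlargest. This
# helper is an equivalent sketch for one row of the score matrix; it is not wired
# into the functions above.
def _top_k_scores(scores, k):
    import heapq
    # Returns [(train_index, score), ...] for the k largest scores
    return heapq.nlargest(k, enumerate(scores), key=operator.itemgetter(1))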