def generateGraphForDay(endTime):
    """Cluster recent articles by shared keywords and store a topic id per cluster.

    Articles from the three days ending at ``endTime`` are fetched via
    ``app.getArticlesBetweenTimes``.  Two articles are linked when they share
    more than two keywords; communities are detected on that graph with
    Infomap.  Every community with at least three articles gets one topic id
    written to ``db.qdoc``:

    * the most common non-zero topic already carried by its members, or
    * a fresh id (one above the highest topic currently stored) when every
      member's topic is 0.

    NOTE(review): another function with this same name is defined later in
    this file and would shadow this one at import time -- confirm which
    definition is meant to be live.

    Args:
        endTime: End of the time window, as seconds since the epoch
            (it is combined arithmetically with a number of seconds).

    Returns:
        None.  The function prints the highest stored topic id and updates
        ``db.qdoc`` as a side effect.

    Raises:
        IndexError: If ``db.qdoc`` holds no documents, so the highest topic
            id cannot be read.
    """
    beginTime = endTime - 3*24*3600
    articles = app.getArticlesBetweenTimes(beginTime, endTime)

    # Highest topic id currently stored; new topics are numbered above it.
    art = db.qdoc.find({'$query': {}, '$orderby': {'topic': -1}}).limit(1)
    maxTopic = art[0]['topic']
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(maxTopic)

    # Build each keyword set once instead of once per compared pair.
    keywordSets = [set(article.keywords) for article in articles]

    # Compare every unordered pair, including the final article.
    edges = []
    for i in range(len(articles)):
        for j in range(i + 1, len(articles)):
            if len(keywordSets[i] & keywordSets[j]) > 2:
                edges.append((i, j))

    g = Graph()
    g.add_vertices(len(articles))
    # Adding all edges in one call avoids rebuilding igraph's index per edge.
    g.add_edges(edges)
    memberships = g.community_infomap().membership

    # Group article indices by community in a single pass.
    communities = {}
    for index, membership in enumerate(memberships):
        communities.setdefault(membership, []).append(index)

    # Sorted order keeps the numbering of newly created topics deterministic.
    for membership in sorted(communities):
        indices = communities[membership]
        if len(indices) < 3:
            continue

        nonZeroTopics = [articles[index].topic for index in indices
                         if articles[index].topic != 0]
        if nonZeroTopics:
            # Reuse the topic most members already carry.
            newTopic = Counter(nonZeroTopics).most_common(1)[0][0]
        else:
            maxTopic += 1
            newTopic = maxTopic

        idList = [articles[index].guid for index in indices]
        db.qdoc.update({"_id": {'$in': idList}}, {"$set": {"topic": newTopic}}, multi=True)
def generateGraphForDay(daysAgo):
    """Build the keyword-similarity graph for one day and store it in MongoDB.

    The time window ends ``daysAgo`` days before now and spans the preceding
    1.5 days.  Two articles are linked when they share more than one keyword;
    communities are detected on that graph with Infomap and used as the
    ``group`` of each node.  Only articles that have at least one link are
    emitted as nodes.

    The result is upserted into ``db.graph_topics``: the document whose
    ``date`` falls on the same UTC calendar day as the window end is replaced,
    or a new one is inserted if none exists.

    NOTE(review): an earlier function with this same name is defined in this
    file; this later definition would shadow it at import time -- confirm
    which definition is meant to be live.

    Args:
        daysAgo: How many days before the current time the window ends.

    Returns:
        None.  The function prints the window end as a UTC datetime and
        writes to ``db.graph_topics`` as a side effect.
    """
    # Local import: only this function needs timedelta.
    from datetime import timedelta

    endTime = time.time() - daysAgo*24*3600
    beginTime = endTime - 1.5*24*3600
    articles = app.getArticlesBetweenTimes(beginTime, endTime)

    # Build each keyword set once instead of once per compared pair.
    keywordSets = [set(article.keywords) for article in articles]

    edges = []
    edgesClean = []
    connectedNodes = set()
    # Compare every unordered pair, including the final article.
    for i in range(len(articles)):
        for j in range(i + 1, len(articles)):
            sharedCount = len(keywordSets[i] & keywordSets[j])
            if sharedCount > 1:
                edgesClean.append({"source": articles[i].guid, "target": articles[j].guid, "value": sharedCount})
                edges.append((i, j))
                connectedNodes.update((i, j))

    g = Graph()
    g.add_vertices(len(articles))
    # Adding all edges in one call avoids rebuilding igraph's index per edge.
    g.add_edges(edges)
    memberships = g.community_infomap().membership

    nodesClean = []
    for i, membership in enumerate(memberships):
        if i not in connectedNodes:
            # Isolated articles are left out of the stored graph.
            continue
        article = articles[i]
        nodesClean.append({
            "id": article.guid,
            "name": article.title.encode('utf-8').replace('"', ''),
            "group": str(membership),
            "keywords": article.keywords[:5],
            "img": '',  # placeholder; article.img is not used here
            'source': article.source,
            'url': article.url,
        })

    endDate = datetime.utcfromtimestamp(endTime)
    print(endDate)

    # Half-open range [midnight, next midnight) covers the whole UTC day,
    # including timestamps in the final fractional second.
    dayStart = datetime(endDate.year, endDate.month, endDate.day)
    nextDayStart = dayStart + timedelta(days=1)
    db.graph_topics.update(
        {'$and': [{'date': {'$gte': dayStart}}, {'date': {'$lt': nextDayStart}}]},
        {'$set': {'date': endDate, 'graph': {'nodes': nodesClean, 'edges': edgesClean}}},
        upsert=True)