def start(self):
    """Bring the cluster worker up: connect the data layer, start the
    status-report thread and the worker-thread pool, then block until
    every worker thread has finished.
    """
    self.working = True
    self.dataLayer = DataLayer(self.config)
    # start status report service
    self.reportThread = ReportThread(self)
    self.reportThread.start()
    self.taskGen = ClusterTaskQueue(self.dataLayer.getJobRedis())
    tnum = self.config.getint('cluster', 'threadnum')
    cslogger.info("start %d worker threads" % (tnum))
    self.threads = []
    for i in range(tnum):
        workThread = ClusterThread(self)
        workThread.start()
        self.threads.append(workThread)
    # waiting for shutdown: join each worker in turn, popping it once it
    # has exited so a partially-drained list survives a retry
    while len(self.threads) > 0:
        try:
            self.threads[0].join()
            self.threads.pop(0)
        # was a bare `except: pass`, which also swallowed
        # KeyboardInterrupt/SystemExit and made shutdown un-interruptible;
        # narrowed to Exception so signals still propagate
        except Exception:
            pass
    cslogger.info("cluster worker shuts down")
def dumpSN(dataDir): config = ConfigParser() cpath = os.path.join(os.getenv("COMMUNITY_HOME", os.getcwd()), "./conf/dworker.conf") print "load config file:", cpath config.read(cpath) dataLayer = DataLayer(config) snredis = dataLayer.getSNRedis() files = os.listdir(dataDir) for file in files: dataFile = os.path.join(dataDir, file) print "loading social network from file:%s" % (dataFile) fp = open(dataFile, "r") for line in fp: nodes = line.split("\t") snredis.getRedis(nodes[0], SN_DB).sadd(nodes[0], nodes[1]) fp.close()
# NOTE(review): the lines down to `self.groupstats[k] = wordHist` are the tail
# of a method whose `def` line is outside this chunk; the indentation below is
# reconstructed and should be confirmed against the full file.
        if t:
            # tokenize the text with jieba; count each distinct word at most
            # once per item via uwordSet
            for word in jieba.cut(t.text, cut_all=False):
                if not word in uwordSet:
                    # NOTE(review): a default of 1 means a word's first
                    # sighting is recorded as 2 — presumably .get(word, 0)
                    # was intended; verify against how globalstats/wordHist
                    # are consumed before changing
                    self.globalstats[word] = self.globalstats.get(
                        word, 1) + 1
                    wordHist[word] = wordHist.get(word, 1) + 1
                    uwordSet.add(word)
        # per-group word histogram for group key k
        self.groupstats[k] = wordHist


if __name__ == "__main__":
    # ad-hoc driver: load config, build the ego network of one user and run
    # community clustering on it, printing the result
    from dao.datalayer import DataLayer
    from ConfigParser import ConfigParser
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    print "load config file:", cpath
    config.read(cpath)
    dataLayer = DataLayer(config)
    com = ComSummarize(dataLayer)
    gcache = dataLayer.getGraphCache()
    ego = gcache.egoNetwork("1650507560")
    # 0.01 / 10 / 3 are clustering parameters passed straight to Community —
    # their meaning is not visible from this chunk
    comm = Community(ego, 0.01, 10, 3)
    comm.initCommunity()
    comm.startCluster()
    comm.printCommunity()
    #com.detect("1707446764")
    #com.detect("1650507560")
    #"1707446764"