def loadCompTopUsers(): config = ConfigParser() cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf") config.read(cpath) datalayer = DataLayer(config) queue = ClusterTaskQueue(datalayer.getJobRedis()) task = ClusterTask(1646586724, force=True) #1646586724 pingan #1897953162 ali #queue.addTask(task) import sys from thrift import Thrift from thrift.transport import TSocket from thrift.transport import TTransport from thrift.protocol import TBinaryProtocol from userquery import * from userquery.ttypes import * fd = open("/home/xiafan/KuaiPan/dataset/user/mhxkeyword.txt") for line in fd.readlines(): fields = line.split("\t") transport = TSocket.TSocket('localhost', 10010) # Buffering is critical. Raw sockets are very slow transport = TTransport.TBufferedTransport(transport) # Wrap in a protocol protocol = TBinaryProtocol.TBinaryProtocol(transport) # Create a client to use the protocol encoder client = TweetService.Client(protocol) # Connect! transport.open() query=UserQuery(fields[0], 2) uids=client.search(query) for uid in uids: task = ClusterTask(uid=uid, force=True) queue.addTask(task)
def start(self):
    """Start the worker: launch the status-report thread and the configured
    number of ClusterThread workers, then block until every worker thread
    has exited.

    Reads 'cluster.threadnum' from the worker config to size the pool.
    """
    self.working = True
    self.dataLayer = DataLayer(self.config)
    # start status report service
    self.reportThread = ReportThread(self)
    self.reportThread.start()
    self.taskGen = ClusterTaskQueue(self.dataLayer.getJobRedis())
    tnum = self.config.getint('cluster', 'threadnum')
    cslogger.info("start %d worker threads" % (tnum))
    self.threads = []
    for i in range(tnum):
        workThread = ClusterThread(self)
        workThread.start()
        self.threads.append(workThread)
    # waiting for shutdown: join threads one at a time, popping each as it
    # finishes so self.threads always reflects the still-live workers
    while len(self.threads) > 0:
        try:
            self.threads[0].join()
            self.threads.pop(0)
        except:
            # NOTE(review): bare except silently retries the join; it also
            # swallows KeyboardInterrupt/SystemExit -- consider narrowing
            # to Exception.
            pass
    cslogger.info("cluster worker shuts down")
def dumpSN(dataDir): config = ConfigParser() cpath = os.path.join(os.getenv("COMMUNITY_HOME", os.getcwd()), "./conf/dworker.conf") print "load config file:", cpath config.read(cpath) dataLayer = DataLayer(config) snredis=dataLayer.getSNRedis() files= os.listdir(dataDir) for file in files: dataFile = os.path.join(dataDir, file) print "loading social network from file:%s"%(dataFile) fp = open(dataFile,"r") for line in fp: nodes = line.split("\t") snredis.getRedis(nodes[0], SN_DB).sadd(nodes[0],nodes[1]) fp.close()
def dumpSN(dataDir): config = ConfigParser() cpath = os.path.join(os.getenv("COMMUNITY_HOME", os.getcwd()), "./conf/dworker.conf") print "load config file:", cpath config.read(cpath) dataLayer = DataLayer(config) snredis = dataLayer.getSNRedis() files = os.listdir(dataDir) for file in files: dataFile = os.path.join(dataDir, file) print "loading social network from file:%s" % (dataFile) fp = open(dataFile, "r") for line in fp: nodes = line.split("\t") snredis.getRedis(nodes[0], SN_DB).sadd(nodes[0], nodes[1]) fp.close()
def loadAllCompUsers():
    """Queue a clustering task for a single hard-coded company account.

    NOTE(review): despite the name, the sys.exit() below makes everything
    after it dead code -- the bulk load over comidbyidx.txt never runs.
    Presumably left in as debug scaffolding; confirm before relying on
    this to load all company users.
    """
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    config.read(cpath)
    datalayer = DataLayer(config)
    queue = ClusterTaskQueue(datalayer.getJobRedis())
    task = ClusterTask(1897953162, force=True)
    # 1646586724 pingan   1897953162 ali
    queue.addTask(task)
    import sys
    sys.exit()
    # ---- dead code below this point (see sys.exit above) ----
    fd = open("/home/xiafan/KuaiPan/dataset/user/comidbyidx.txt")
    for line in fd.readlines():
        task = ClusterTask(long(line), force=False)
        queue.addTask(task)
    fd.close()
class ClusterWorker:
    """Multi-threaded clustering worker.

    Owns a pool of ClusterThread workers fed from a redis-backed
    ClusterTaskQueue, plus a ReportThread that publishes status.
    """

    def __init__(self, config):
        """Keep the config and pick a worker id (configured or random)."""
        self.config = config
        self.workStatus = WorkStatus()
        try:
            # NOTE: section name 'workder' (sic) matches the deployed
            # config files; do not fix the spelling without migrating them.
            self.id = config.getint('workder', 'id')
        except Exception:
            # no configured id -- fall back to a random one
            self.id = random.randint(0, 10000000)

    def start(self):
        """Launch report + worker threads, then block until workers exit."""
        self.working = True
        self.dataLayer = DataLayer(self.config)
        # start status report service
        self.reportThread = ReportThread(self)
        self.reportThread.start()
        self.taskGen = ClusterTaskQueue(self.dataLayer.getJobRedis())
        tnum = self.config.getint('cluster', 'threadnum')
        cslogger.info("start %d worker threads" % (tnum))
        self.threads = []
        for i in range(tnum):
            workThread = ClusterThread(self)
            workThread.start()
            self.threads.append(workThread)
        # waiting for shutdown: pop each thread as its join completes
        while len(self.threads) > 0:
            try:
                self.threads[0].join()
                self.threads.pop(0)
            except Exception:
                # retry the join; was a bare except that also ate
                # KeyboardInterrupt
                pass
        cslogger.info("cluster worker shuts down")

    # ---- status report functions ----

    def reportStatus(self):
        """Return the current WorkStatus object."""
        return self.workStatus

    def reAssignJob(self, jobQueueID):
        """Point this worker's status at a different job queue id."""
        self.workStatus.jobQueueID = jobQueueID

    def clusterForNode(self, nodeID):
        """Schedule an on-demand clustering task for *nodeID*."""
        self.taskGen.addNewTask(nodeID)

    def stop(self):
        """Request shutdown and wait for worker/report threads to finish.

        Bug fix: the old body joined self.workThread and closed
        self.dataCluster / self.taskCluster, none of which are ever set
        on this class (start() creates self.threads, self.dataLayer,
        self.taskGen), so stop() always raised AttributeError. It now
        joins the actual thread pool created by start().
        """
        self.working = False
        for t in list(getattr(self, 'threads', [])):
            t.join()
        self.reportThread.stopReport()
        self.reportThread.join()
# NOTE(review): this chunk starts mid-method -- the statements down to
# "self.groupstats[k] = wordHist" are the tail of a word-counting loop
# whose enclosing def (and the bindings tdao, mid, uwordSet, wordHist, k)
# lies outside this view.
t = tdao.getTweet(mid)
if t:
    for word in jieba.cut(t.text, cut_all=False):
        # uwordSet gates the increments, so a word is counted at most once
        # per uwordSet scope (presumably per user -- confirm upstream)
        if not word in uwordSet:
            # NOTE(review): .get(word, 1) + 1 makes a first occurrence
            # count as 2; likely should be .get(word, 0) + 1 -- verify.
            self.globalstats[word] = self.globalstats.get(word, 1) + 1
            wordHist[word] = wordHist.get(word, 1) + 1
            uwordSet.add(word)
self.groupstats[k] = wordHist


if __name__ == "__main__":
    # ad-hoc driver: cluster the ego network of one hard-coded user id
    from dao.datalayer import DataLayer
    from ConfigParser import ConfigParser
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    print "load config file:", cpath
    config.read(cpath)
    dataLayer = DataLayer(config)
    com = ComSummarize(dataLayer)
    gcache = dataLayer.getGraphCache()
    ego = gcache.egoNetwork("1650507560")
    comm = Community(ego, 0.01, 10, 3)
    comm.initCommunity()
    comm.startCluster()
    comm.printCommunity()
    # com.detect("1707446764")
    # com.detect("1650507560")
    # "1707446764"
# NOTE(review): this chunk starts mid-method -- the statements down to
# "self.groupstats[k] = wordHist" are the tail of a word-counting loop;
# the enclosing def and the bindings t, uwordSet, wordHist, k are outside
# this view.
if t:
    for word in jieba.cut(t.text, cut_all=False):
        # uwordSet gates the increments: each word is counted at most once
        # per uwordSet scope (presumably per user -- confirm upstream)
        if not word in uwordSet:
            # NOTE(review): .get(word, 1) + 1 counts a first occurrence
            # as 2; likely should be .get(word, 0) + 1 -- verify.
            self.globalstats[word] = self.globalstats.get(
                word, 1) + 1
            wordHist[word] = wordHist.get(word, 1) + 1
            uwordSet.add(word)
self.groupstats[k] = wordHist


if __name__ == "__main__":
    # ad-hoc driver: cluster the ego network of one hard-coded user id
    from dao.datalayer import DataLayer
    from ConfigParser import ConfigParser
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    print "load config file:", cpath
    config.read(cpath)
    dataLayer = DataLayer(config)
    com = ComSummarize(dataLayer)
    gcache = dataLayer.getGraphCache()
    ego = gcache.egoNetwork("1650507560")
    comm = Community(ego, 0.01, 10, 3)
    comm.initCommunity()
    comm.startCluster()
    comm.printCommunity()
    # com.detect("1707446764")
    # com.detect("1650507560")
    # "1707446764"