Exemplo n.º 1
0
def loadCompTopUsers():
    """Queue a forced clustering task for every uid matched by a keyword.

    Reads the worker configuration from ``../../../conf/dworker.conf``
    (relative to the current working directory). For each line of the
    keyword file, the first tab-separated field is sent as a ``UserQuery``
    to the Thrift ``TweetService`` on ``localhost:10010``, and one
    ``ClusterTask(uid=..., force=True)`` is pushed onto the job queue for
    every uid the service returns.
    """
    # Thrift is only needed by this loader, so it is imported lazily.
    # NOTE(review): explicit names replace the former ``import *`` lines and
    # assume the standard Thrift-generated layout (service module
    # ``userquery.TweetService``, structs in ``userquery.ttypes``) - confirm.
    from thrift.transport import TSocket
    from thrift.transport import TTransport
    from thrift.protocol import TBinaryProtocol
    from userquery import TweetService
    from userquery.ttypes import UserQuery

    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    config.read(cpath)
    datalayer = DataLayer(config)
    queue = ClusterTaskQueue(datalayer.getJobRedis())

    # Buffering is critical: raw sockets are very slow.
    transport = TTransport.TBufferedTransport(
        TSocket.TSocket('localhost', 10010))
    client = TweetService.Client(TBinaryProtocol.TBinaryProtocol(transport))

    with open("/home/xiafan/KuaiPan/dataset/user/mhxkeyword.txt") as fd:
        # One connection serves every query and is always closed afterwards;
        # previously a new socket was opened per keyword and never closed.
        transport.open()
        try:
            for line in fd:
                # Strip the line terminator so a keyword line without a tab
                # does not carry a trailing newline into the query.
                keyword = line.rstrip("\r\n").split("\t")[0]
                if not keyword:
                    continue
                for uid in client.search(UserQuery(keyword, 2)):
                    queue.addTask(ClusterTask(uid=uid, force=True))
        finally:
            transport.close()
Exemplo n.º 2
0
    def start(self):
        """Bring the worker up and block until every worker thread exits.

        Creates the data layer, launches the status-report thread, builds the
        task queue from the job Redis handle, starts the number of
        ``ClusterThread`` workers given by the ``[cluster] threadnum`` option,
        and then joins them one by one.
        """
        self.working = True
        self.dataLayer = DataLayer(self.config)

        # Status report service.
        self.reportThread = ReportThread(self)
        self.reportThread.start()

        self.taskGen = ClusterTaskQueue(self.dataLayer.getJobRedis())
        threadCount = self.config.getint('cluster', 'threadnum')
        cslogger.info("start %d worker threads" % (threadCount))
        self.threads = []
        for _ in range(threadCount):
            worker = ClusterThread(self)
            worker.start()
            self.threads.append(worker)

        # Wait for shutdown. Any error raised while joining is swallowed and
        # the same thread is joined again, so the loop only ends once every
        # worker has actually been joined.
        while self.threads:
            try:
                self.threads[0].join()
                self.threads.pop(0)
            except:
                pass
        cslogger.info("cluster worker shuts down")
Exemplo n.º 3
0
def dumpSN(dataDir):
    """Load tab-separated edge files from ``dataDir`` into the SN Redis store.

    Every regular file directly inside ``dataDir`` is read line by line. Each
    line is split on tabs; the first field selects the Redis connection
    (``getRedis(first, SN_DB)``) and is also the set key, and the second
    field is added to that set with ``sadd``.

    The configuration is read from ``conf/dworker.conf`` under the directory
    named by the ``COMMUNITY_HOME`` environment variable, falling back to the
    current working directory.

    Args:
        dataDir: directory containing the edge files.
    """
    config = ConfigParser()
    cpath = os.path.join(os.getenv("COMMUNITY_HOME", os.getcwd()),
                         "./conf/dworker.conf")
    # Single-argument form prints the same text under Python 2 and 3.
    print("load config file: %s" % cpath)
    config.read(cpath)
    snredis = DataLayer(config).getSNRedis()

    for fileName in os.listdir(dataDir):
        dataFile = os.path.join(dataDir, fileName)
        if not os.path.isfile(dataFile):
            # os.listdir also returns sub-directories, which open() rejects.
            continue
        print("loading social network from file:%s" % (dataFile))
        # ``with`` closes the file even if a Redis call raises mid-way.
        with open(dataFile, "r") as fp:
            for line in fp:
                # Drop the line terminator; otherwise it ends up inside the
                # stored set member whenever the line has exactly two fields.
                nodes = line.rstrip("\r\n").split("\t")
                if len(nodes) < 2 or not nodes[0] or not nodes[1]:
                    # Blank or malformed line: nothing meaningful to store.
                    continue
                snredis.getRedis(nodes[0], SN_DB).sadd(nodes[0], nodes[1])
Exemplo n.º 4
0
def dumpSN(dataDir):
    """Load tab-separated edge files from ``dataDir`` into the SN Redis store.

    Every regular file directly inside ``dataDir`` is read line by line. Each
    line is split on tabs; the first field selects the Redis connection
    (``getRedis(first, SN_DB)``) and is also the set key, and the second
    field is added to that set with ``sadd``.

    The configuration is read from ``conf/dworker.conf`` under the directory
    named by the ``COMMUNITY_HOME`` environment variable, falling back to the
    current working directory.

    Args:
        dataDir: directory containing the edge files.
    """
    config = ConfigParser()
    cpath = os.path.join(os.getenv("COMMUNITY_HOME", os.getcwd()),
                         "./conf/dworker.conf")
    # Single-argument form prints the same text under Python 2 and 3.
    print("load config file: %s" % cpath)
    config.read(cpath)
    snredis = DataLayer(config).getSNRedis()

    for fileName in os.listdir(dataDir):
        dataFile = os.path.join(dataDir, fileName)
        if not os.path.isfile(dataFile):
            # os.listdir also returns sub-directories, which open() rejects.
            continue
        print("loading social network from file:%s" % (dataFile))
        # ``with`` closes the file even if a Redis call raises mid-way.
        with open(dataFile, "r") as fp:
            for line in fp:
                # Drop the line terminator; otherwise it ends up inside the
                # stored set member whenever the line has exactly two fields.
                nodes = line.rstrip("\r\n").split("\t")
                if len(nodes) < 2 or not nodes[0] or not nodes[1]:
                    # Blank or malformed line: nothing meaningful to store.
                    continue
                snredis.getRedis(nodes[0], SN_DB).sadd(nodes[0], nodes[1])
Exemplo n.º 5
0
def loadAllCompUsers(seedOnly=True):
    """Queue clustering tasks for company users.

    A forced task for the seed uid 1897953162 is always queued first.

    Args:
        seedOnly: when True (the default, matching the previous behaviour)
            the process exits through ``sys.exit()`` right after the seed
            task is queued. When False, every uid listed in the user-id file
            is additionally queued as a non-forced task.
    """
    import sys

    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    config.read(cpath)
    queue = ClusterTaskQueue(DataLayer(config).getJobRedis())

    # Seed uids noted by the original author:
    #   1646586724 pingan
    #   1897953162 ali
    queue.addTask(ClusterTask(1897953162, force=True))
    if seedOnly:
        # This exit used to be unconditional, which left the bulk load below
        # unreachable; it is now opt-out so the default stays unchanged.
        sys.exit()

    # ``with`` guarantees the file is closed even if queueing fails.
    with open("/home/xiafan/KuaiPan/dataset/user/comidbyidx.txt") as fd:
        for line in fd:
            line = line.strip()
            if not line:
                # Skip blank lines instead of failing the integer conversion.
                continue
            # int() promotes to long automatically on Python 2 when needed.
            queue.addTask(ClusterTask(int(line), force=False))
Exemplo n.º 6
0
class ClusterWorker:
    """Runs a pool of ``ClusterThread`` workers plus a status-report thread.

    ``start()`` blocks until every worker thread has been joined; ``stop()``
    clears the ``working`` flag and waits for the threads to finish.
    """

    def __init__(self, config):
        """Store the configuration and pick a worker id.

        Args:
            config: parser-like object exposing ``getint(section, option)``.
        """
        self.config = config
        self.workStatus = WorkStatus()
        # Defined up front so stop() is safe even if start() never ran.
        self.working = False
        self.threads = []
        self.reportThread = None
        try:
            # NOTE(review): the section name 'workder' looks like a typo for
            # 'worker'; kept as-is because the config file is not visible
            # here and changing it would change which option is read.
            self.id = config.getint('workder', 'id')
        except Exception:
            # No usable id configured: fall back to a random one.
            self.id = random.randint(0, 10000000)

    def start(self):
        """Bring the worker up and block until every worker thread exits.

        Creates the data layer, launches the status-report thread, builds the
        task queue from the job Redis handle and starts the number of
        ``ClusterThread`` workers given by the ``[cluster] threadnum`` option.
        """
        self.working = True
        self.dataLayer = DataLayer(self.config)

        # start status report service
        self.reportThread = ReportThread(self)
        self.reportThread.start()

        self.taskGen = ClusterTaskQueue(self.dataLayer.getJobRedis())
        tnum = self.config.getint('cluster', 'threadnum')
        cslogger.info("start %d worker threads" % (tnum))
        self.threads = []
        for i in range(tnum):
            workThread = ClusterThread(self)
            workThread.start()
            self.threads.append(workThread)

        # Waiting for shutdown. The bare except is kept on purpose: any
        # interruption of join() is ignored and the same thread is joined
        # again, so this loop only ends once every worker has been joined.
        while len(self.threads) > 0:
            try:
                self.threads[0].join()
                self.threads.pop(0)
            except:
                pass
        cslogger.info("cluster worker shuts down")

    # The following functions are status report functions.

    def reportStatus(self):
        """Return the ``WorkStatus`` object of this worker."""
        return self.workStatus

    def reAssignJob(self, jobQueueID):
        """Record ``jobQueueID`` on the work status."""
        self.workStatus.jobQueueID = jobQueueID

    def clusterForNode(self, nodeID):
        """Submit ``nodeID`` to the task queue via ``addNewTask``.

        Only usable after ``start()`` has created ``self.taskGen``.
        """
        self.taskGen.addNewTask(nodeID)

    def stop(self):
        """Stop processing and wait for the worker and report threads.

        Clears ``self.working`` (presumably polled by the worker threads -
        confirm against ``ClusterThread``), joins every thread created by
        ``start()``, closes the optional ``dataCluster`` / ``taskCluster``
        handles when they exist, then stops and joins the report thread.
        """
        self.working = False
        # start() stores its workers in self.threads; the former
        # self.workThread attribute was never assigned and always raised
        # AttributeError. Iterate over a snapshot because start() pops
        # entries from the list while it waits.
        for workThread in list(self.threads):
            workThread.join()

        # Nothing in this class assigns these handles, so close them only
        # when some other code has attached them to the instance.
        for handleName in ('dataCluster', 'taskCluster'):
            handle = getattr(self, handleName, None)
            if handle is not None:
                handle.close()

        # reportThread stays None until start() has run.
        if self.reportThread is not None:
            self.reportThread.stopReport()
            self.reportThread.join()
Exemplo n.º 7
0
                t = tdao.getTweet(mid)
                if t:
                    for word in jieba.cut(t.text, cut_all=False):
                        if not word in uwordSet:
                            self.globalstats[word]=self.globalstats.get(word,1) + 1
                            wordHist[word] = wordHist.get(word, 1) + 1
                        uwordSet.add(word)
                        
        self.groupstats[k] = wordHist              
    
if __name__ == "__main__":
    from dao.datalayer import DataLayer
    from ConfigParser import ConfigParser
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    print "load config file:", cpath
    config.read(cpath)
    
    dataLayer = DataLayer(config)
    com = ComSummarize(dataLayer)
    gcache = dataLayer.getGraphCache()
    ego = gcache.egoNetwork("1650507560")
    comm = Community(ego, 0.01, 10, 3)
    comm.initCommunity()
    comm.startCluster()
    comm.printCommunity()
    #com.detect("1707446764")
    #com.detect("1650507560")

    
    #"1707446764"
Exemplo n.º 8
0
                if t:
                    for word in jieba.cut(t.text, cut_all=False):
                        if not word in uwordSet:
                            self.globalstats[word] = self.globalstats.get(
                                word, 1) + 1
                            wordHist[word] = wordHist.get(word, 1) + 1
                        uwordSet.add(word)

        self.groupstats[k] = wordHist


if __name__ == "__main__":
    from dao.datalayer import DataLayer
    from ConfigParser import ConfigParser
    config = ConfigParser()
    cpath = os.path.join(os.getcwd(), "../../../conf/dworker.conf")
    print "load config file:", cpath
    config.read(cpath)

    dataLayer = DataLayer(config)
    com = ComSummarize(dataLayer)
    gcache = dataLayer.getGraphCache()
    ego = gcache.egoNetwork("1650507560")
    comm = Community(ego, 0.01, 10, 3)
    comm.initCommunity()
    comm.startCluster()
    comm.printCommunity()
    #com.detect("1707446764")
    #com.detect("1650507560")

    #"1707446764"