Exemplo n.º 1
0
    def register(self):
        """Register this spider in ZooKeeper and block until data is allocated.

        Steps, in order:

        1. Poll until the spider master node for this website holds
           ``'ready'`` and the running-data (bitmap) node holds a non-empty
           value.
        2. Create an ephemeral, sequential spider node and a sequential data
           node, each initialised with a serialised INIT state.
        3. Read the spider node with ``data_alloc_ok`` installed as its watch;
           when the watch fires, the callback records the data node path,
           writes the WORKING state to both nodes and sets ``self.ready``.
        4. Configure file logging, then poll until ``self.ready`` is true.

        Sets ``self.node_state``, ``self.data_state``, ``self.zk_spider_node``,
        ``self.zk_data_node`` and ``self.ready``.  Neither polling loop has a
        timeout, so this call blocks for as long as the conditions are unmet.
        """
        # Resolve the website and its two parent paths once instead of
        # rebuilding them on every poll iteration.
        website = self.website()
        spider_root = "/spider/spiders/%s" % website
        data_root = "/spider/data/running/%s" % website

        info("register", 'check spider master node and bitmap status..')
        # Keep waiting while EITHER precondition is still unmet.  The previous
        # `and` ended the wait as soon as one of the two was satisfied, which
        # contradicts the "spider node and bitmap status is ok" message below.
        while self.zk.get(spider_root)[0] != 'ready' \
                or self.zk.get(data_root)[0] == '':
            time.sleep(0.1)

        info("register", 'spider node and bitmap status is ok')

        # Short grace period before creating our nodes; presumably this lets
        # the manager finish installing its watches -- TODO confirm.
        time.sleep(0.2)
        self.node_state = SpiderNodeState(SpiderNodeState.INIT, website)
        self.data_state = SpiderDataState(SpiderDataState.INIT, website)

        # The spider node is ephemeral (removed when the session ends) and
        # sequential; create() returns the actual path that was assigned.
        self.zk_spider_node = self.zk.create('%s/sn_' % spider_root,
                                             ephemeral=True,
                                             sequence=True,
                                             value=self.node_state.dumps())
        # The data state must reference the spider node, so it is filled in
        # before the data node is serialised and created.
        self.data_state.spider_node_path = self.zk_spider_node

        # The data node is sequential but NOT ephemeral.
        self.zk_data_node = self.zk.create('%s/dn_' % data_root,
                                           sequence=True,
                                           value=self.data_state.dumps())

        info("register", 'spider node and data node status is created')

        # Must be reset before the watch is installed so the callback's
        # `self.ready = True` cannot be overwritten afterwards.
        self.ready = False

        def data_alloc_ok(event):
            """Watch callback: publish the WORKING state once data is allocated."""
            # Re-read the spider node to pick up what was written to it
            # (including task_no) since we created it.
            self.node_state = SpiderNodeState.loads(
                self.zk.get(self.zk_spider_node)[0])
            self.node_state.data_node_path = self.zk_data_node
            self.node_state.state = SpiderNodeState.WORKING

            info("register",
                 'data block %s is alloced' % self.node_state.task_no)

            self.zk.set(self.zk_spider_node, value=self.node_state.dumps())

            # NOTE(review): the data state is assigned the SpiderNodeState
            # constant; confirm whether SpiderDataState has its own WORKING.
            self.data_state.state = SpiderNodeState.WORKING
            self.data_state.task_no = self.node_state.task_no
            self.zk.set(self.zk_data_node, value=self.data_state.dumps())
            # Releases the polling loop at the end of register().
            self.ready = True

        # Reading with `watch=` both fetches the current state and arms the
        # callback for the next event on the spider node.
        state = SpiderNodeState.loads(
            self.zk.get(self.zk_spider_node, watch=data_alloc_ok)[0])
        info("register", 'waiting for data being alloced')
        info("register", "spider node state is %s" % state.state)
        # If the node is already READY there is nothing to wait for.
        if state.state == SpiderNodeState.READY:
            self.ready = True

        # Log to "<name>.log" in the current directory, truncating any
        # previous file (filemode='w').
        logging.basicConfig(filename=os.path.join('./',
                                                  '%s.log' % (self.name)),
                            level=logging.INFO,
                            filemode='w',
                            format='%(asctime)s - %(levelname)s: %(message)s')
        # Block until the watch callback (or the READY check above) flips
        # the flag.
        while not self.ready:
            time.sleep(0.1)