Exemplo n.º 1
0
    def tasks(cls):
        """Yield (stage, state, data, next_time) tuples pulled from the
        crawler stage queues, rotating queue priority for fairness."""
        queues = [make_key('queue', c, s) for c, s in manager.stages]
        random.shuffle(queues)
        while True:
            popped = cls.conn.blpop(queues)
            # blpop normally blocks until an item arrives, but fakeredis
            # has no blocking support and simply returns None instead.
            if not popped:
                return

            key, json_data = popped
            # Move the queue we just served to the very end of the list,
            # giving every other crawler priority on the next iteration.
            served = queues.index(key)
            queues = queues[served + 1:] + queues[:served + 1]

            task_data = load_json(json_data)
            state = task_data["state"]
            next_time = unpack_datetime(task_data.get("next_allowed_exec_time"))
            # Account for the task we just consumed.
            cls.conn.decr(make_key('queue_pending', state.get('crawler')))
            yield (task_data["stage"], state, task_data["data"], next_time)
Exemplo n.º 2
0
    def tasks(cls):
        """Yield (stage, state, data, next_time) tuples from the stage
        queues, shifting the served queue to the back for fairness."""
        queues = [make_key('queue', c, s) for c, s in manager.stages]
        while True:
            # 1-second timeout under DEBUG; 0 asks blpop to block forever.
            timeout = 1 if settings.DEBUG else 0
            popped = conn.blpop(queues, timeout=timeout)
            # blpop normally blocks until data arrives, but fakeredis has
            # no blocking support and just returns None.
            if popped is None:
                return

            key, payload = popped
            # Rotate so the queue that was just served ends up last,
            # prioritising all other crawlers on the next pass.
            rotated = deque(queues)
            rotated.rotate(-(queues.index(key) + 1))
            queues = list(rotated)

            task = load_json(payload)
            yield (
                task["stage"],
                task["state"],
                task["data"],
                unpack_datetime(task.get("next_allowed_exec_time")),
            )
Exemplo n.º 3
0
 def event_list(cls, key, start, end):
     """Return the events stored under *key* in the range [start, end],
     decoded from JSON with their timestamps unpacked."""
     decoded = []
     raw_events = cls.conn.lrange(key, start, end)
     if raw_events is None:
         # Defensive: treat a missing list the same as an empty one.
         return decoded
     for raw in raw_events:
         event = load_json(raw)
         event["timestamp"] = unpack_datetime(event["timestamp"])
         decoded.append(event)
     return decoded
Exemplo n.º 4
0
 def get_tag(self, key):
     """Return the decoded tag value stored for this crawler under *key*,
     or None when no such tag exists."""
     raw = conn.get(make_key(self.crawler, "tag", key))
     if raw is None:
         return None
     return load_json(raw)
Exemplo n.º 5
0
 def find(cls, crawler, key):
     """Fetch and decode the tag stored for (crawler, key); None if unset."""
     stored = cls.conn.get(make_key(crawler, "tag", key))
     return load_json(stored) if stored is not None else None