@classmethod
def runs(cls, crawler):
    for run_id in cls.run_ids(crawler):
        start = cls.conn.get(make_key("run", run_id, "start"))
        end = cls.conn.get(make_key("run", run_id, "end"))
        total_ops = cls.conn.get(make_key("run", run_id, "total_ops"))
        yield {
            'run_id': run_id,
            'total_ops': unpack_int(total_ops),
            'start': unpack_datetime(start, datetime.utcnow()),
            'end': unpack_datetime(end)
        }
@classmethod
def runs(cls, crawler):
    for run_id in cls.conn.lrange(make_key(crawler, "runs_list"), 0, -1):
        start = cls.conn.get(make_key("run", run_id, "start"))
        end = cls.conn.get(make_key("run", run_id, "end"))
        total_ops = cls.conn.get(make_key("run", run_id, "total_ops"))
        yield {
            'run_id': run_id,
            'total_ops': unpack_int(total_ops),
            'start': unpack_datetime(start),
            'end': unpack_datetime(end)
        }
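# The methods here lean on a handful of helpers (make_key, unpack_int,
# unpack_datetime). The definitions below are a minimal, hedged sketch of
# how they might behave, written for illustration only; they are not the
# project's actual implementations.
from datetime import datetime


def make_key(*parts):
    # Join the non-empty parts into a colon-separated Redis key.
    return ":".join(str(part) for part in parts if part is not None)


def unpack_int(value):
    # Redis returns bytes, or None for a missing key; treat missing as zero.
    return 0 if value is None else int(value)


def unpack_datetime(value, default=None):
    # Assumes timestamps were stored in ISO 8601 form; fall back to
    # `default` when the key is missing.
    if value is None:
        return default
    if isinstance(value, bytes):
        value = value.decode("utf-8")
    return datetime.fromisoformat(value)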
@classmethod
def tasks(cls):
    queues = [make_key('queue', c, s) for c, s in manager.stages]
    random.shuffle(queues)
    while True:
        task_data_tuple = cls.conn.blpop(queues)
        # blpop blocks until it finds something. But fakeredis has no
        # blocking support, so it just returns None.
        if not task_data_tuple:
            return
        key, json_data = task_data_tuple
        # Rotate the queues list so that the matching key ends up at the
        # very end of the list, prioritising all other crawlers.
        # queues = list(reversed(queues))
        deq = deque(queues)
        deq.rotate((queues.index(key) * -1) - 1)
        queues = list(deq)
        task_data = load_json(json_data)
        stage = task_data["stage"]
        state = task_data["state"]
        data = task_data["data"]
        next_time = task_data.get("next_allowed_exec_time")
        next_time = unpack_datetime(next_time)
        crawler = state.get('crawler')
        cls.conn.decr(make_key('queue_pending', crawler))
        yield (stage, state, data, next_time)
@classmethod
def tasks(cls):
    queues = [make_key('queue', c, s) for c, s in manager.stages]
    while True:
        timeout = 1 if settings.DEBUG else 0
        task_data_tuple = cls.conn.blpop(queues, timeout=timeout)
        # blpop blocks until it finds something. But fakeredis has no
        # blocking support, so it just returns None; the one-second timeout
        # in debug mode also lets the loop exit when the queues stay empty.
        if task_data_tuple is None:
            return
        key, json_data = task_data_tuple
        # Rotate the queues list so that the matching key ends up at the
        # very end of the list, prioritising all other crawlers.
        # queues = list(reversed(queues))
        deq = deque(queues)
        deq.rotate((queues.index(key) * -1) - 1)
        queues = list(deq)
        task_data = load_json(json_data)
        stage = task_data["stage"]
        state = task_data["state"]
        data = task_data["data"]
        next_time = task_data.get("next_allowed_exec_time")
        next_time = unpack_datetime(next_time)
        yield (stage, state, data, next_time)
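# A hedged sketch of how a worker loop might consume the generator above.
# `execute_stage` is a hypothetical callable standing in for whatever the
# host application does with a dequeued task, and sleeping on
# `next_allowed_exec_time` is an assumption about how the rate limit is
# meant to be honoured; a real worker might requeue instead.
import time
from datetime import datetime


def consume_tasks(queue_cls, execute_stage):
    for stage, state, data, next_time in queue_cls.tasks():
        if next_time is not None and next_time > datetime.utcnow():
            # The task is rate-limited; wait until it is allowed to run.
            delay = (next_time - datetime.utcnow()).total_seconds()
            time.sleep(max(0.0, delay))
        execute_stage(stage, state, data)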
@classmethod
def event_list(cls, key, start, end):
    results = []
    events = cls.conn.lrange(key, start, end)
    if events is None:
        return results
    for event in events:
        result = load_json(event)
        result["timestamp"] = unpack_datetime(result['timestamp'])
        results.append(result)
    return results
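# For symmetry, a hedged sketch of the write side that `event_list` implies:
# events appended to a Redis list as JSON blobs carrying a timestamp. The
# `save_event` name, key layout and isoformat encoding are assumptions, not
# the project's confirmed storage format.
import json
from datetime import datetime


def save_event(conn, key, event):
    record = dict(event)
    record["timestamp"] = datetime.utcnow().isoformat()
    conn.rpush(key, json.dumps(record))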
@classmethod
def last_run(cls, crawler):
    last_run = cls.conn.get(make_key(crawler, "last_run"))
    return unpack_datetime(last_run)
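# Example use of `last_run`: deciding whether a crawler is due to run again.
# `is_due` and the `interval` parameter (a datetime.timedelta describing the
# crawler's schedule) are hypothetical, shown only to illustrate the call.
from datetime import datetime


def is_due(crawl_cls, crawler, interval):
    last = crawl_cls.last_run(crawler)
    if last is None:
        # The crawler has never run, so it is due immediately.
        return True
    return datetime.utcnow() - last >= interval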