def schedule_task_with_lock(self, task):
    """Queue a crawler task's URLs in redis once its interval has elapsed.

    The task is skipped when it is disabled or when its ``task_queue`` is
    not one of ``self.task_queues``. Otherwise a lock keyed by the task name
    is requested, the timestamp stored for the task in ``TIMER_RECORDER`` is
    compared with the current time and, if the task is due, its ``resource``
    values are pushed onto the task queue and the timestamp is refreshed.

    Args:
        task: Mapping read through ``.get`` for the keys ``enable``,
            ``task_queue``, ``name``, ``interval`` (minutes) and
            ``resource`` (the values to push).

    Returns:
        ``True`` if the task was queued, ``False`` if the lock could not be
        acquired, ``None`` if the task was skipped or is not due yet.
    """
    if not task.get('enable'):
        return None

    queue_name = task.get('task_queue')
    if queue_name not in self.task_queues:
        return None

    conn = get_redis_conn()
    name = task.get('name')
    minutes = task.get('interval')
    seeds = task.get('resource')

    lock_id = acquire_lock(conn, name)
    if not lock_id:
        return False

    pipe = conn.pipeline(True)
    try:
        now = int(time.time())
        pipe.hget(TIMER_RECORDER, name)
        last_run = pipe.execute()[0]

        # Due when no timestamp has been recorded yet, or when at least
        # `minutes` (converted to seconds) have passed since the last one.
        is_due = (
            not last_run
            or (now - int(last_run.decode('utf-8'))) >= minutes * 60
        )
        if not is_due:
            return None

        pipe.lpush(queue_name, *seeds)
        pipe.hset(TIMER_RECORDER, name, now)
        pipe.execute()
        return True
    finally:
        # Runs on every exit path of the try block, exceptions included.
        release_lock(conn, name, lock_id)
def schedule_task_with_lock(self, task):
    """Crawler scheduler: push a task's URLs to redis when the task is due.

    Tasks are filtered by type: a task is ignored when it is disabled or
    when its ``task_queue`` is not in ``self.task_queues``. For accepted
    tasks a lock keyed by the task name is requested so that the
    check-and-push below is not run concurrently for the same task.

    Args:
        task: Mapping read through ``.get`` for the keys ``enable``,
            ``task_queue``, ``name``, ``resource`` (values pushed onto the
            queue) and ``interval`` (minutes between runs). The misspelled
            key ``internal`` is still honoured when ``interval`` is absent.

    Returns:
        ``True`` if the task was queued, ``False`` if the lock could not be
        acquired, ``None`` if the task was skipped or is not due yet.
    """
    if not task.get('enable'):
        return None
    task_queue = task.get('task_queue')
    if task_queue not in self.task_queues:
        return None

    conn = get_redis_conn()
    task_name = task.get('name')
    # Prefer the correctly spelled key; fall back to 'internal', the
    # spelling this function originally read, so existing task
    # definitions keep working.
    interval = task.get('interval')
    if interval is None:
        interval = task.get('internal')
    urls = task.get('resource')

    lock_identifier = acquire_lock(conn, task_name)
    if not lock_identifier:
        return False

    pipe = conn.pipeline(True)
    try:
        now = int(time.time())
        pipe.hget(TIMER_RECORDER, task_name)
        r = pipe.execute()[0]
        # `interval` is in minutes; the recorded timestamp is in seconds.
        if not r or (now - int(r.decode('utf-8'))) >= interval * 60:
            pipe.lpush(task_queue, *urls)
            pipe.hset(TIMER_RECORDER, task_name, now)
            pipe.execute()
            return True
        return None
    finally:
        # Release the lock on every exit path, exceptions included.
        release_lock(conn, task_name, lock_identifier)
def get_lock(self, conn, task):
    """Request the lock for *task* if this scheduler should handle it.

    Args:
        conn: Connection object handed to ``acquire_lock`` unchanged.
        task: Mapping read through ``.get`` for the keys ``enable``,
            ``task_queue`` and ``name``.

    Returns:
        ``None`` when the task is disabled or its ``task_queue`` is not in
        ``self.task_queues``; otherwise whatever ``acquire_lock`` returns
        for the task's name.
    """
    # Short-circuit keeps the original order: 'enable' is checked first and
    # the queue membership test only runs for enabled tasks.
    if not task.get('enable') or task.get('task_queue') not in self.task_queues:
        return None
    return acquire_lock(conn, task.get('name'))
def schedule_task_with_lock(self, task):
    """Validator scheduler: copy proxies into a task queue when the task is due.

    Validator tasks are filtered by task name, since its task name stands
    for the task type. A task is ignored when it is disabled or when its
    ``task_queue`` is not in ``self.task_queues``. For accepted tasks a lock
    keyed by the task name is requested, then the members of the sorted set
    named by ``resource`` are added to the ``task_queue`` set if the task's
    interval has elapsed.

    Args:
        task: Mapping read through ``.get`` for the keys ``enable``,
            ``task_queue``, ``name``, ``resource`` (name of the sorted set
            to read proxies from) and ``interval`` (minutes between runs).
            The misspelled key ``internal`` is still honoured when
            ``interval`` is absent.

    Returns:
        ``True`` if proxies were queued, ``False`` if the lock could not be
        acquired, ``None`` if the task was skipped, is not due yet, or no
        proxies were found.
    """
    if not task.get('enable'):
        return None
    task_queue = task.get('task_queue')
    if task_queue not in self.task_queues:
        return None

    conn = get_redis_conn()
    # Prefer the correctly spelled key; fall back to 'internal', the
    # spelling this function originally read, so existing task
    # definitions keep working.
    interval = task.get('interval')
    if interval is None:
        interval = task.get('internal')
    task_name = task.get('name')
    resource_queue = task.get('resource')

    lock_identifier = acquire_lock(conn, task_name)
    if not lock_identifier:
        return False

    pipe = conn.pipeline(True)
    try:
        now = int(time.time())
        # Both reads go out in one round trip: the last run timestamp and
        # every member of the resource sorted set.
        pipe.hget(TIMER_RECORDER, task_name)
        pipe.zrevrangebyscore(resource_queue, '+inf', '-inf')
        r, proxies = pipe.execute()
        # `interval` is in minutes; the recorded timestamp is in seconds.
        if not r or (now - int(r.decode('utf-8'))) >= interval * 60:
            if not proxies:
                # Nothing to validate; the timestamp is left untouched.
                print('fetched no proxies from task {}'.format(task_name))
                return None

            pipe.sadd(task_queue, *proxies)
            pipe.hset(TIMER_RECORDER, task_name, now)
            pipe.execute()
            print(
                'validator task {} has been stored into redis successfully'
                .format(task_name))
            return True
        return None
    finally:
        # Release the lock on every exit path, exceptions included.
        release_lock(conn, task_name, lock_identifier)
def schedule_task_with_lock(self, task):
    """Add proxies to a validator task queue once the task's interval is up.

    Validator tasks are selected by task name, which stands for the task
    type. The task is skipped when it is disabled or when its ``task_queue``
    is not one of ``self.task_queues``. Otherwise a lock keyed by the task
    name is requested, and the members of the sorted set named by
    ``resource`` are added to the ``task_queue`` set if the task is due.

    Args:
        task: Mapping read through ``.get`` for the keys ``enable``,
            ``task_queue``, ``interval`` (minutes), ``name`` and
            ``resource`` (name of the sorted set holding the proxies).

    Returns:
        ``True`` if proxies were queued, ``False`` if the lock could not be
        acquired, ``None`` if the task was skipped, is not due yet, or no
        proxies were found.
    """
    if not task.get('enable'):
        return None

    queue_name = task.get('task_queue')
    if queue_name not in self.task_queues:
        return None

    conn = get_redis_conn()
    minutes = task.get('interval')
    name = task.get('name')
    source_set = task.get('resource')

    lock_id = acquire_lock(conn, name)
    if not lock_id:
        return False

    pipe = conn.pipeline(True)
    try:
        now = int(time.time())
        # Fetch the last run timestamp and all members of the source set
        # with a single pipeline execution.
        pipe.hget(TIMER_RECORDER, name)
        pipe.zrevrangebyscore(source_set, '+inf', '-inf')
        last_run, proxies = pipe.execute()

        # Due when no timestamp has been recorded yet, or when at least
        # `minutes` (converted to seconds) have passed since the last one.
        is_due = (
            not last_run
            or (now - int(last_run.decode('utf-8'))) >= minutes * 60
        )
        if not is_due:
            return None

        if not proxies:
            # Nothing to validate; the timestamp is left untouched.
            print('fetched no proxies from task {}'.format(name))
            return None

        pipe.sadd(queue_name, *proxies)
        pipe.hset(TIMER_RECORDER, name, now)
        pipe.execute()
        return True
    finally:
        # Runs on every exit path of the try block, exceptions included.
        release_lock(conn, name, lock_id)