class UIDStack(object):
    """LIFO container of (uid, metadata) pairs backed by gevent's LifoQueue.

    The metadata component of each pair may be None.
    """

    def __init__(self):
        self._lifoqueue = LifoQueue()

    def empty(self):
        """Return True when no pairs remain on the stack."""
        return self._lifoqueue.empty()

    def get(self):
        """Pop and return the most recently pushed (uid, metadata) pair."""
        return self._lifoqueue.get_nowait()

    def peek(self):
        # This should be LifoQueue.peek_nowait(), which is currently buggy in
        # gevent. Can update with gevent version 1.0.2.
        return self._lifoqueue.queue[-1]

    def put(self, uid, metadata):
        """Push a (uid, metadata) pair onto the stack."""
        self._lifoqueue.put((uid, metadata))

    def discard(self, objects):
        """Drop every stacked pair that appears in *objects*."""
        kept = [entry for entry in self._lifoqueue.queue
                if entry not in objects]
        self._lifoqueue.queue = kept

    def qsize(self):
        """Return the number of stacked pairs."""
        return self._lifoqueue.qsize()

    def __iter__(self):
        # Iterate bottom-to-top over the underlying list.
        return iter(self._lifoqueue.queue)
# NOTE(review): this class is an exact duplicate of the UIDStack defined
# earlier in this file; one of the two definitions should be removed.
class UIDStack(object):
    """Thin convenience wrapper around gevent.queue.LifoQueue. Each entry in
    the stack is a pair (uid, metadata), where the metadata may be None."""

    def __init__(self):
        self._lifoqueue = LifoQueue()

    def empty(self):
        # True when no (uid, metadata) pairs remain.
        return self._lifoqueue.empty()

    def get(self):
        # Pop the most recently added pair; raises Empty when the stack is
        # empty (non-blocking).
        return self._lifoqueue.get_nowait()

    def peek(self):
        # This should be LifoQueue.peek_nowait(), which is currently buggy in
        # gevent. Can update with gevent version 1.0.2.
        return self._lifoqueue.queue[-1]

    def put(self, uid, metadata):
        # Push a (uid, metadata) pair; metadata may be None.
        self._lifoqueue.put((uid, metadata))

    def discard(self, objects):
        # Rebuild the underlying list, dropping any pair present in `objects`.
        self._lifoqueue.queue = [item for item in self._lifoqueue.queue
                                 if item not in objects]

    def qsize(self):
        # Number of pairs currently on the stack.
        return self._lifoqueue.qsize()

    def __iter__(self):
        # Iterates bottom-to-top over the underlying list.
        for item in self._lifoqueue.queue:
            yield item
class Stack(object):
    """Thin convenience wrapper around gevent.queue.LifoQueue.

    Elements are ordered on insertion by the *key* function, so the element
    with the largest key sits on top of the stack.
    """

    def __init__(self, key, initial_elements=None):
        self.key = key
        self._lifoqueue = LifoQueue()
        if initial_elements is not None:
            # Sort ascending so the largest-key element ends up on top.
            self._lifoqueue.queue = sorted(list(initial_elements),
                                           key=self.key)

    def empty(self):
        """Return True when the stack holds no elements."""
        return self._lifoqueue.empty()

    def get(self):
        """Pop and return the top element without blocking."""
        return self._lifoqueue.get_nowait()

    def peek(self):
        # This should be LifoQueue.peek_nowait(), which is currently buggy in
        # gevent. Can update with gevent version 1.0.2.
        return self._lifoqueue.queue[-1]

    def put(self, obj):
        """Push a single element onto the stack."""
        self._lifoqueue.put(obj)

    def update_from(self, objects):
        """Push *objects* in ascending key order (largest key on top)."""
        for element in sorted(list(objects), key=self.key):
            self._lifoqueue.put(element)

    def discard(self, objects):
        """Remove every stacked element contained in *objects*."""
        survivors = [element for element in self._lifoqueue.queue
                     if element not in objects]
        self._lifoqueue.queue = survivors

    def qsize(self):
        """Return the current element count."""
        return self._lifoqueue.qsize()

    def __iter__(self):
        # Iterate bottom-to-top over the underlying list.
        return iter(self._lifoqueue.queue)
class BaseProcessor(LoggerMixin):
    """Base class for gevent-driven task processors.

    Subclasses implement populate_tasks() to feed callables into a LIFO task
    queue that a pool of Worker greenlets drains. A heartbeat greenlet logs
    progress every 30 seconds.
    """

    name = 'base-processor'

    @classmethod
    def from_engine(cls, engine, *args, **kwargs):
        # Alternate constructor used to build a processor bound to an engine.
        return cls(engine, *args, **kwargs)

    def _request(self):
        return self.engine.request

    # Expose the engine's request object as a read-only property.
    request = property(_request)

    def __init__(self, engine, *args, **kwargs):
        from time import time
        from hashlib import md5
        from threading import Lock
        from gevent.queue import LifoQueue

        # Unique-ish run identifier: processor name plus a 6-char hash of the
        # current clock value.
        self.processor_name = '%s:%s' % (self.name, md5(str(
            time())).hexdigest()[:6])
        LoggerMixin.__init__(self)
        self.engine = engine
        self.__redis = None
        self.redis_lock = Lock()
        self.progress = 0
        self.total = 0
        # Count of items that were skipped/bypassed.
        self.bypassed_cnt = 0
        # When the task queue grows beyond this limit, add_task() pauses
        # instead of enqueueing more work.
        self.maxsize = 1000
        self.tasks = LifoQueue()
        self.workers = []
        # Default polling interval is one second.
        self.polling_interval = 1
        import argparse
        arg_parser = argparse.ArgumentParser()
        # Worker concurrency level (overrides the configured default).
        arg_parser.add_argument('--concur', type=int)
        args, leftover = arg_parser.parse_known_args()
        from core import dhaulagiri_settings
        if args.concur:
            dhaulagiri_settings['core']['concur'] = args.concur
        self.concur = dhaulagiri_settings['core']['concur']
        # Progress checkpoint (timestamp / count) used for rate reporting.
        self.checkpoint_ts = None
        self.checkpoint_prog = None
        self.init_ts = time()
        # Heartbeat greenlet (spawned in _start_workers).
        self.heart_beat = None
        # Worker monitor. Each worker refreshes its entry here at the start
        # of every loop iteration.
        self.worker_monitor = {}

    def update_worker_status(self, worker):
        """
        Record a liveness timestamp for *worker* in the monitor.
        :param worker: object exposing a ``worker_name`` attribute
        :return:
        """
        from time import time
        name = worker.worker_name
        self.worker_monitor[name] = time()

    def get_worker_stat(self):
        """
        Split monitored workers into active and zombie groups.
        :return: dict with 'active' and 'zombie' name->timestamp mappings
        """
        from time import time
        # A worker with no status update within the window is considered a
        # zombie. NOTE(review): the original comment said 60 seconds, but the
        # window actually used below is 90 — confirm which is intended.
        time_window = 90
        cur = time()
        active = dict(
            filter(lambda item: item[1] >= cur - time_window,
                   self.worker_monitor.items()))
        zombie = dict(
            filter(lambda item: item[1] < cur - time_window,
                   self.worker_monitor.items()))
        return {'zombie': zombie, 'active': active}

    def incr_progress(self):
        # Bump the processed-items counter by one.
        self.progress += 1

    def _start_workers(self):
        def timer():
            """
            Runs every 30 seconds and logs the current progress.
            """
            import time
            while True:
                msg = 'Progress: %d / %d.' % (self.progress, self.total)
                cts = time.time()
                if self.checkpoint_prog is not None and self.checkpoint_ts is not None:
                    # Items processed since the last checkpoint, scaled to
                    # items per minute.
                    rate = (self.progress - self.checkpoint_prog) / (
                        cts - self.checkpoint_ts) * 60
                    msg = '%s %s' % (msg, 'Processing rate: %d items/min' % int(rate))
                self.checkpoint_ts = cts
                self.checkpoint_prog = self.progress
                # Append worker-monitor statistics.
                stat = self.get_worker_stat()
                msg += ', active workers: %d, zombie workers: %d' % (len(
                    stat['active']), len(stat['zombie']))
                self.log(msg)
                gevent.sleep(30)

        self.heart_beat = gevent.spawn(timer)
        # NOTE(review): SIGKILL cannot be caught or handled by a process;
        # registering a handler for it is likely a no-op or an error —
        # confirm whether SIGTERM was intended.
        gevent.signal(signal.SIGKILL, gevent.kill)
        gevent.signal(signal.SIGQUIT, gevent.kill)
        for i in xrange(self.concur):
            worker = Worker.from_processor(self, self.tasks)
            self.workers.append(worker)

    def add_task(self, task, *args, **kwargs):
        """Wrap *task* with its arguments and enqueue it, throttling when the
        queue is over self.maxsize."""
        # Whether flow control is enabled.
        flow_control = True
        while flow_control:
            # If there are too many queued tasks, pause before adding more.
            if self.tasks.qsize() > self.maxsize:
                gevent.sleep(self.polling_interval)
            else:
                break
        func = lambda: task(*args, **kwargs)
        # Propagate the task key (if any) onto the wrapper for bookkeeping.
        task_key = getattr(task, 'task_key', None)
        if task_key:
            setattr(func, 'task_key', task_key)
        self.tasks.put(func, timeout=120)
        self.logger.debug(
            'New task%s added to the queue. Remaining: %d' %
            ('(%s)' % task_key if task_key else '', self.tasks.qsize()))
        # Yield to the hub so workers get a chance to run.
        gevent.sleep(0)

    def _wait_for_workers(self):
        """
        Block until all work is done. Criterion: the task queue is empty and
        every worker is idle.
        :return:
        """
        while True:
            if not self.tasks.empty():
                gevent.sleep(self.polling_interval)
                continue
            completed = True
            for w in self.workers:
                if not w.idle:
                    gevent.sleep(self.polling_interval)
                    completed = False
                    break
            if completed:
                break
        # Tear down worker greenlets and the heartbeat once finished.
        gevent.killall([w.gevent for w in self.workers])
        gevent.kill(self.heart_beat)

    def run(self):
        """Start workers, populate tasks, wait for completion, log summary."""
        self._start_workers()
        self.populate_tasks()
        self._wait_for_workers()
        import time
        self.log(
            'Processor ended: %d items processed(%d bypassed) in %d minutes' %
            (self.progress, self.bypassed_cnt,
             int((time.time() - self.init_ts) / 60.0)))

    def populate_tasks(self):
        # Subclasses must enqueue their work items via add_task().
        raise NotImplementedError
class AbstractDatabaseConnectionPool(object):
    def __init__(self, maxsize=100, maxwait=1.0, expires=None, cleanup=None):
        """
        The pool manages opened connections to the database. The main strategy
        is to keep the smallest number of alive connections which are required
        for best web service performance.

        In most cases connections are taken from the pool. In case of view
        peaks, the pool creates some extra connections to prevent the service
        from becoming unavailable. In times of low traffic (night),
        unnecessary connections are released.

        Parameters
        ----------
        maxsize : int
            Soft limit of the number of created connections. After reaching
            this limit, taking the next connection first waits `maxwait` time
            for any returned slot.
        maxwait : float
            The time in seconds to wait before raising, once the pool is
            empty and `maxsize` connections already exist.
        expires : float
            The time in seconds indicating how long a connection may stay
            alive after creation before being closed during cleanup.
        cleanup : float
            The time in seconds of allowed idleness; connections unused for
            longer are closed during cleanup.
        """
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))
        self._maxsize = maxsize
        self._maxwait = maxwait
        self._expires = expires
        self._cleanup = cleanup
        # Per-connection bookkeeping, keyed by id(connection).
        self._created_at = {}
        self._latest_use = {}
        self._pool = LifoQueue()
        self._size = 0
        # With neither expires nor cleanup configured, push the next-cleanup
        # deadline effectively to infinity so _cleanup_queue is a no-op.
        self._latest_cleanup = 0 if self._expires or self._cleanup else 0xffffffffffffffff
        # Cleanup period: the smaller of the two configured intervals (the
        # `or` chains make min() safe when only one of them is set).
        self._interval_cleanup = min(
            self._expires or self._cleanup,
            self._cleanup or self._expires) if self._expires or self._cleanup else 0
        self._cleanup_lock = Semaphore(value=1)

    def create_connection(self):
        # Subclasses must return a new open DB-API connection.
        raise NotImplementedError()

    def close_connection(self, item):
        # Best-effort close: drop the bookkeeping first and ignore any error
        # raised by close().
        try:
            self._size -= 1
            self._created_at.pop(id(item), None)
            self._latest_use.pop(id(item), None)
            item.close()
        except Exception:
            pass

    def cleanup(self):
        """Close pooled connections that are expired or have idled too long."""
        self._cleanup_queue(time.time())

    def _cleanup_queue(self, now):
        # Throttled by self._latest_cleanup; double-checked under the
        # semaphore so only one greenlet runs the scan per interval.
        if self._latest_cleanup > now:
            return
        with self._cleanup_lock:
            if self._latest_cleanup > now:
                return
            self._latest_cleanup = now + self._interval_cleanup
            # Thresholds: connections last used before `cleanup`, or created
            # before `expires`, get closed.
            cleanup = now - self._cleanup if self._cleanup else None
            expires = now - self._expires if self._expires else None
            # Instead of creating a new LIFO for self._pool, the old one is
            # reused, because some other greenlet might be waiting for a
            # connection on it.
            fresh_slots = []
            try:
                # Drain self._pool, keeping survivors aside; note that simply
                # re-adding them here would leave the LIFO in reversed order.
                while not self._pool.empty():
                    item = self._pool.get_nowait()
                    if cleanup and self._latest_use.get(id(item), 0) < cleanup:
                        self.close_connection(item)
                    elif expires and self._created_at.get(id(item), 0) < expires:
                        self.close_connection(item)
                    else:
                        fresh_slots.append(item)
            except Empty:
                pass
            # Reverse the order back (freshest connections should be at the
            # top of the LIFO).
            for conn in reversed(fresh_slots):
                self._pool.put_nowait(conn)

    def get(self):
        """Take a connection from the pool, creating a new one if allowed.

        Raises OperationalError when `maxsize` connections already exist and
        none is returned within `maxwait` seconds.
        """
        try:
            return self._pool.get_nowait()
        except Empty:
            pass
        if self._size >= self._maxsize:
            try:
                return self._pool.get(timeout=self._maxwait)
            except Empty:
                pass
        # It is possible that after waiting self._maxwait time, no connection
        # has been returned because old ones were cleaned up on put(), so
        # there is no pooled connection but the pool is also not full. In
        # that case a new connection should be created; otherwise an
        # exception is raised.
        if self._size >= self._maxsize:
            raise OperationalError(
                "Too many connections created: {} (maxsize is {})".format(
                    self._size, self._maxsize))
        try:
            self._size += 1
            conn = self.create_connection()
        except:
            # Creation failed: release the reserved slot and propagate.
            self._size -= 1
            raise
        now = time.time()
        self._created_at[id(conn)] = now
        self._latest_use[id(conn)] = now
        return conn

    def put(self, conn):
        """Return a connection to the pool and run throttled cleanup."""
        now = time.time()
        self._pool.put(conn)
        self._latest_use[id(conn)] = now
        self._cleanup_queue(now)

    def closeall(self):
        """Close every pooled connection, ignoring individual failures."""
        while not self._pool.empty():
            conn = self._pool.get_nowait()
            try:
                conn.close()
            except Exception:
                pass
        self._size = 0

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        """Context manager yielding a pooled connection.

        Commits on success; on error rolls back, or — if the connection was
        closed — drops the whole pool. The connection is returned to the pool
        in the finally clause when still usable.
        """
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    # Already at the requested level; skip the finally-clause
                    # set_isolation_level call as well.
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if conn.closed:
                # Connection died mid-use; assume the pool's siblings may be
                # broken too and discard everything.
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    # NOTE(review): this re-applies the *requested* isolation
                    # level rather than restoring the connection's previous
                    # one — confirm this is the intended pooling behavior.
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Context manager yielding a cursor on a pooled connection.

        Accepts an optional `isolation_level` keyword which is forwarded to
        connection(); remaining arguments go to conn.cursor().
        """
        isolation_level = kwargs.pop('isolation_level', None)
        with self.connection(isolation_level) as conn:
            yield conn.cursor(*args, **kwargs)

    def _rollback(self, conn):
        # Roll back; on failure report the error to the gevent hub and return
        # None so the caller will not put the connection back in the pool.
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement and return the affected row count."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """Execute a query and return its first row."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows as a list."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """Execute a query and lazily yield rows in fetchmany-sized batches."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item