class Apartment(object):
    """An apartment with two managers, each assigned a pool of workers.

    Every manager and worker greenlet is collected into ``self.members``
    so the whole apartment can be waited on via :meth:`start`.
    """

    def __init__(self, name):
        # print() works on both Python 2 and 3 for a single argument
        # (the original used the Py2-only print statement).
        print('Apartment {0} Ready to work'.format(name))
        self.name = name
        # Managers and workers are all members of the apartment.
        self.members = Group()
        # Create two managers and add them to members.
        # NOTE(review): apartment1_urls / apartment2_urls must be defined
        # elsewhere in the module -- confirm.
        self.managers = [
            ApartmentManager('John', apartment1_urls),
            ApartmentManager('Micky', apartment2_urls),
        ]
        for manager in self.managers:
            manager.start()
            self.members.add(manager)
        self.workers = list()
        # Assign 2 workers per manager.  (The original comments claimed
        # "5 worker" but range(2) only creates two each.)
        for i in range(2):
            self.workers.append(Worker(i, self.managers[0]))
        for i in range(2):
            self.workers.append(Worker(i, self.managers[1]))
        for worker in self.workers:
            worker.start()
            self.members.add(worker)

    def start(self):
        # Block until every member greenlet has finished.
        self.members.join()
def test_greenlet(self):
    """Fan three producer greenlets into a queue consumed by one consumer.

    The consumer is signalled via ``requests_done`` once every producer
    has finished.
    """
    queue = JoinableQueue()
    requests_done = Event()
    g = Greenlet(self._producer, queue, FirstService(), 'Terminator')
    h = Greenlet(self._producer, queue, SecondService(), 'Terminator')
    i = Greenlet(self._producer, queue, ThirdService(), 'Terminator')
    requests = Group()
    for request in g, h, i:
        requests.add(request)
    log.debug('before spawn')
    c = spawn(
        self._consumer,
        done=requests_done,
        queue=queue,
    )
    # Use a plain loop for side effects instead of building a throwaway
    # list with a comprehension.
    for request in requests:
        request.start()
    log.debug('after spawn')
    requests.join()
    requests_done.set()
    log.debug('requests are done')
    c.join()
    log.debug('consumer is done')
class CrawlBase(object):
    """Base crawler: reads seed URLs, dispatches Spider greenlets, and
    harvests their results from a shared queue."""

    spider_count = 0

    def __init__(self):
        self.group = Group()   # running spider greenlets
        self.queue = Queue()   # results produced by spiders

    def read_seed(self, file='seeds.txt'):
        """Yield stripped seed URLs from *file*; stop at the first blank line.

        Bug fix: a line containing only whitespace previously passed the
        ``line != "\\n"`` check and yielded an empty string as a URL; now
        any whitespace-only line terminates the seed list.
        """
        with open(file) as f:
            for line in f:
                if line.strip():
                    yield line.strip()
                else:
                    # Blank line marks the end of the seeds.
                    return

    def dispatch(self):
        """Spawn one Spider greenlet per seed URL and wait for them all."""
        for url in self.read_seed():
            g = gevent.spawn(Spider, self, url)
            self.group.add(g)
        self.group.join()

    def harvest(self):
        """Print queued results until the queue stays empty for 2 seconds."""
        try:
            while True:
                content = self.queue.get(timeout=2)
                print(content)
        except Empty:
            pass
def forward(self, sock, remote_sock, data_timeout=5 * 60):
    u"""Forward data between two sockets (blocking call).

    Connections are closed automatically when forwarding fails, and the
    connection is closed once both directions have timed out.  ``shutdown``
    is not handled specially: a one-directional shutdown closes both
    directions.
    """
    try:
        # Timestamp (ms) of the last forwarded data, shared by both
        # forwarding greenlets.
        state = {
            'forward_data_time': int(time.time() * 1000),
        }
        sock.settimeout(data_timeout)
        remote_sock.settimeout(data_timeout)
        relay = Group()
        relay.add(gevent.spawn(
            self.__forwardData, sock, remote_sock, state, data_timeout))
        relay.add(gevent.spawn(
            self.__forwardData, remote_sock, sock, state, data_timeout))
        relay.join()
    finally:
        # Always tear down both sides, even on error.
        sock.close()
        remote_sock.close()
def greenkill():
    """Spawn green1 plus a killer greenlet (green3) targeting it, wait for
    both, then report whether the greenlet variable is None."""
    group_team = Group()
    g1 = gevent.spawn(green1, "green1", " so green")
    g3 = gevent.spawn(green3, g1)
    group_team.add(g1)
    group_team.add(g3)
    group_team.join()
    # Identity comparison is the correct way to test against None (PEP 8);
    # `== None` relied on the object not overriding __eq__.
    print(g1 is None)
def stage_monitor(stage):
    """
    Stage monitor is a worker that monitors a stage while it is being
    executed.  The stage monitor coordinates running stage workers, saving
    results, and determining the end of any particular stage.
    """
    # Pool of stage function worker greenlets.
    work_pool = Pool(size=stage.n_workers)
    # Group of greenlets which save results from workers via callbacks.
    save_group = Group()

    def save_result(x):
        """
        Save results onto the output queue as individual items or, when
        the stage returns a single value, as that singular item.
        """
        # isinstance (rather than exact type equality) also covers
        # subclasses of Reduce.
        if isinstance(stage, Reduce):
            # XXX: This would not work for stream inputs afaict
            # But, reduction should not work anyway
            if len(work_pool) + len(save_group) + len(stage.in_q) == 1:
                stage.out_q.put(x)
        else:
            if not stage.returns_many:
                stage.out_q.put(x)
            else:
                try:
                    for i in x:
                        stage.out_q.put(i)
                except TypeError:
                    # Narrowed from a bare except: only a non-iterable
                    # result should be wrapped; other errors propagate.
                    stage.out_q.put([x])

    for x in stage.in_q:
        # Iterate the input queue until StopIteration is received.  Spawn
        # new workers for work items on the input queue, tracking stored
        # results via a group of result-saving greenlets.  DROP items are
        # ignored.  On StopIteration, wait for all open workers to finish,
        # then bubble the StopIteration to the next stage.
        gevent.sleep(0)
        if x is DROP:
            continue
        if x is StopIteration:
            break
        func_args = [x]
        cb_worker = work_pool.apply_async(stage.func, func_args,
                                          callback=save_result)
        save_group.add(cb_worker)

    logger.debug('Worker Pool: << {} >>'.format(work_pool))
    work_pool.join()
    save_group.join()
    stage.out_q.put(StopIteration)
    return stage
def groupkill():
    """Spawn three greenlets; green3 receives the group itself so it can
    kill the whole group.  Wait for everything to finish."""
    team = Group()
    first = gevent.spawn(green1, "green1", " so green")
    second = gevent.spawn(green2, "green2")
    killer = gevent.spawn(green3, team)
    for member in (first, second, killer):
        team.add(member)
    team.join()
def search(self, query, queue):
    """Schedules a search and returns the related task information."""
    tasks = Group()
    for provider in self._providers:
        task = spawn(self._search_wrapper, provider, query, queue,
                     self._middleware)
        tasks.add(task)
    return tasks
def Groupadd():
    """Spawn three greenlets, collect them into a group, and wait."""
    crew = Group()
    spawned = (
        gevent.spawn(green1, "green1", " so green"),
        gevent.spawn(green2, "green2"),
        gevent.spawn(green3, "green3"),
    )
    for member in spawned:
        crew.add(member)
    crew.join()
def add(self, greenlet, blocking=True):
    """Add *greenlet* to the group once a semaphore slot is available.

    Returns True when the greenlet was added; returns False when
    *blocking* is False and no slot could be acquired.  The slot is
    released again if the underlying Group.add fails.
    """
    if not self._semaphore.acquire(blocking=blocking):
        return False
    try:
        Group.add(self, greenlet)
    except:
        # Give the slot back before re-raising (bare except is deliberate:
        # any failure, including BaseException, must release the slot).
        self._semaphore.release()
        raise
    return True
def stop_users(self, user_count, stop_rate=None):
    """
    Stop `user_count` weighted users at a rate of `stop_rate`
    """
    if user_count == 0 or stop_rate == 0:
        return

    bucket = self.weight_users(user_count)
    user_count = len(bucket)
    to_stop = []
    for g in self.user_greenlets:
        for l in bucket:
            user = g.args[0]
            # isinstance (not exact type equality) matches subclasses too,
            # consistent with the newer stop_users implementation.
            if isinstance(user, l):
                to_stop.append(user)
                bucket.remove(l)
                break

    if not to_stop:
        return

    # `is None` is the correct None test (PEP 8), not `== None`.
    if stop_rate is None or stop_rate >= user_count:
        sleep_time = 0
        logger.info("Stopping %i users immediately" % (user_count))
    else:
        sleep_time = 1.0 / stop_rate
        logger.info("Stopping %i users at rate of %g users/s" % (user_count, stop_rate))

    # Always bind stop_group so no branch below can hit an unbound name;
    # it is only populated when stop_timeout is configured.
    stop_group = Group()

    while True:
        user_to_stop = to_stop.pop(random.randint(0, len(to_stop) - 1))
        logger.debug('Stopping %s' % user_to_stop._greenlet.name)
        if self.environment.stop_timeout:
            if not user_to_stop.stop(self.user_greenlets, force=False):
                # User.stop() returns False if the greenlet was not stopped, so we'll need
                # to add it's greenlet to our stopping Group so we can wait for it to finish it's task
                stop_group.add(user_to_stop._greenlet)
        else:
            user_to_stop.stop(self.user_greenlets, force=True)
        if to_stop:
            gevent.sleep(sleep_time)
        else:
            break

    if self.environment.stop_timeout and not stop_group.join(
            timeout=self.environment.stop_timeout):
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout)
        stop_group.kill(block=True)

    logger.info("%i Users have been stopped" % user_count)
def add(self, greenlet):
    """Add *greenlet* to the group, consuming one semaphore slot.

    Raises RejectExcecutionError when no slot can be acquired within the
    configured timeout; the slot is released again if Group.add fails.
    """
    # XXX(Mouad): Checking directly for False because DummySemaphore always
    # return None https://github.com/gevent/gevent/pull/544.
    if self._semaphore.acquire(blocking=False, timeout=self._timeout) is False:
        raise RejectExcecutionError('No more resource available to run %r' % greenlet)
    try:
        Group.add(self, greenlet)
    except:
        self._semaphore.release()
        raise
def add(self, greenlet):
    """Register *greenlet* with the group after reserving a semaphore slot."""
    slot = self._semaphore.acquire(blocking=False, timeout=self._timeout)
    # XXX(Mouad): Checking directly for False because DummySemaphore always
    # return None https://github.com/gevent/gevent/pull/544.
    if slot is False:
        raise RejectExcecutionError("No more resource available to run %r" % greenlet)
    try:
        Group.add(self, greenlet)
    except:
        # Give the slot back if the greenlet could not be added.
        self._semaphore.release()
        raise
def Demo1():
    """Join the first two talkers, then add the third and join again."""
    first = gevent.spawn(talk, 'bar')
    second = gevent.spawn(talk, 'foo')
    third = gevent.spawn(talk, 'fizz')
    group = Group()
    for g in (first, second):
        group.add(g)
    group.join()
    group.add(third)
    group.join()
def sample_manager():
    """Run two talker greenlets to completion, then a third."""
    talkers = [gevent.spawn(talk, word) for word in ('bar', 'foo', 'fizz')]
    group = Group()
    group.add(talkers[0])
    group.add(talkers[1])
    group.join()
    group.add(talkers[2])
    group.join()
def main():
    """Spawn three talkers; wait for the first two, then the third.

    Bug fix: the original added ``g1`` to the group three times and never
    joined ``g2`` or ``g3`` (the sibling demos add g1 and g2, join, then
    add g3 and join).
    """
    g1 = gevent.spawn(talk, 'foo')
    g2 = gevent.spawn(talk, 'bar')
    g3 = gevent.spawn(talk, 'baz')
    group = Group()
    group.add(g1)
    group.add(g2)
    group.join()
    group.add(g3)
    group.join()
def create_connection(self, address, timeout=10):
    """Create a TCP connection to *address*: try the cached fastest route
    first, then race a connection attempt on every configured upstream.
    """
    # Bind names used by the except handler up front; previously a failure
    # before their assignment (e.g. a missing 'tcp_ping' key) raised
    # NameError inside `except`, masking the real error.
    start_time = int(time.time() * 1000)
    _upstream = None
    # Try the route cache first.
    route_list = self.get_route_order_ping(address[0], address[1], None)
    if route_list:
        try:
            route = route_list[0]
            # Derive a connect timeout from the cached TCP ping (ms):
            # double fast pings, pad slow ones, then round up to seconds.
            cache_timeout = route['tcp_ping']
            if cache_timeout < 1000:
                cache_timeout = cache_timeout * 2
            else:
                cache_timeout = cache_timeout + 1000
            cache_timeout = int(math.ceil(cache_timeout / 1000.0))
            _upstream = self.upstream_dict.get(route['proxy_name'])
            start_time = int(time.time() * 1000)
            sock = _upstream.create_connection(address, cache_timeout)
            t = int(time.time() * 1000) - start_time
            logging.debug(
                u'[upstream][RouteCache]%s 缓存记录 连接 %s:%s 命中。time:%s' %
                (_upstream.get_display_name(), address[0], address[1], t))
            self.update_route_ping(_upstream.get_name(), address[0], address[1], t)
            return sock
        except:
            t = int(time.time() * 1000) - start_time
            info = traceback.format_exc()
            if _upstream is not None:
                logging.debug(
                    u'[upstream][RouteCache]%s 缓存记录 连接 %s:%s 失败。time:%s' %
                    (_upstream.get_display_name(), address[0], address[1], t))
            logging.debug('%s\r\n\r\n' % info)

    # Cache missed or failed: race a connection attempt on every upstream.
    evt = Event()
    group = Group()
    async_task = MultipathAsyncTask(evt, None, group)
    for _upstream in self.upstream_dict.values():
        group.add(gevent.spawn(self._create_connection, _upstream,
                               async_task, address, timeout))
    # Fires a notification once every attempt has failed.
    gevent.spawn(self._create_connection_all_end, async_task)
    evt.wait()
    if async_task.sock:
        return async_task.sock
    else:
        raise UpstreamConnectError()
def create_connection(self, address, timeout=10):
    """Create a TCP connection to *address*: try the cached best IP first,
    then race a connection attempt for every resolved IP.
    """
    ip_list = dnslib.dnsQuery(address[0])
    # Bind names used by the except handler up front; previously a failure
    # before their assignment raised NameError inside `except`, masking
    # the real error.
    start_time = int(time.time() * 1000)
    hit_ip = None
    # Try the route cache first.
    route_list = self.get_route_order_ping(address[0], address[1], None)
    if route_list:
        try:
            route = route_list[0]
            hit_ip = route['hit_ip']
            if hit_ip in ip_list:
                # Derive a connect timeout from the cached TCP ping (ms):
                # double fast pings, pad slow ones, then round up to seconds.
                cache_timeout = route['tcp_ping']
                if cache_timeout < 1000:
                    cache_timeout = cache_timeout * 2
                else:
                    cache_timeout = cache_timeout + 1000
                cache_timeout = int(math.ceil(cache_timeout / 1000.0))
                start_time = int(time.time() * 1000)
                sock = self._direct_create_connection(address, hit_ip, cache_timeout)
                t = int(time.time() * 1000) - start_time
                logging.debug(
                    u'[upstream][RouteCache]%s 缓存记录 连接 %s(%s):%s 命中。time:%s' %
                    (self.get_display_name(), address[0], hit_ip, address[1], t))
                self.update_route_ping(address[0], address[1], t, hit_ip)
                return sock
            else:
                logging.debug(
                    u'[upstream][RouteCache]%s 缓存记录 连接 %s(%s):%s IP 不匹配,放弃缓存。' %
                    (self.get_display_name(), address[0], hit_ip, address[1]))
        except:
            t = int(time.time() * 1000) - start_time
            info = traceback.format_exc()
            logging.debug(
                u'[upstream][RouteCache]%s 缓存记录 连接 %s(%s):%s 失败。time:%s' % (
                    self.get_display_name(), address[0], hit_ip, address[1], t))
            logging.debug('%s\r\n\r\n' % info)

    # Cache missed or failed: race a connection attempt for every IP.
    evt = Event()
    group = Group()
    async_task = DirectAsyncTask(evt, None, group)
    for ip in ip_list:
        group.add(gevent.spawn(self._create_connection, async_task,
                               address, ip, timeout))
    # Fires a notification once every attempt has failed.
    gevent.spawn(self._create_connection_all_end, async_task)
    evt.wait()
    if async_task.sock:
        return async_task.sock
    else:
        raise UpstreamConnectError()
class NodeManager(core.Agent):
    """
    The class responsible to create nodes.
    """
    name = 'manager'
    """
    the registered name in the :class:`addressing.AddressBook`
    """

    def __init__(self, graph):
        """
        Creates a new node_manager

        :param graph: the graph to pass to the agents
        :type graph: storage.GraphWrapper
        """
        self.graph = graph
        self.failures = []
        self.group = Group()

    def setup_node(self, node, greenlet):
        # Deactivate the node once its greenlet finishes with a value.
        greenlet.link_value(node.deactivate_node)
        node.graph = self.graph
        self.group.add(greenlet)

    def unset_node(self, node, greenlet):
        del node.graph
        self.group.discard(greenlet)
        if isinstance(greenlet.value, core.GreenletExit):
            self.graph.remove_node(node.id)
            # print() with a single formatted string keeps the same output
            # on Python 2 and 3 (the original Py2 `print a, b` statement is
            # a SyntaxError under Python 3).
            print('Removing %s' % node.id)

    def create_node(self, cls, parameters):
        """
        Creates a new node.

        :param cls: the factory creating the new node.
        :type cls: callable
        :param parameters: the parameters that are forwarded to the node
            for creation
        :type parameters: dict
        :return: the actual identifier
        :rtype: int | str
        """
        node = cls(**parameters)
        identifier = self.graph.add_node()
        node.start(self._address_book, self._node_db, identifier)
        return identifier

    def simulation_ended(self):
        # Wait for every managed node greenlet to finish.
        self.group.join()
def asynchronous():
    """
    Creates a group of threads to do the parallel download and pi calculation
    """
    downloads = Group()
    pi_tasks = Group()
    for task_id in range(1, 11):
        downloads.add(gevent.spawn(download, task_id))
    pi_tasks.add(gevent.spawn(displayer))
    pi_tasks.join()
    downloads.join()
def stop_user_instances(self, users):
    """Stop every user, waiting up to stop_timeout for graceful exits."""
    if not self.environment.stop_timeout:
        # No grace period configured: stop everything immediately.
        for user in users:
            user.stop(self.user_greenlets, force=True)
        return
    stopping = Group()
    for user in users:
        # User.stop() returns False if the greenlet was not stopped, so we'll need
        # to add it's greenlet to our stopping Group so we can wait for it to finish it's task
        if not user.stop(self.user_greenlets, force=False):
            stopping.add(user._greenlet)
    if not stopping.join(timeout=self.environment.stop_timeout):
        logger.info("Not all users finished their tasks & terminated in %s seconds. Stopping them..." % self.environment.stop_timeout)
        stopping.kill(block=True)
def test_group(self):
    """Demonstrate Group: join g1+g2 first, then add g3 and join again."""
    def talk(msg):
        # range works on both Python 2 and 3; the original used the
        # Python-2-only xrange (a NameError under Python 3) even though
        # this method already uses the print() function form.
        for i in range(3):
            print(msg)

    g1 = gevent.spawn(talk, 'bar')
    g2 = gevent.spawn(talk, 'foo')
    g3 = gevent.spawn(talk, 'fizz')
    group = Group()
    group.add(g1)
    group.add(g2)
    group.join()
    group.add(g3)
    group.join()
def groups_and_pools():
    """Run two talker greenlets to completion, then a third."""
    def talk(msg):
        for _ in range(3):
            print(msg)

    talkers = [gevent.spawn(talk, word) for word in ("bar", "foo", "fizz")]
    group = Group()
    group.add(talkers[0])
    group.add(talkers[1])
    group.join()
    group.add(talkers[2])
    group.join()
def multi_coroutine():
    '''
    Suitable for non-CPU-bound work: run `test` in ten gevent coroutines.
    :return:
    '''
    from gevent import monkey
    import gevent
    from gevent.pool import Group
    monkey.patch_socket()
    # Setup done; fan the work out.
    workers = Group()
    for index in range(10):
        workers.add(gevent.spawn(test, (str(index), )))
    workers.join()
def search(self, query, queue):
    """Schedules a search and returns the related task information."""
    group = Group()
    pending = (
        spawn(self._search_wrapper, provider, query, queue, self._middleware)
        for provider in self._providers
    )
    for task in pending:
        group.add(task)
    return group
def kill_locust_instances(self, users):
    """Stop every locust, allowing stop_timeout for a graceful finish."""
    if not self.environment.stop_timeout:
        # No grace period configured: force-stop everything.
        for user in users:
            user.stop(self.locusts, force=True)
        return
    dying = Group()
    for user in users:
        # Locust.stop() returns False if the greenlet was not killed, so we'll need
        # to add it's greenlet to our dying Group so we can wait for it to finish it's task
        if not user.stop(self.locusts, force=False):
            dying.add(user._greenlet)
    if not dying.join(timeout=self.environment.stop_timeout):
        logger.info(
            "Not all locusts finished their tasks & terminated in %s seconds. Killing them..."
            % self.environment.stop_timeout)
        dying.kill(block=True)
def create_connection(self, address, timeout=10):
    """Create a TCP connection via the cached fastest upstream, falling
    back to racing every configured upstream.
    """
    # Bind names the except handler reads before the try block; a failure
    # before their original assignment (e.g. missing 'tcp_ping' key)
    # previously raised NameError inside `except`, hiding the real error.
    start_time = int(time.time() * 1000)
    _upstream = None
    # Try the route cache first.
    route_list = self.get_route_order_ping(address[0], address[1], None)
    if route_list:
        try:
            route = route_list[0]
            # Derive a connect timeout from the cached TCP ping (ms):
            # double fast pings, pad slow ones, round up to whole seconds.
            cache_timeout = route['tcp_ping']
            if cache_timeout < 1000:
                cache_timeout = cache_timeout * 2
            else:
                cache_timeout = cache_timeout + 1000
            cache_timeout = int(math.ceil(cache_timeout / 1000.0))
            _upstream = self.upstream_dict.get(route['proxy_name'])
            start_time = int(time.time() * 1000)
            sock = _upstream.create_connection(address, cache_timeout)
            t = int(time.time() * 1000) - start_time
            logging.debug(
                u'[upstream][RouteCache]%s 缓存记录 连接 %s:%s 命中。time:%s' %
                (_upstream.get_display_name(), address[0], address[1], t))
            self.update_route_ping(_upstream.get_name(), address[0], address[1], t)
            return sock
        except:
            t = int(time.time() * 1000) - start_time
            info = traceback.format_exc()
            if _upstream is not None:
                logging.debug(
                    u'[upstream][RouteCache]%s 缓存记录 连接 %s:%s 失败。time:%s' %
                    (_upstream.get_display_name(), address[0], address[1], t))
            logging.debug('%s\r\n\r\n' % info)

    # Cache missed or failed: race a connection attempt on every upstream.
    evt = Event()
    group = Group()
    async_task = MultipathAsyncTask(evt, None, group)
    for _upstream in self.upstream_dict.values():
        group.add(gevent.spawn(self._create_connection, _upstream,
                               async_task, address, timeout))
    # Fires a notification once every attempt has failed.
    gevent.spawn(self._create_connection_all_end, async_task)
    evt.wait()
    if async_task.sock:
        return async_task.sock
    else:
        raise UpstreamConnectError()
def get(self, ip, timeout=None):
    """Queries all DNSBLs in the group for matches.

    :param ip: The IP address to check for.
    :param timeout: Timeout in seconds before canceling remaining queries.
    :returns: A :class:`set()` containing the DNSBL domain names that
              matched a record for the IP address.
    """
    matches = set()
    queries = Group()
    with gevent.Timeout(timeout, None):
        for dnsbl in self.dnsbls:
            queries.add(self.pool.spawn(self._run_dnsbl_get, matches, dnsbl, ip))
        queries.join()
    # Cancel any queries still outstanding after the timeout.
    queries.kill()
    return matches
def runner(test_run_id, datasets, local_storage, credentials):
    """Launch one MB test per dataset, wait for them all, mark the run
    complete, and return the number of datasets that failed to launch."""
    failures = 0
    pending = Group()
    for dataset_index, dataset in enumerate(datasets):
        try:
            launched = run_mb_test(dataset_index, dataset)
        except Exception:
            failures += 1
            logging.error('MBTest FAILED for [{}] {}'.format(dataset_index, dataset), exc_info = True)
        else:
            if launched:
                pending.add(launched)
    if len(pending):
        pending.join()
    mark_as_complete(test_run_id)
    return failures
class HandlerManager(object):
    """Runs a batch of WorkHandler greenlets that drain a task queue."""

    def __init__(self, task_queue, concurrency, success_queue, handler_args):
        self.task_queue = task_queue
        self.success_queue = success_queue
        # Ceiling division.  `//` keeps the result an int on both Python 2
        # and 3 (plain `/` yields a float under Python 3, which would then
        # break range() below).
        self.unit_num = (self.task_queue.qsize() + concurrency - 1) // concurrency
        self.handler_args = handler_args
        self.gr_group = Group()

    def run(self):
        """Start the handlers and wait for them all to finish."""
        # range works on both Python 2 and 3 (xrange is Python-2-only).
        for _ in range(self.unit_num):
            g = WorkHandler(self.task_queue, self.success_queue,
                            **self.handler_args)
            self.gr_group.add(g)
        try:
            self.gr_group.join()
        except Exception as e:
            # print() form is valid on both Python 2 and 3.
            print("gr group err: %s" % str(e))
        return
class MultiZerologEmitter(gevent.Greenlet):
    """Emitter using multiple loggers which are configured by the zerolog
    server.
    """
    def __init__(self, interval):
        super(MultiZerologEmitter, self).__init__()
        # Seconds to sleep between emitted log records.
        self.interval = interval
        # Child greenlets doing the actual emitting.
        self.greenlets = Group()
        #self.loggers = 'foo foo.lib foo.web foo.web.request foo.web.db'.split()
        self.loggers = 'foo foo.lib foo.lib.bar'.split()
        self.levels = 'critical error warning info debug'.split()
        # Cooperative shutdown flag polled by the emitter loops.
        self._keep_going = True

    def _run(self):
        # Greenlet entry point: run a single randomized emitter until killed.
        self.greenlets.add(gevent.spawn(self.__random_logger))
        #for logger_name in self.loggers:
        #    self.greenlets.add(gevent.spawn(self.__logger, logger_name))
        self.greenlets.join()

    def __logger(self, logger_name):
        # Emit records at random levels on one fixed logger until told to stop.
        #loggers = 'app app.sub app.sub.lib'.split()
        logger = zerolog.getLogger(logger_name)
        index = 0
        while self._keep_going:
            level = random.choice(self.levels)
            message = "{0} {1} {2}".format(index, logger_name, level)
            getattr(logger, level)(message)
            index += 1
            gevent.sleep(self.interval)

    def __random_logger(self):
        # Emit records at random levels on randomly chosen loggers until
        # told to stop.
        index = 0
        while self._keep_going:
            logger = zerolog.getLogger(random.choice(self.loggers))
            level = random.choice(self.levels)
            message = "{0} {1} {2}".format(index, logger.name, level)
            getattr(logger, level)(message)
            index += 1
            gevent.sleep(self.interval)

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        # Stop the emitter loops first, kill the children, then kill this
        # greenlet itself.
        self._keep_going = False
        self.greenlets.kill()
        super(MultiZerologEmitter, self).kill(exception=exception, **kwargs)
def kill_locust_greenlets(self, greenlets):
    """
    Kill running locust greenlets. If options.stop_timeout is set, we try
    to stop the Locust users gracefully
    """
    if not self.options.stop_timeout:
        # No grace period: kill every greenlet outright.
        for g in greenlets:
            self.locusts.killone(g)
        return
    dying = Group()
    for g in greenlets:
        locust = g.args[0]
        if locust._state == LOCUST_STATE_WAITING:
            # Waiting locusts can be killed immediately.
            self.locusts.killone(g)
        else:
            # Ask the locust to stop after its current task and track it.
            locust._state = LOCUST_STATE_STOPPING
            dying.add(g)
    if not dying.join(timeout=self.options.stop_timeout):
        logger.info("Not all locusts finished their tasks & terminated in %s seconds. Killing them..." % self.options.stop_timeout)
        dying.kill(block=True)
def gevent_click_page():
    """Count the archive's posts, then auto-click them split across
    THREAD_COUNT+1 gevent greenlets."""
    global TRY_COUNT
    TRY_COUNT = int(sys.argv[1])
    _log.info('自动点击页面开始...')
    # First fetch the total number of posts.
    driver = webdriver.PhantomJS()
    driver.get('https://www.xncoding.com/archives/')
    # driver.maximize_window()
    posts_count = len(driver.find_elements_by_xpath(
        '//article/header/h1[@class="post-title"]/a[@class="post-title-link"]'))
    driver.close()
    # Posts handled per greenlet.  Floor division keeps this an int on
    # Python 3 (plain `/` would produce a float); on Python 2 the result
    # is identical.
    psize = posts_count // THREAD_COUNT
    _log.info('总的文章数量为:{}, 每组需要爬取的文章数:{}'.format(posts_count, psize))
    group = Group()
    for i in range(0, THREAD_COUNT + 1):
        group.add(gevent.spawn(_click_page, posts_count, psize, i))
    group.join()
    _log.info('成功结束...')
def get_reasons(self, matches, ip, timeout=None):
    """Gets the reasons for each matching DNSBL for the IP address.

    :param matches: The DNSBL matches, as returned by :meth:`.get()`.
    :param ip: The IP address to get reasons for.
    :param timeout: Timeout in seconds before canceling remaining queries.
    :returns: A :class:`dict()` keyed by the DNSBL domain names from the
              ``matches`` argument with the values being the reasons each
              DNSBL matched or ``None``.
    """
    reasons = dict.fromkeys(matches)
    queries = Group()
    with gevent.Timeout(timeout, None):
        for dnsbl in self.dnsbls:
            if dnsbl.address not in matches:
                continue
            queries.add(self.pool.spawn(self._run_dnsbl_get_reason,
                                        reasons, dnsbl, ip))
        queries.join()
    # Cancel any queries still outstanding after the timeout.
    queries.kill()
    return reasons
class Job(Greenlet):
    """A greenlet that can launch and track sub-jobs on a shared pool."""

    def __init__(self):
        Greenlet.__init__(self)
        self.subjobs = Group()  # currently running sub-jobs
        self.pool = None        # pool used to start sub-jobs

    def _launch(self, subjob, doneNotifier=None):
        """Start *subjob* on our pool, linking *doneNotifier* (or the
        default no-op hook) to its result."""
        subjob.pool = self.pool
        self.subjobs.add(subjob)
        if doneNotifier is not None:
            subjob.link_value(doneNotifier)
        else:
            subjob.link_value(self._endSubjob)
        self.pool.start(subjob)
        return subjob

    def _endSubjob(self, subjob):
        # Default completion hook: nothing to do.
        pass

    def joinSubjobs(self):
        """Poll until every sub-job has finished.

        ``!=`` replaces the Python-2-only ``<>`` operator, which is a
        SyntaxError under Python 3.
        """
        while len(self.subjobs) != 0:
            gevent.sleep(1)
class Job(Greenlet):
    """Greenlet that launches sub-jobs on a pool and waits for them."""

    def __init__(self):
        Greenlet.__init__(self)
        self.subjobs = Group()  # sub-jobs still running
        self.pool = None        # pool on which sub-jobs are started

    def _launch(self, subjob, doneNotifier=None):
        """Start *subjob*; notify *doneNotifier* (or the default hook)
        with its result."""
        subjob.pool = self.pool
        self.subjobs.add(subjob)
        if doneNotifier is not None:
            subjob.link_value(doneNotifier)
        else:
            subjob.link_value(self._endSubjob)
        self.pool.start(subjob)
        return subjob

    def _endSubjob(self, subjob):
        # Default completion hook: intentionally a no-op.
        pass

    def joinSubjobs(self):
        """Poll until all sub-jobs are done.  The Python-2-only ``<>``
        operator was replaced with ``!=`` (SyntaxError under Python 3)."""
        while len(self.subjobs) != 0:
            gevent.sleep(1)
def gevent_click_page():
    """Count the archive's posts, then auto-click them, partitioned over
    THREAD_COUNT+1 gevent greenlets."""
    global TRY_COUNT
    TRY_COUNT = int(sys.argv[1])
    _log.info('自动点击页面开始...')
    # First fetch the total number of posts.
    driver = webdriver.PhantomJS()
    driver.get('https://www.xncoding.com/archives/')
    # driver.maximize_window()
    posts_count = len(
        driver.find_elements_by_xpath(
            '//article/header/h1[@class="post-title"]/a[@class="post-title-link"]'
        ))
    driver.close()
    # Posts handled per greenlet.  `//` keeps the count an int on Python 3
    # (`/` would produce a float); on Python 2 the result is identical.
    psize = posts_count // THREAD_COUNT
    _log.info('总的文章数量为:{}, 每组需要爬取的文章数:{}'.format(posts_count, psize))
    group = Group()
    for i in range(0, THREAD_COUNT + 1):
        group.add(gevent.spawn(_click_page, posts_count, psize, i))
    group.join()
    _log.info('成功结束...')
def forward(self,sock,remote_sock,data_timeout=5*60):
    u"""Forward data between two sockets (blocking call).

    Connections are closed automatically when forwarding fails, and the
    connection is closed once both directions have timed out.  ``shutdown``
    is not handled specially: a one-directional shutdown closes both
    directions.
    """
    try:
        o = {
            # Timestamp (ms) of the last forwarded data = int(time()*1000),
            # shared by both forwarding greenlets.
            'forward_data_time':int(time.time()*1000),
        }
        sock.settimeout(data_timeout)
        remote_sock.settimeout(data_timeout)
        # One greenlet per direction; join returns when both are done.
        group = Group()
        group.add(gevent.spawn(self.__forwardData,sock,remote_sock,o,data_timeout))
        group.add(gevent.spawn(self.__forwardData,remote_sock,sock,o,data_timeout))
        group.join()
    finally:
        # Always tear down both sides, even on error.
        sock.close()
        remote_sock.close()
def stop_users(self, user_count, stop_rate=None):
    """
    Stop `user_count` weighted users at a rate of `stop_rate`

    When `stop_rate` is None (or >= user_count) all users are stopped
    without pausing between them; otherwise one user is stopped every
    ``1 / stop_rate`` seconds.
    """
    if user_count == 0 or stop_rate == 0:
        return

    # Pick the concrete user classes to stop, then match one running user
    # per bucket entry.
    bucket = self.weight_users(user_count)
    user_count = len(bucket)
    to_stop = []
    for g in self.user_greenlets:
        for l in bucket:
            user = g.args[0]
            if isinstance(user, l):
                to_stop.append(user)
                bucket.remove(l)
                break

    if not to_stop:
        return

    if stop_rate is None or stop_rate >= user_count:
        sleep_time = 0
        logger.info("Stopping %i users" % (user_count))
    else:
        sleep_time = 1.0 / stop_rate
        logger.info("Stopping %i users at rate of %g users/s" % (user_count, stop_rate))

    # async_calls_to_stop: the spawned User.stop calls themselves.
    # stop_group: greenlets of users we must wait on (graceful stop).
    async_calls_to_stop = Group()
    stop_group = Group()
    while True:
        # Stop users in random order so no user class drains first.
        user_to_stop: User = to_stop.pop(random.randint(0, len(to_stop) - 1))
        logger.debug("Stopping %s" % user_to_stop._greenlet.name)
        if user_to_stop._greenlet is greenlet.getcurrent():
            # User called runner.quit(), so dont block waiting for killing to finish"
            user_to_stop._group.killone(user_to_stop._greenlet, block=False)
        elif self.environment.stop_timeout:
            # Graceful stop: let the user finish its current task.
            async_calls_to_stop.add(gevent.spawn_later(0, User.stop, user_to_stop, force=False))
            stop_group.add(user_to_stop._greenlet)
        else:
            async_calls_to_stop.add(gevent.spawn_later(0, User.stop, user_to_stop, force=True))
        if to_stop:
            gevent.sleep(sleep_time)
        else:
            break

    async_calls_to_stop.join()

    # If graceful stops did not finish within stop_timeout, force-kill.
    if not stop_group.join(timeout=self.environment.stop_timeout):
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout
        )
        stop_group.kill(block=True)

    logger.info("%i Users have been stopped" % user_count)
def stop_users(self, user_classes_stop_count: Dict[str, int]):
    """Stop the requested number of users of each user class.

    :param user_classes_stop_count: mapping of user class name to how many
        users of that class to stop.
    """
    # async_calls_to_stop: the spawned user.stop calls themselves.
    # stop_group: greenlets of users being stopped gracefully, which we
    # must wait on (bounded by stop_timeout).
    async_calls_to_stop = Group()
    stop_group = Group()

    for user_class, stop_count in user_classes_stop_count.items():
        if self.user_classes_count[user_class] == 0:
            continue

        # Collect up to stop_count running users of this class.
        to_stop = []
        for user_greenlet in self.user_greenlets:
            if len(to_stop) == stop_count:
                break
            try:
                user = user_greenlet.args[0]
            except IndexError:
                logger.error(
                    "While stopping users, we encountered a user that didnt have proper args %s", user_greenlet
                )
                continue
            if isinstance(user, self.user_classes_by_name[user_class]):
                to_stop.append(user)

        if not to_stop:
            continue

        while True:
            user_to_stop: User = to_stop.pop()
            logger.debug("Stopping %s" % user_to_stop.greenlet.name)
            if user_to_stop.greenlet is greenlet.getcurrent():
                # User called runner.quit(), so don't block waiting for killing to finish
                user_to_stop.group.killone(user_to_stop.greenlet, block=False)
            elif self.environment.stop_timeout:
                # Graceful stop: let the user finish its current task.
                async_calls_to_stop.add(gevent.spawn_later(0, user_to_stop.stop, force=False))
                stop_group.add(user_to_stop.greenlet)
            else:
                async_calls_to_stop.add(gevent.spawn_later(0, user_to_stop.stop, force=True))
            if not to_stop:
                break

    async_calls_to_stop.join()

    # Force-kill any users whose graceful stop exceeded stop_timeout.
    if not stop_group.join(timeout=self.environment.stop_timeout):
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout
        )
        stop_group.kill(block=True)

    logger.debug(
        "%g users have been stopped, %g still running", sum(user_classes_stop_count.values()), self.user_count
    )
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual
        FolderSync greenlets for each folder.

        Parameters
        ----------
        poll_frequency: Integer
            Seconds to wait between polling for the greenlets spawned
        heartbeat: Integer
            Seconds to wait between checking on folder sync threads.
        refresh_flags_max: Integer
            the maximum number of UIDs for which we'll check flags
            periodically.
    """
    def __init__(self, account, heartbeat=1, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = db_write_lock(account.namespace.id)
        self.refresh_flags_max = refresh_flags_max

        # Pick the CONDSTORE-aware engine when either the provider or the
        # account itself advertises CONDSTORE support.
        provider_supports_condstore = account.provider_info.get('condstore',
                                                                False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        # One greenlet per folder, plus the status consumer below.
        self.folder_monitors = Group()
        self.sync_status_queue = Queue()
        self.folder_monitors.start(Greenlet(self.sync_status_consumer))

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    def prepare_sync(self):
        """Ensures that canonical tags are created for the account, and gets
        and save Folder objects for folders on the IMAP backend. Returns a
        list of tuples (folder_name, folder_id) for each folder we want to
        sync (in order)."""
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Single-row unpack: raises NoResultFound when the
                    # Folder row is missing.
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))
            return sync_folder_names_ids

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.
        """
        sync_folder_names_ids = self.prepare_sync()
        for folder_name, folder_id in sync_folder_names_ids:
            log.info('initializing folder sync')
            thread = self.sync_engine_class(self.account_id, folder_name,
                                            folder_id, self.email_address,
                                            self.provider_name,
                                            self.poll_frequency,
                                            self.syncmanager_lock,
                                            self.refresh_flags_max,
                                            self.retry_fail_classes,
                                            self.sync_status_queue)
            thread.start()
            self.folder_monitors.add(thread)
            # Block until this folder leaves its 'initial' sync phase
            # before starting the next one.
            while not thread_polling(thread) and \
                    not thread_finished(thread) and \
                    not thread.ready():
                sleep(self.heartbeat)

            # Allow individual folder sync monitors to shut themselves down
            # after completing the initial sync.
            if thread_finished(thread) or thread.ready():
                log.info('folder sync finished/killed',
                         folder_name=thread.folder_name)
                # NOTE: Greenlet is automatically removed from the group.

        self.folder_monitors.join()

    def sync_status_consumer(self):
        """Consume per-monitor sync status queue and update the
        ImapFolderSyncStatus table accordingly. Nothing fancy is happening
        as of now but here we may implement some batching to reduce the
        stress of the database."""
        while True:
            folder_id, state = self.sync_status_queue.get()
            with mailsync_session_scope() as db_session:
                sync_status_entry = db_session.query(ImapFolderSyncStatus)\
                    .filter_by(account_id=self.account_id,
                               folder_id=folder_id)\
                    .options(load_only(ImapFolderSyncStatus.state)).one()
                sync_status_entry.state = state
                db_session.add(sync_status_entry)
                db_session.commit()
class Consumer(object):
    """High level NSQ consumer.

    A Consumer will connect to the nsqd tcp addresses or poll the provided
    nsqlookupd http addresses for the configured topic and send signals to
    message handlers connected to the :attr:`on_message` signal or provided by
    ``message_handler``.

    Messages will automatically be finished when the message handle returns
    unless :meth:`message.enable_async() <gnsq.Message.enable_async>` is
    called. If an exception occurs or :class:`~gnsq.errors.NSQRequeueMessage`
    is raised, the message will be requeued.

    The Consumer will handle backing off of failed messages up to a
    configurable ``max_interval`` as well as automatically reconnecting to
    dropped connections.

    Example usage::

        from gnsq import Consumer

        consumer = gnsq.Consumer('topic', 'channel', 'localhost:4150')

        @consumer.on_message.connect
        def handler(consumer, message):
            print 'got message:', message.body

        consumer.start()

    :param topic: specifies the desired NSQ topic

    :param channel: specifies the desired NSQ channel

    :param nsqd_tcp_addresses: a sequence of string addresses of the nsqd
        instances this consumer should connect to

    :param lookupd_http_addresses: a sequence of string addresses of the
        nsqlookupd instances this consumer should query for producers of the
        specified topic

    :param name: a string that is used for logging messages (defaults to
        ``'gnsq.consumer.{topic}.{channel}'``)

    :param message_handler: the callable that will be executed for each
        message received

    :param max_tries: the maximum number of attempts the consumer will make to
        process a message after which messages will be automatically discarded

    :param max_in_flight: the maximum number of messages this consumer will
        pipeline for processing. this value will be divided evenly amongst the
        configured/discovered nsqd producers

    :param requeue_delay: the default delay to use when requeueing a failed
        message

    :param lookupd_poll_interval: the amount of time in seconds between
        querying all of the supplied nsqlookupd instances. A random amount of
        time based on this value will be initially introduced in order to add
        jitter when multiple consumers are running

    :param lookupd_poll_jitter: the maximum fractional amount of jitter to add
        to the lookupd poll loop. This helps evenly distribute requests even
        if multiple consumers restart at the same time.

    :param low_ready_idle_timeout: the amount of time in seconds to wait for a
        message from a producer when in a state where RDY counts are
        re-distributed (ie. `max_in_flight` < `num_producers`)

    :param max_backoff_duration: the maximum time we will allow a backoff
        state to last in seconds. If zero, backoff will not occur

    :param backoff_on_requeue: if ``False``, backoff will only occur on
        exception

    :param **kwargs: passed to :class:`~gnsq.NsqdTCPClient` initialization
    """
    def __init__(self, topic, channel, nsqd_tcp_addresses=[],
                 lookupd_http_addresses=[], name=None, message_handler=None,
                 max_tries=5, max_in_flight=1, requeue_delay=0,
                 lookupd_poll_interval=60, lookupd_poll_jitter=0.3,
                 low_ready_idle_timeout=10, max_backoff_duration=128,
                 backoff_on_requeue=True, **kwargs):
        # NOTE(review): the `[]` defaults are shared across calls (mutable
        # default args); safe here only because they are read, never mutated.
        if not nsqd_tcp_addresses and not lookupd_http_addresses:
            raise ValueError('must specify at least one nsqd or lookupd')

        self.nsqd_tcp_addresses = parse_nsqds(nsqd_tcp_addresses)
        self.lookupds = parse_lookupds(lookupd_http_addresses)
        # Round-robin iterator so lookupd query load is spread evenly.
        self.iterlookupds = cycle(self.lookupds)

        self.topic = topic
        self.channel = channel
        self.max_tries = max_tries
        self.max_in_flight = max_in_flight
        self.requeue_delay = requeue_delay
        self.lookupd_poll_interval = lookupd_poll_interval
        self.lookupd_poll_jitter = lookupd_poll_jitter
        self.low_ready_idle_timeout = low_ready_idle_timeout
        self.backoff_on_requeue = backoff_on_requeue
        self.max_backoff_duration = max_backoff_duration
        self.conn_kwargs = kwargs

        if name:
            self.name = name
        else:
            self.name = '%s.%s.%s' % (__name__, self.topic, self.channel)

        if message_handler is not None:
            self.on_message.connect(message_handler, weak=False)

        self.logger = logging.getLogger(self.name)

        self._state = INIT
        self._redistributed_ready_event = Event()
        # Per-connection backoff timers: one family for reconnect attempts,
        # one for failed-message (RDY) backoff.
        self._connection_backoffs = defaultdict(self._create_backoff)
        self._message_backoffs = defaultdict(self._create_backoff)

        # Maps NsqdTCPClient -> per-connection state constant.
        self._connections = {}
        self._workers = Group()
        # Subset of workers that close() must kill (the poll loops).
        self._killables = Group()

    @cached_property
    def on_message(self):
        """Emitted when a message is received.

        The signal sender is the consumer and the ``message`` is sent as an
        argument. The ``message_handler`` param is connected to this signal.
        """
        return blinker.Signal(doc='Emitted when a message is received.')

    @cached_property
    def on_response(self):
        """Emitted when a response is received.

        The signal sender is the consumer and the ``response`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted when a response is received.')

    @cached_property
    def on_error(self):
        """Emitted when an error is received.

        The signal sender is the consumer and the ``error`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted when a error is received.')

    @cached_property
    def on_finish(self):
        """Emitted after a message is successfully finished.

        The signal sender is the consumer and the ``message_id`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted after the a message is finished.')

    @cached_property
    def on_requeue(self):
        """Emitted after a message is requeued.

        The signal sender is the consumer and the ``message_id`` and
        ``timeout`` are sent as arguments.
        """
        return blinker.Signal(doc='Emitted after the a message is requeued.')

    @cached_property
    def on_giving_up(self):
        """Emitted after a giving up on a message.

        Emitted when a message has exceeded the maximum number of attempts
        (``max_tries``) and will no longer be requeued. This is useful to
        perform tasks such as writing to disk, collecting statistics etc. The
        signal sender is the consumer and the ``message`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Sent after a giving up on a message.')

    @cached_property
    def on_auth(self):
        """Emitted after a connection is successfully authenticated.

        The signal sender is the consumer and the ``conn`` and parsed
        ``response`` are sent as arguments.
        """
        return blinker.Signal(doc='Emitted when a response is received.')

    @cached_property
    def on_exception(self):
        """Emitted when an exception is caught while handling a message.

        The signal sender is the consumer and the ``message`` and ``error``
        are sent as arguments.
        """
        return blinker.Signal(doc='Emitted when an exception is caught.')

    @cached_property
    def on_close(self):
        """Emitted after :meth:`close`.

        The signal sender is the consumer.
        """
        return blinker.Signal(doc='Emitted after the consumer is closed.')

    def start(self, block=True):
        """Start discovering and listening to connections."""
        if self._state == INIT:
            if not any(self.on_message.receivers_for(blinker.ANY)):
                raise RuntimeError('no receivers connected to on_message')

            self.logger.debug('starting %s...', self.name)
            self._state = RUNNING
            self.query_nsqd()

            if self.lookupds:
                self.query_lookupd()
                self._killables.add(self._workers.spawn(self._poll_lookupd))

            self._killables.add(self._workers.spawn(self._poll_ready))
        else:
            self.logger.warn('%s already started', self.name)

        if block:
            self.join()

    def close(self):
        """Immediately close all connections and stop workers."""
        if not self.is_running:
            return

        self._state = CLOSED

        self.logger.debug('killing %d worker(s)', len(self._killables))
        self._killables.kill(block=False)

        self.logger.debug('closing %d connection(s)', len(self._connections))
        for conn in self._connections:
            conn.close_stream()

        self.on_close.send(self)

    def join(self, timeout=None, raise_error=False):
        """Block until all connections have closed and workers stopped."""
        self._workers.join(timeout, raise_error)

    @property
    def is_running(self):
        """Check if consumer is currently running."""
        return self._state == RUNNING

    @property
    def is_starved(self):
        """Evaluate whether any of the connections are starved.

        This property should be used by message handlers to reliably identify
        when to process a batch of messages.
        """
        return any(conn.is_starved for conn in self._connections)

    @property
    def total_ready_count(self):
        # Sum of outstanding RDY counts across every known connection.
        return sum(c.ready_count for c in self._connections)

    @property
    def total_in_flight(self):
        # Sum of in-flight messages across every known connection.
        return sum(c.in_flight for c in self._connections)

    def query_nsqd(self):
        # Connect directly to each statically configured nsqd address.
        self.logger.debug('querying nsqd...')
        for address in self.nsqd_tcp_addresses:
            address, port = address.split(':')
            self.connect_to_nsqd(address, int(port))

    def query_lookupd(self):
        # Ask the next lookupd (round-robin) for producers of our topic and
        # connect to each one. Lookup failures are logged, not raised.
        self.logger.debug('querying lookupd...')
        lookupd = next(self.iterlookupds)

        try:
            producers = lookupd.lookup(self.topic)['producers']
            self.logger.debug('found %d producers', len(producers))

        except Exception as error:
            self.logger.warn(
                'Failed to lookup %s on %s (%s)',
                self.topic, lookupd.address, error)
            return

        for producer in producers:
            self.connect_to_nsqd(
                producer['broadcast_address'], producer['tcp_port'])

    def _poll_lookupd(self):
        # Worker loop: re-query lookupd on an interval, with an initial
        # random jitter so restarted consumers don't stampede.
        try:
            delay = self.lookupd_poll_interval * self.lookupd_poll_jitter
            gevent.sleep(random.random() * delay)

            while True:
                gevent.sleep(self.lookupd_poll_interval)
                self.query_lookupd()

        except gevent.GreenletExit:
            pass

    def _poll_ready(self):
        # Worker loop: redistribute RDY state whenever requested (event set),
        # checking at most every 5 seconds.
        try:
            while True:
                if self._redistributed_ready_event.wait(5):
                    self._redistributed_ready_event.clear()
                self._redistribute_ready_state()

        except gevent.GreenletExit:
            pass

    def _redistribute_ready_state(self):
        # Recompute and push RDY counts to each connection. When there are
        # more connections than max_in_flight we rotate RDY 1 among a random
        # subset; otherwise max_in_flight is divided across active conns.
        if not self.is_running:
            return

        if len(self._connections) > self.max_in_flight:
            ready_state = self._get_unsaturated_ready_state()
        else:
            ready_state = self._get_saturated_ready_state()

        for conn, count in ready_state.items():
            if conn.ready_count == count:
                self.logger.debug('[%s] RDY count already %d', conn, count)
                continue

            self.logger.debug('[%s] sending RDY %d', conn, count)

            try:
                conn.ready(count)
            except NSQSocketError as error:
                self.logger.warn('[%s] RDY %d failed (%r)', conn, count, error)

    def _get_unsaturated_ready_state(self):
        # More connections than max_in_flight: give RDY 1 to a random
        # selection of max_in_flight active connections, RDY 0 to the rest.
        ready_state = {}
        active = []

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0
            else:
                active.append(conn)

        random.shuffle(active)

        for conn in active[self.max_in_flight:]:
            ready_state[conn] = 0

        for conn in active[:self.max_in_flight]:
            ready_state[conn] = 1

        return ready_state

    def _get_saturated_ready_state(self):
        # Enough RDY budget for everyone: split max_in_flight evenly across
        # active connections; backoff gets 0, throttled/init/idle get 1.
        ready_state = {}
        active = []
        now = time.time()

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0

            elif state in (INIT, THROTTLED):
                ready_state[conn] = 1

            elif (now - conn.last_message) > self.low_ready_idle_timeout:
                self.logger.info(
                    '[%s] idle connection, giving up RDY count', conn)
                ready_state[conn] = 1

            else:
                active.append(conn)

        if not active:
            return ready_state

        ready_available = self.max_in_flight - sum(ready_state.values())
        connection_max_in_flight = ready_available // len(active)

        for conn in active:
            ready_state[conn] = connection_max_in_flight

        # Distribute the remainder one-by-one to a random sample.
        for conn in random.sample(active, ready_available % len(active)):
            ready_state[conn] += 1

        return ready_state

    def redistribute_ready_state(self):
        # Signal the _poll_ready worker to recompute RDY distribution.
        self._redistributed_ready_event.set()

    def connect_to_nsqd(self, address, port):
        # Establish, identify, and subscribe a connection to one nsqd,
        # wiring all of its signals into this consumer's handlers.
        if not self.is_running:
            return

        conn = NsqdTCPClient(address, port, **self.conn_kwargs)
        if conn in self._connections:
            self.logger.debug('[%s] already connected', conn)
            return

        self._connections[conn] = INIT
        self.logger.debug('[%s] connecting...', conn)

        conn.on_message.connect(self.handle_message)
        conn.on_response.connect(self.handle_response)
        conn.on_error.connect(self.handle_error)
        conn.on_finish.connect(self.handle_finish)
        conn.on_requeue.connect(self.handle_requeue)
        conn.on_auth.connect(self.handle_auth)

        try:
            conn.connect()
            conn.identify()

            if conn.max_ready_count < self.max_in_flight:
                msg = (
                    '[%s] max RDY count %d < consumer max in flight %d, '
                    'truncation possible')
                self.logger.warning(
                    msg, conn, conn.max_ready_count, self.max_in_flight)

            conn.subscribe(self.topic, self.channel)

        except NSQException as error:
            self.logger.warn('[%s] connection failed (%r)', conn, error)
            self.handle_connection_failure(conn)
            return

        # Check if we've closed since we started
        if not self.is_running:
            self.handle_connection_failure(conn)
            return

        self.logger.info('[%s] connection successful', conn)
        self.handle_connection_success(conn)

    def _listen(self, conn):
        # Worker loop: pump the connection's stream until it drops.
        try:
            conn.listen()
        except NSQException as error:
            self.logger.warning('[%s] connection lost (%r)', conn, error)

        self.handle_connection_failure(conn)

    def handle_connection_success(self, conn):
        # Start the listen worker and fold the new connection into the RDY
        # distribution; reset reconnect backoff for static nsqd addresses.
        self._workers.spawn(self._listen, conn)
        self.redistribute_ready_state()

        if str(conn) not in self.nsqd_tcp_addresses:
            return

        self._connection_backoffs[conn].success()

    def handle_connection_failure(self, conn):
        # Drop the connection; statically configured nsqds are retried
        # later with exponential backoff (lookupd-found ones are not).
        del self._connections[conn]
        conn.close_stream()

        if not self.is_running:
            return

        self.redistribute_ready_state()

        if str(conn) not in self.nsqd_tcp_addresses:
            return

        seconds = self._connection_backoffs[conn].failure().get_interval()
        self.logger.debug('[%s] retrying in %ss', conn, seconds)

        gevent.spawn_later(
            seconds, self.connect_to_nsqd, conn.address, conn.port)

    def handle_auth(self, conn, response):
        # Log the parsed AUTH metadata and forward the on_auth signal.
        metadata = []
        if response.get('identity'):
            metadata.append("Identity: %r" % response['identity'])

        if response.get('permission_count'):
            metadata.append("Permissions: %d" % response['permission_count'])

        if response.get('identity_url'):
            metadata.append(response['identity_url'])

        self.logger.info('[%s] AUTH accepted %s', conn, ' '.join(metadata))
        self.on_auth.send(self, conn=conn, response=response)

    def handle_response(self, conn, response):
        self.logger.debug('[%s] response: %s', conn, response)
        self.on_response.send(self, response=response)

    def handle_error(self, conn, error):
        self.logger.debug('[%s] error: %s', conn, error)
        self.on_error.send(self, error=error)

    def _handle_message(self, message):
        # Dispatch one message to the on_message receivers, discarding it if
        # it has exceeded max_tries, and auto-finishing unless the handler
        # enabled async mode or already responded.
        if self.max_tries and message.attempts > self.max_tries:
            self.logger.warning(
                "giving up on message '%s' after max tries %d",
                message.id, self.max_tries)
            self.on_giving_up.send(self, message=message)
            return message.finish()

        self.on_message.send(self, message=message)

        if not self.is_running:
            return

        if message.is_async():
            return

        if message.has_responded():
            return

        message.finish()

    def handle_message(self, conn, message):
        # Wrap _handle_message: NSQRequeueMessage and unexpected exceptions
        # both requeue the message, with backoff decided per the exception.
        self.logger.debug('[%s] got message: %s', conn, message.id)

        try:
            return self._handle_message(message)

        except NSQRequeueMessage as error:
            if error.backoff is None:
                backoff = self.backoff_on_requeue
            else:
                backoff = error.backoff

        except Exception as error:
            backoff = True
            self.logger.exception(
                '[%s] caught exception while handling message', conn)
            self.on_exception.send(self, message=message, error=error)

        if not self.is_running:
            return

        if message.has_responded():
            return

        try:
            message.requeue(self.requeue_delay, backoff)
        except NSQException as error:
            self.logger.warning(
                '[%s] error requeueing message (%r)', conn, error)

    def _create_backoff(self):
        # Factory for the defaultdicts of per-connection backoff timers.
        return BackoffTimer(max_interval=self.max_backoff_duration)

    def _start_backoff(self, conn):
        # Enter BACKOFF: RDY 0 for the backoff interval, then test the
        # connection in the THROTTLED state.
        self._connections[conn] = BACKOFF

        interval = self._message_backoffs[conn].get_interval()
        gevent.spawn_later(interval, self._start_throttled, conn)

        self.logger.info('[%s] backing off for %s seconds', conn, interval)
        self.redistribute_ready_state()

    def _start_throttled(self, conn):
        # Backoff interval elapsed: probe the connection with RDY 1.
        if self._connections.get(conn) != BACKOFF:
            return

        self._connections[conn] = THROTTLED
        self.logger.info('[%s] testing backoff state with RDY 1', conn)
        self.redistribute_ready_state()

    def _complete_backoff(self, conn):
        # A throttled probe succeeded; resume fully only once the backoff
        # timer has decayed back to zero, otherwise back off again.
        if self._message_backoffs[conn].is_reset():
            self._connections[conn] = RUNNING
            self.logger.info('backoff complete, resuming normal operation')
            self.redistribute_ready_state()
        else:
            self._start_backoff(conn)

    def _finish_message(self, conn, backoff):
        # Advance the per-connection backoff state machine after a message
        # was finished (backoff=False) or requeued with backoff=True.
        if not self.max_backoff_duration:
            return

        try:
            state = self._connections[conn]
        except KeyError:
            return

        if state == BACKOFF:
            return

        if backoff:
            self._message_backoffs[conn].failure()
            self._start_backoff(conn)

        elif state == THROTTLED:
            self._message_backoffs[conn].success()
            self._complete_backoff(conn)

        elif state == INIT:
            self._connections[conn] = RUNNING
            self.redistribute_ready_state()

    def handle_finish(self, conn, message_id):
        self.logger.debug('[%s] finished message: %s', conn, message_id)
        self._finish_message(conn, backoff=False)
        self.on_finish.send(self, message_id=message_id)

    def handle_requeue(self, conn, message_id, timeout, backoff):
        self.logger.debug(
            '[%s] requeued message: %s (%s)', conn, message_id, timeout)
        self._finish_message(conn, backoff=backoff)
        self.on_requeue.send(self, message_id=message_id, timeout=timeout)
class GeventReactor(posixbase.PosixReactorBase):
    """Implement gevent-powered reactor based on PosixReactorBase."""
    implements(IReactorGreenlets)

    def __init__(self,*args):
        self.greenlet = None          # greenlet running mainLoop
        self.greenletpool = Group()   # all reactor-managed greenlets
        self._reads = {}              # selectable -> reader Stream greenlet
        self._writes = {}             # selectable -> writer Stream greenlet
        self._callqueue = []          # DelayedCalls, kept sorted by time
        self._wake = 0                # time mainLoop is scheduled to wake
        self._wait = 0                # 1 while mainLoop sleeps in gevent.sleep
        self.resolver = GeventResolver(self)
        self.addToGreenletPool = self.greenletpool.add
        posixbase.PosixReactorBase.__init__(self,*args)
        self._initThreads()
        self._initThreadPool()
        self._initGreenletPool()

    def mainLoop(self):
        """This main loop yields to gevent until the end, handling function
        calls along the way."""
        self.greenlet = gevent.getcurrent()
        callqueue = self._callqueue
        seconds = self.seconds
        try:
            while 1:
                self._wait = 0
                now = seconds()
                # Sleep until the earliest pending call, or 5 minutes.
                if len(callqueue) > 0:
                    self._wake = delay = callqueue[0].time
                    delay -= now
                else:
                    self._wake = now+300
                    delay = 300
                try:
                    self._wait = 1
                    gevent.sleep(max(0,delay))
                    self._wait = 0
                except Reschedule:
                    # reschedule() killed the sleep: recompute the delay.
                    continue
                now = seconds()
                # Run every call whose scheduled time has arrived.
                while 1:
                    try:
                        c = callqueue[0]
                    except IndexError:
                        break
                    if c.time <= now:
                        del callqueue[0]
                        try:
                            c()
                        except GreenletExit:
                            raise
                        except:
                            log.msg('Unexpected error in main loop.')
                            log.err()
                    else:
                        break
        except (GreenletExit,KeyboardInterrupt):
            pass
        log.msg('Main loop terminated.')
        self.fireSystemEvent('shutdown')

    def addReader(self,selectable):
        """Add a FileDescriptor for notification of data available to read."""
        try:
            self._reads[selectable].resume()
        except KeyError:
            self._reads[selectable] = g = Stream.spawn(self,selectable,'doRead')
            self.addToGreenletPool(g)

    def addWriter(self,selectable):
        """Add a FileDescriptor for notification of data available to write."""
        try:
            self._writes[selectable].resume()
        except KeyError:
            self._writes[selectable] = g = Stream.spawn(self,selectable,'doWrite')
            self.addToGreenletPool(g)

    def removeReader(self,selectable):
        """Remove a FileDescriptor for notification of data available to read."""
        try:
            # Disconnected descriptors are killed; live ones just paused so
            # they can be cheaply resumed by addReader later.
            if selectable.disconnected:
                self._reads[selectable].kill(block=False)
                del self._reads[selectable]
            else:
                self._reads[selectable].pause()
        except KeyError:
            pass

    def removeWriter(self,selectable):
        """Remove a FileDescriptor for notification of data available to write."""
        try:
            if selectable.disconnected:
                self._writes[selectable].kill(block=False)
                del self._writes[selectable]
            else:
                self._writes[selectable].pause()
        except KeyError:
            pass

    def discardReader(self,selectable):
        """Remove a FileDescriptor without checking."""
        try:
            del self._reads[selectable]
        except KeyError:
            pass

    def discardWriter(self,selectable):
        """Remove a FileDescriptor without checking."""
        try:
            del self._writes[selectable]
        except KeyError:
            pass

    def getReaders(self):
        return self._reads.keys()

    def getWriters(self):
        return self._writes.keys()

    def removeAll(self):
        return self._removeAll(self._reads,self._writes)

    # IReactorTime

    seconds = staticmethod(runtimeSeconds)

    def callLater(self,*args,**kw):
        # Either reschedule an existing DelayedCall (first arg) or create a
        # new one; keep _callqueue sorted and wake the main loop if needed.
        if isinstance(args[0],DelayedCall):
            c = args[0]
            try:
                self._callqueue.remove(c)
            except ValueError:
                return None
        else:
            c = DelayedCall(self,self.seconds()+args[0],args[1],args[2:],kw,seconds=self.seconds)
        insort(self._callqueue,c)
        self.reschedule()
        return c

    def getDelayedCalls(self):
        return list(self._callqueue)

    def cancelCallLater(self,callID):  # deprecated
        self._callqueue.remove(callID)
        self.reschedule()

    # IReactorGreenlets

    def _initGreenletPool(self):
        self.greenletpoolShutdownID = self.addSystemEventTrigger('during','shutdown',self._stopGreenletPool)

    def _stopGreenletPool(self):
        self.greenletpool.kill()

    def getGreenletPool(self):
        return self.greenletpool

    def callInGreenlet(self,*args,**kwargs):
        self.addToGreenletPool(Greenlet.spawn_later(0,*args,**kwargs))

    def callFromGreenlet(self,*args,**kw):
        # Schedule an immediate DelayedCall and wake the main loop.
        c = DelayedCall(self,self.seconds(),args[0],args[1:],kw,seconds=self.seconds)
        insort(self._callqueue,c)
        self.reschedule()
        return c

    def suggestGreenletPoolSize(self,size):
        # Group has no size limit; accepted for interface compatibility.
        pass

    def addToGreenletPool(self,g):
        # NOTE(review): shadowed by the bound `greenletpool.add` assigned in
        # __init__; kept for the interface.
        self.greenletpool.add(g)

    # IReactorThreads

    def _initThreads(self):
        # do not initialize ThreadedResolver, since we are using
        # GeventResolver
        self.usingThreads = True

    callFromThread = callFromGreenlet

    # IReactorCore

    def stop(self):
        # Queue a no-op call so the loop wakes, then kill its greenlet.
        self._callqueue.insert(0,DelayedCall(self,0,gevent.sleep,(),{},seconds=self.seconds))
        gevent.kill(self.greenlet)

    def reschedule(self):
        # Interrupt the main loop's sleep if a call is now due earlier than
        # the time it planned to wake at.
        if self._wait and len(self._callqueue) > 0 and self._callqueue[0].time < self._wake:
            gevent.kill(self.greenlet,Reschedule)
            self._wait = 0
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.
    """
    def __init__(self, account, heartbeat=1, poll_frequency=30,
                 retry_fail_classes=[], refresh_flags_max=2000):
        # NOTE(review): `retry_fail_classes=[]` is a shared mutable default;
        # safe only as long as no caller mutates it.
        self.poll_frequency = poll_frequency
        self.syncmanager_lock = db_write_lock(account.namespace.id)
        self.refresh_flags_max = refresh_flags_max

        provider_supports_condstore = provider_info(account.provider).get(
            'condstore', False)
        account_supports_condstore = getattr(account, 'supports_condstore',
                                             False)
        # CONDSTORE-capable accounts get the cheaper flag-change engine.
        if provider_supports_condstore or account_supports_condstore:
            self.sync_engine_class = CondstoreFolderSyncEngine
        else:
            self.sync_engine_class = FolderSyncEngine

        self.folder_monitors = Group()

        BaseMailSyncMonitor.__init__(self, account, heartbeat,
                                     retry_fail_classes)

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.
        """
        with mailsync_session_scope() as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                save_folder_names(log, self.account_id,
                                  crispin_client.folder_names(), db_session)

            account = db_session.query(ImapAccount).get(self.account_id)
            Tag.create_canonical_tags(account.namespace, db_session)

            # Snapshot folder ids and last-known sync states up front.
            folder_id_for = {name: id_ for id_, name in db_session.query(
                Folder.id, Folder.name).filter_by(
                    account_id=self.account_id)}

            saved_states = {name: state for name, state in
                            db_session.query(Folder.name,
                                             ImapFolderSyncStatus.state)
                            .join(ImapFolderSyncStatus.folder)
                            .filter(ImapFolderSyncStatus.account_id ==
                                    self.account_id)}

            for folder_name in sync_folders:
                if folder_name not in folder_id_for:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name,
                              folder_id_for=folder_id_for)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name, self.account_id))

                # Skip folders whose sync already completed.
                if saved_states.get(folder_name) != 'finish':
                    log.info('initializing folder sync')
                    thread = self.sync_engine_class(
                        self.account_id, folder_name,
                        folder_id_for[folder_name], self.email_address,
                        self.provider_name, self.poll_frequency,
                        self.syncmanager_lock, self.refresh_flags_max,
                        self.retry_fail_classes)
                    thread.start()
                    self.folder_monitors.add(thread)
                    # Serialize initial syncs: wait for this folder to reach
                    # polling, finish, or die before starting the next.
                    while not self._thread_polling(thread) and \
                            not self._thread_finished(thread) and \
                            not thread.ready():
                        sleep(self.heartbeat)

                    # Allow individual folder sync monitors to shut
                    # themselves down after completing the initial sync.
                    if self._thread_finished(thread) or thread.ready():
                        log.info('folder sync finished/killed',
                                 folder_name=thread.folder_name)
                        # NOTE: Greenlet is automatically removed from the
                        # group.

        self.folder_monitors.join()
import gevent from gevent.pool import Group def talk(msg): for i in xrange(3): print msg g1 = gevent.spawn(talk, 'bar') g2 = gevent.spawn(talk, 'foo') g3 = gevent.spawn(talk, 'fizz') group = Group() group.add(g1) group.add(g2) group.join() group.add(g3) group.join()
def get_all_company(first_page=1, last_page=500):
    """Fetch company listing pages concurrently.

    Spawns one greenlet per page in ``range(first_page, last_page)`` (the
    end is exclusive, matching ``range``) and blocks until every fetch has
    completed. Defaults preserve the original hard-coded behavior of
    fetching pages 1 through 499.

    :param first_page: first page number to fetch (inclusive, default 1)
    :param last_page: end of the page range (exclusive, default 500)
    """
    company_group = Group()
    for page in range(first_page, last_page):
        company_group.add(gevent.spawn(get_company_page, page))
    # Block until every page worker is done.
    company_group.join()
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    poll_frequency and heartbeat are in seconds.
    """
    def __init__(self, account_id, namespace_id, email_address, provider,
                 heartbeat=1, poll_frequency=300, retry_fail_classes=None):
        # State shared with every per-folder monitor this class spawns.
        self.shared_state = {
            # IMAP folders are kept up-to-date via polling
            'poll_frequency': poll_frequency,
            'syncmanager_lock': db_write_lock(namespace_id),
        }

        self.folder_monitors = Group()
        # Subclasses may pre-define their own state-machine handlers.
        if not hasattr(self, 'folder_state_handlers'):
            self.folder_state_handlers = {
                'initial': initial_sync,
                'initial uidinvalid': resync_uids_from('initial'),
                'poll': poll,
                'poll uidinvalid': resync_uids_from('poll'),
                'finish': lambda c, s, l, f, st: 'finish',
            }

        BaseMailSyncMonitor.__init__(self, account_id, email_address,
                                     provider, heartbeat, retry_fail_classes)

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.
        """
        with session_scope() as db_session:
            saved_states = dict()
            folder_id_for = dict()
            # Last-known sync state (and folder id) per folder name.
            for saved_state in db_session.query(ImapFolderSyncStatus)\
                    .filter_by(account_id=self.account_id):
                saved_states[saved_state.folder.name] = saved_state.state
                folder_id_for[saved_state.folder.name] = \
                    saved_state.folder.id

            # it's possible we've never started syncs for these folders
            # before
            for folder_id, folder_name, in \
                    db_session.query(Folder.id, Folder.name).filter_by(
                        account_id=self.account_id):
                folder_id_for[folder_name] = folder_id

            with connection_pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                account = db_session.query(ImapAccount)\
                    .get(self.account_id)
                save_folder_names(self.log, account,
                                  crispin_client.folder_names(), db_session)
            Tag.create_canonical_tags(account.namespace, db_session)

            for folder_name in sync_folders:
                # Skip folders whose sync already completed.
                if saved_states.get(folder_name) != 'finish':
                    self.log.info("Initializing folder sync for {0}"
                                  .format(folder_name))
                    thread = ImapFolderSyncMonitor(
                        self.account_id, folder_name,
                        folder_id_for[folder_name], self.email_address,
                        self.provider, self.shared_state,
                        self.folder_state_handlers,
                        self.retry_fail_classes)
                    thread.start()
                    self.folder_monitors.add(thread)
                    # Serialize initial syncs: wait for this folder to reach
                    # the polling state (or finish) before starting the next.
                    while not self._thread_polling(thread) and \
                            not self._thread_finished(thread):
                        sleep(self.heartbeat)

                    # Allow individual folder sync monitors to shut
                    # themselves down after completing the initial sync.
                    if self._thread_finished(thread):
                        self.log.info("Folder sync for {} is done."
                                      .format(folder_name))
                        # NOTE: Greenlet is automatically removed from the
                        # group after finishing.

        self.folder_monitors.join()
class ImapSyncMonitor(BaseMailSyncMonitor):
    """ Top-level controller for an account's mail sync. Spawns individual
    FolderSync greenlets for each folder.

    Parameters
    ----------
    poll_frequency: Integer
        Seconds to wait between polling for the greenlets spawned
    heartbeat: Integer
        Seconds to wait between checking on folder sync threads.
    refresh_flags_max: Integer
        the maximum number of UIDs for which we'll check flags
        periodically.
    """
    def __init__(self, account_id, namespace_id, email_address,
                 provider_name, heartbeat=1, poll_frequency=300,
                 retry_fail_classes=[], refresh_flags_max=2000):
        # NOTE(review): `retry_fail_classes=[]` is a shared mutable default;
        # safe only as long as no caller mutates it.
        # State shared with every per-folder monitor this class spawns.
        self.shared_state = {
            # IMAP folders are kept up-to-date via polling
            'poll_frequency': poll_frequency,
            'syncmanager_lock': db_write_lock(namespace_id),
            'refresh_flags_max': refresh_flags_max,
        }

        self.folder_monitors = Group()
        # Subclasses may pre-define their own state-machine handlers.
        if not hasattr(self, 'folder_state_handlers'):
            self.folder_state_handlers = {
                'initial': initial_sync,
                'initial uidinvalid': resync_uids_from('initial'),
                'poll': poll,
                'poll uidinvalid': resync_uids_from('poll'),
                'finish': lambda c, s, l, f, st: 'finish',
            }

        BaseMailSyncMonitor.__init__(self, account_id, email_address,
                                     provider_name, heartbeat,
                                     retry_fail_classes)

    def sync(self):
        """ Start per-folder syncs. Only have one per-folder sync in the
            'initial' state at a time.
        """
        with session_scope(ignore_soft_deletes=False) as db_session:
            with _pool(self.account_id).get() as crispin_client:
                sync_folders = crispin_client.sync_folders()
                account = db_session.query(ImapAccount)\
                    .get(self.account_id)
                save_folder_names(self.log, account,
                                  crispin_client.folder_names(), db_session)
            Tag.create_canonical_tags(account.namespace, db_session)

            # Snapshot folder ids and last-known sync states up front.
            folder_id_for = {name: id_ for id_, name in db_session.query(
                Folder.id, Folder.name).filter_by(
                    account_id=self.account_id)}

            saved_states = {name: state for name, state in
                            db_session.query(Folder.name,
                                             ImapFolderSyncStatus.state)
                            .join(ImapFolderSyncStatus.folder)
                            .filter(ImapFolderSyncStatus.account_id ==
                                    self.account_id)}

            for folder_name in sync_folders:
                if folder_name not in folder_id_for:
                    self.log.error("Missing Folder object when starting "
                                   "sync", folder_name=folder_name,
                                   folder_id_for=folder_id_for)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name,
                                                self.account_id))

                # Skip folders whose sync already completed.
                if saved_states.get(folder_name) != 'finish':
                    self.log.info('initializing folder sync')
                    thread = ImapFolderSyncMonitor(
                        self.account_id, folder_name,
                        folder_id_for[folder_name], self.email_address,
                        self.provider_name, self.shared_state,
                        self.folder_state_handlers,
                        self.retry_fail_classes)
                    thread.start()
                    self.folder_monitors.add(thread)
                    # Serialize initial syncs: wait for this folder to reach
                    # polling, finish, or die before starting the next.
                    while not self._thread_polling(thread) and \
                            not self._thread_finished(thread) and \
                            not thread.ready():
                        sleep(self.heartbeat)

                    # Allow individual folder sync monitors to shut
                    # themselves down after completing the initial sync.
                    if self._thread_finished(thread) or thread.ready():
                        self.log.info('folder sync finished/killed',
                                      folder_name=thread.folder_name)
                        # NOTE: Greenlet is automatically removed from the
                        # group.

        self.folder_monitors.join()
    coordinates."""
    # NOTE(review): this is the tail of a function whose header is above
    # this chunk; the loop body is intentionally a no-op.
    for x in positions:
        pass
    pass


# Server initialization functions
def announce():
    """Attempts to connect to the manager every 3 seconds until the device
    is acquired."""
    # NOTE(review): device_acquired, device_port, device_ip, manager_ip,
    # manager_port and g are module-level globals defined elsewhere in
    # this file — confirm before reuse.
    while not device_acquired.is_set():
        gevent.sleep(3)
        payload = {"action": "addDevice", "port": device_port,
                   "addr": device_ip}
        requests.get(manager_ip + ":" + manager_port, params=payload)


def start_server():
    """Starts a minimal Werkzeug server. This is not a robust server and
    should be replaced eventually with something like CherryPy."""
    print "Starting server"
    # Blocks this greenlet forever serving the WSGI app.
    http = WSGIServer(("", device_port), app)
    http.serve_forever()


if __name__ == "__main__":
    # The string below is a no-op statement kept from the original; it
    # documents the entry point.
    """Launch the biotillexis device - start the server and send an
    acquisition request to manager."""
    # Run the announcer and the HTTP server concurrently; g is a
    # module-level gevent Group.
    g.add(gevent.spawn(announce))
    g.add(gevent.spawn(start_server))
    g.join()
# Bounded queue: boss blocks on put() once 5 tasks are waiting.
taskqueue = Queue(maxsize=5)
# Greenlet-local storage: each worker gets its own `cnt`.
workerdata = local()


def worker(pid):
    """Drain tasks from the shared queue until it stays empty for 5s."""
    print 'worker #%d-%s starting' % (pid,id(getcurrent()))
    workerdata.cnt = 0
    while True:
        try:
            task = taskqueue.get(timeout=5)
            workerdata.cnt += 1
            print 'worker #%d got task #%d' % (pid,task)
            # Simulate variable-length work.
            gevent.sleep(random.randint(0,3))
        except Empty:
            # No task arrived within the timeout: shut this worker down.
            break
    print 'worker #%d-%s exiting (cnt:%d)' % (pid,id(getcurrent()),workerdata.cnt)


def boss():
    """Produce 15 tasks; blocks when the bounded queue is full."""
    print 'boss starting'
    for i in xrange(0,15):
        taskqueue.put(i)
    print 'boss completed'


g = Group()
g.add(gevent.spawn(boss))
# Python 2: map() is eager, so all three workers are added immediately.
map(g.add,(gevent.spawn(worker,i) for i in xrange(0,3)))
print 'all spawned'
# joinall accepts any iterable of greenlets; a Group iterates its members.
gevent.joinall(g)
print 'end'
from pinga.config import get_sites_list
from pinga.events.producer import Producer
from pinga.log import get_logger


def _check_site_thread(producer, site):
    """Poll *site* forever, publishing each check result as JSON every 5s."""
    # NOTE(review): check_site, json, and gevent are not imported in this
    # chunk — presumably imported elsewhere in the file; verify.
    while True:
        result = check_site(site)
        producer.emit(json.dumps(result))
        gevent.sleep(5)


if __name__ == "__main__":
    logger = get_logger()
    group = Group()
    producer = Producer()

    sites_list = get_sites_list()["sites"]
    logger.info(f"List of sites to be checked by Pinga {sites_list}")
    # One long-lived checker greenlet per site, all sharing one producer.
    for site in sites_list:
        group.add(gevent.spawn(_check_site_thread, producer, site))

    try:
        logger.info("Starting Pinga producer...")
        # Checkers loop forever, so this blocks until interrupted.
        group.join()
    except KeyboardInterrupt:
        logger.info("Pinga producer interrupted. Goodbye.")
        group.kill()
class FTPPool(object):
    """Pool of FTP connections shared among spawned greenlets.

    Idle connections live in ``self.pool``; greenlets currently using a
    connection are tracked in the ``self.busy`` Group.
    """
    def __init__(self, addr, user, pwd, min_=1, max_=0, stat=None):
        self.addr = addr
        self.user = user
        self.pwd = pwd
        self.pool = set()    # idle FTP connections
        self.busy = Group()  # greenlets currently holding a connection
        self.stat = stat     # optional stats sink with set_busy()
        self.max_ = max_     # 0 means unbounded concurrency
        # NOTE(review): _connect returns None on login failure, so a failed
        # login here puts None into the idle pool — confirm intended.
        for _ in xrange(min_):
            self.pool.add(self._connect())

    def _connect(self):
        """Open and log in one FTP connection; returns None on login failure."""
        ftp = FTP(self.addr)
        try:
            ftp.login(self.user, self.pwd)
        except error_temp as e:
            print e
            return
        return ftp

    def _get_ftp(self):
        """Take an idle connection, or wait for capacity and open a new one."""
        if self.pool:
            ftp = self.pool.pop()
        else:
            # Busy-wait while at the concurrency cap (when max_ is set).
            while self.max_ and len(self.busy) > self.max_:
                sleep(0.1)
            # NOTE(review): retries _connect in a tight loop with no delay;
            # a persistently failing server spins here — confirm intended.
            while True:
                ftp = self._connect()
                if ftp:
                    break
        return ftp

    def _release_ftp(self, gr, ftp):
        """Return the connection to the pool (or drop it if the task failed)."""
        if gr.successful():
            self.pool.add(ftp)
        else:
            # Task raised: assume the connection may be unusable.
            ftp.close()
            del ftp
        self.busy.discard(gr)
        if self.stat:
            self.stat.set_busy(len(self.busy))

    def spawn_ftp(self, func, *a, **kw):
        """Run func(ftp, *a, **kw) in a greenlet with a pooled connection."""
        ftp = self._get_ftp()
        gr = spawn(func, ftp, *a, **kw)
        # Release the connection whatever way the greenlet ends.
        gr.link(lambda g: self._release_ftp(g, ftp))
        self.busy.add(gr)
        if self.stat:
            self.stat.set_busy(len(self.busy))

    def wait(self):
        """Block until every in-flight greenlet has finished."""
        self.busy.join()

    def break_all(self):
        """Kill every in-flight greenlet immediately."""
        self.busy.kill()