def customer_sendtime():
    """
    Send spam reports at a customer-defined time.
    :return:
    """
    try:
        t3 = time.time()
        pool = gevent.pool.Pool(_MAXTHREAD)
        for s in CustomerSetting.objects.filter(
                is_spamrpt_sendtime=True,
                spamrpt_sendtime__contains=time.strftime("%H:%M:")).exclude(
                customer__gateway_status='disabled'):
            # for s in CustomerSetting.objects.filter(is_spamrpt_sendtime=True).exclude(customer__gateway_status='disabled'):
            if s.spamrpt:
                mails = get_mails_from_sendtime(s.customer_id, s.spamrpt_sendtime)
                for mail_to in set([m.mail_to for m in mails]):
                    pool.spawn(work_send, s.customer_id, mail_to, 'customer', s.spamrpt_sendtime)
            if s.m_spamrpt:
                pool.spawn(work_send, s.customer_id, '', 'manager', s.spamrpt_sendtime)
        log.info('waiting stop...')
        pool.join()
        t4 = time.time()
        log.info('spam_rpt send total time={}'.format(t4 - t3))
        return
    except (DatabaseError, InterfaceError) as e:
        log.error(u'DatabaseError', exc_info=1)
        connection.close()
    except BaseException as e:
        log.error(u'spam_rpt: exception', exc_info=1)

def run(self):
    self.db_manager = util.DBManager(self.config['connections']['mysql'])
    db = self.db_manager.get_db()
    while True:
        # XXX next try-except block force load tables
        try:
            db.messages
            db.servers
            db.server_properties
            break
        except db_exc.OperationalError:
            LOG.error(util.exc_info())
            time.sleep(5)
            LOG.debug('Reconnecting...')
    try:
        global pool
        persistent_gm_workers = [_GMWorker(self) for i in range(2)]
        for wrk in persistent_gm_workers:
            wrk.register_task('message.send', send)
            pool.add(gevent.spawn(wrk.work))
        gevent.spawn(heartbeat, 10, self)
        pool.join()
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)

def run(self, test, pool=pool):
    """
    extend run to wait for global pool to finish
    """
    result = self._makeResult()
    startTime = time.time()
    test(result)
    pool.join()
    TestCaseHandler.tearDownClasses()
    stopTime = time.time()
    timeTaken = stopTime - startTime
    result.printErrors()
    self.stream.writeln(result.separator2)
    run = result.testsRun
    self.stream.writeln("Ran %d test%s in %.3fs" %
                        (run, run != 1 and "s" or "", timeTaken))
    self.stream.writeln()
    if not result.wasSuccessful():
        self.stream.write("FAILED (")
        failed, errored = map(len, (result.failures, result.errors))
        if failed:
            self.stream.write("failures=%d" % failed)
        if errored:
            if failed:
                self.stream.write(", ")
            self.stream.write("errors=%d" % errored)
        self.stream.writeln(")")
    else:
        self.stream.writeln("OK")
    return result

def test_stderr_raising(self):
    # testing that really egregious errors in the error handling code
    # (that prints tracebacks to stderr) don't cause the pool to lose
    # any members
    import sys
    pool = self.klass(size=1)

    # we're going to do this by causing the traceback.print_exc in
    # safe_apply to raise an exception and thus exit _main_loop
    normal_err = sys.stderr
    try:
        sys.stderr = FakeFile()
        waiter = pool.spawn(crash)
        with gevent.Timeout(2):
            self.assertRaises(RuntimeError, waiter.get)
        # the pool should have something free at this point since the
        # waiter returned
        # pool.Pool change: if an exception is raised during execution of a link,
        # the rest of the links are scheduled to be executed on the next hub iteration
        # this introduces a delay in updating pool.sem which makes pool.free_count() report 0
        # therefore, sleep:
        gevent.sleep(0)
        self.assertEqual(pool.free_count(), 1)
        # shouldn't block when trying to get
        with gevent.Timeout.start_new(0.1):
            pool.apply(gevent.sleep, (0, ))
    finally:
        sys.stderr = normal_err
    pool.join()

def propogate():
    common = {'foo': 'bar', 'bar': 'foo'}
    pool = gevent.pool.Pool(10)
    pool.spawn(change_foo, common)
    pool.spawn(change_bar, common)
    pool.join()
    print common

def main():
    init()
    # log.info('start worker_other.....')
    # worker_other()
    # log.info('finish worker_other.....')
    # gevent.sleep(0.1)
    log.info('start worker_day.....')
    pool = gevent.pool.Pool(10)
    for tablename in glb_maillog_tables:
        pool.spawn(worker_day, tablename)
        gevent.sleep(0.02)
    pool.join()
    gevent.sleep(0.1)
    log.info('finish worker_day.....')
    log.info('start worker_success.....')
    worker_success()
    log.info('finish worker_success.....')
    log.info('start worker_success.....')
    worker_success()
    log.info('finish worker_success.....')
    log.info('start worker_redis.....')
    worker_redis()
    log.info('finish worker_redis.....')
    return

def subpool_map(pool_size, func, iterable):
    """ Starts a Gevent pool and run a map. Takes care of setting current_job and cleaning up. """

    if not pool_size:
        return [func(*args) for args in iterable]

    counter = itertools_count()

    current_job = get_current_job()

    def inner_func(*args):
        next(counter)
        if current_job:
            set_current_job(current_job)
        ret = func(*args)
        if current_job:
            set_current_job(None)
        return ret

    start_time = time.time()

    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, iterable)
    pool.join(raise_error=True)

    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret

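# A minimal usage sketch (an assumption, not taken from the project's own docs): with a
# non-zero pool_size the call goes through pool.map(), so `func` receives one element of
# `iterable` per call. `double` is a hypothetical helper.
def _subpool_map_example():
    def double(x):
        return x * 2

    # Runs up to 10 greenlets concurrently; results keep the input order.
    return subpool_map(10, double, range(20))
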
def cleanup_containers(self, auth_kwargs, container_base, concurrency):
    storage_urls, token = self._authenticate(auth_kwargs)

    _, container_list = client.get_account(
        random.choice(storage_urls), token)
    our_container_re = re.compile('%s_\d+$' % container_base)

    start_time = time.time()
    obj_count = 0
    container_count = 0
    pool = gevent.pool.Pool(concurrency)
    for container_info in container_list:
        # e.g. {'count': 41, 'bytes': 496485, 'name': 'doc'}
        if our_container_re.match(container_info['name']):
            pool.spawn(_container_deleter, concurrency, storage_urls, token,
                       container_info)
            container_count += 1
            obj_count += container_info['count']
        else:
            logging.debug('Ignoring non-ssbench container %r',
                          container_info['name'])
    pool.join()
    delta_t = time.time() - start_time
    logging.info('Deleted %.1f containers/s, %.1f objs/s',
                 container_count / delta_t, obj_count / delta_t)

def create_index_cache(cat, start, end):
    """
    PHASE 1A: Creates a snapshot of the index by downloading all the index
    pages into a local cache directory.

    Input: None
    Output: {page_num}.html in the cache_path directory
    """
    print 'Creating index cache'

    if cat != 'All':
        BASE_INDEX_URL = 'http://www.indiegogo.com/projects?filter_category={category}&filter_country=&pg_num='.format(category=cat)
    else:
        BASE_INDEX_URL = 'http://www.indiegogo.com/projects?&pg_num='

    def fetch_index(page_num):
        #print 'Caching ' + str(page_num)
        html = _fetch(BASE_INDEX_URL + str(page_num))
        filename = str(page_num) + '.html'
        with open(os.path.join(cache_path, filename), 'w') as f:
            f.write(html)

    pool = gevent.pool.Pool(THREADS)
    threads = []
    for i in range(start, end + 1):
        threads.append(pool.spawn(fetch_index, i))
    #gevent.joinall(threads)
    pool.join()
    return

def test_fox_cub(games_to_test, dataset, client):
    pool = gevent.pool.Pool(1024)
    # unique testing id
    session_id = str(uuid.uuid4())

    for game in games_to_test:
        _, features = dataset.prepare_observation(game)

        home_team_season = dataset.get_team_stats(game.HomeTeam, game.Season)['season']
        away_team_season = dataset.get_team_stats(game.AwayTeam, game.Season)['season']

        home_team_res = home_team_season.get_team_scores(game.HomeTeam)
        away_team_res = away_team_season.get_team_scores(game.AwayTeam)

        season_avg = {
            "avgScoredHome": features.avg_goals_home_team / 2,
            "avgScoredAway": features.avg_goals_away_team / 2
        }

        pool.spawn(client.get_stats, home_team_res, away_team_res,
                   season_avg, game.HomeTeam, game.AwayTeam, session_id)

    pool.join()

    return session_id

def cleanup_containers(self, auth_kwargs, container_base, concurrency, policy):
    storage_urls, token = self._authenticate(auth_kwargs)

    _, container_list = client.get_account(random.choice(storage_urls), token)
    our_container_re = re.compile(self.DELETER_RE % (container_base, policy))

    start_time = time.time()
    obj_count = 0
    container_count = 0
    pool = gevent.pool.Pool(concurrency)
    for container_info in container_list:
        # e.g. {'count': 41, 'bytes': 496485, 'name': 'doc'}
        if our_container_re.match(container_info['name']):
            pool.spawn(_container_deleter, concurrency, storage_urls, token,
                       container_info)
            container_count += 1
            obj_count += container_info['count']
        else:
            logging.debug('Ignoring non-ssbench container %r',
                          container_info['name'])
    pool.join()
    delta_t = time.time() - start_time
    logging.info('Deleted %.1f containers/s, %.1f objs/s',
                 container_count / delta_t, obj_count / delta_t)

def scanurl():
    hosts = load_target()

    def find_plugins_by_name(search):
        for name, plugins in COMPONENT_PLUGIN_INFO.iteritems():
            if name in search:
                return plugins
        return False

    for i in hosts:
        url, server, title = i['url'], i['server'], i['title'].lower()
        parse = urlparse.urlparse(url)
        port = 80
        l = parse.netloc.split(':')
        if len(l) == 2:
            host, port = l
            port = int(port)
        else:
            host = l[0]
        plugins = find_plugins_by_name(title)
        if plugins:
            for plugin in plugins:
                pool.spawn(run_task, plugin, host, port)
    pool.join()

def async():
    index = 200000
    pool = gevent.pool.Pool(100)
    while index:
        index -= 1
        pool.spawn(_async)
    pool.join()

def main():
    connection = couchbase.client.Couchbase('192.168.1.154:8091', 'default', '')
    bucket = connection['default']

    now = int(time.time())
    start_time = now - 30 * 60

    def read_traffic(key):
        for row in bucket.view('_design/ray/_view/live_congestion',
                               limit=100,
                               stale='ok',
                               startkey=[key, start_time],
                               endkey=[key, now + 1],
                               ):
            if row is None or 'value' not in row:
                return 'not found'
            value = row['value']
            return 'key=%d, record_count=%d, average_congestion=%.2f, age=%.2fs' % \
                (key, value['count'], value['congestion'], now - value['age'])

    pool = gevent.pool.Pool(size=200)
    for result in pool.imap_unordered(read_traffic, itertools.cycle(xrange(0, 10000 * 50, 100))):
        print result
    pool.join()

def test_stderr_raising(self):
    if greentest.PYPY:
        # Does not work on PyPy
        return
    # testing that really egregious errors in the error handling code
    # (that prints tracebacks to stderr) don't cause the pool to lose
    # any members
    import sys
    pool = self.klass(size=1)

    # we're going to do this by causing the traceback.print_exc in
    # safe_apply to raise an exception and thus exit _main_loop
    normal_err = sys.stderr
    try:
        sys.stderr = FakeFile()
        waiter = pool.spawn(crash)
        with gevent.Timeout(2):
            self.assertRaises(RuntimeError, waiter.get)
        # the pool should have something free at this point since the
        # waiter returned
        # pool.Pool change: if an exception is raised during execution of a link,
        # the rest of the links are scheduled to be executed on the next hub iteration
        # this introduces a delay in updating pool.sem which makes pool.free_count() report 0
        # therefore, sleep:
        gevent.sleep(0)
        self.assertEqual(pool.free_count(), 1)
        # shouldn't block when trying to get
        t = gevent.Timeout.start_new(0.1)
        try:
            pool.apply(gevent.sleep, (0, ))
        finally:
            t.cancel()
    finally:
        sys.stderr = normal_err
    pool.join()

def workon(iap):
    pool = gevent.pool.Pool(self.poolsize)
    for i in range(self.poolsize):
        pool.spawn(iap.work)
    pool.join()

def worker_task(user_list):
    pool = gevent.pool.Pool(5)
    for user_id in user_list:
        pool.spawn(do_worker_task, user_id)
        gevent.sleep(0.01)
    pool.join()
    return

def _img_ori(_poi_type):
    global data
    global offset
    query_sql = '''SELECT id, image_list, first_image, official FROM {} ORDER BY id LIMIT {}, 99999999999999;'''.format(
        table_name, offset)
    _count = 0
    cache = []
    for _uid, _old_img_list, _old_first_img, _official in MysqlSource(
            poi_ori_config, table_or_query=query_sql, size=500, is_table=False, is_dict_cursor=False):
        cache.append((_uid, _old_img_list, _old_first_img, _official))

    for _uid, _old_img_list, _old_first_img, _official in cache:
        pool.apply_async(_update_per_uid_img, (_uid, _poi_type, _old_img_list, _old_first_img, _official))
        _count += 1
        if _count % 1000 == 0:
            pool.join()
            update_img()
            data = []
            offset += 1000

    update_img()
    pool.join()
    update_img()

def interval_sendtime():
    """
    :return:
    """
    try:
        t3 = time.time()
        pool = gevent.pool.Pool(_MAXTHREAD)
        for s in CustomerSetting.objects.filter(
                interval_spamrpt__gt=0).exclude(
                customer__gateway_status='disabled'):
            interval = int(s.interval_spamrpt)
            customer_id = s.customer_id
            if check_interval(customer_id, interval):
                log.info(
                    'check interval fail(customer_id:{}, interval:{})'.format(
                        customer_id, interval))
                continue
            if s.spamrpt:
                mails = get_mails_from_interval(customer_id, interval)
                for mail_to in set([m.mail_to for m in mails]):
                    pool.spawn(work_send, customer_id, mail_to, 'customer', interval)
            if s.m_spamrpt:
                pool.spawn(work_send, customer_id, '', 'manager', interval)
        log.info('waiting stop...')
        pool.join()
        t4 = time.time()
        log.info('spam_rpt send total time={}'.format(t4 - t3))
        return
    except (DatabaseError, InterfaceError) as e:
        log.error(u'DatabaseError', exc_info=1)
        connection.close()
    except BaseException as e:
        log.error(u'spam_rpt: exception', exc_info=1)

def _find_wildcards(self):
    """
    Queries some random non-existent records to reduce false positives.
    Returns True if the process can continue, otherwise False.
    """
    wildcard_count = self.options.wildcard_tests
    if wildcard_count < 1:
        return True
    total_queries = len(self.domains) * wildcard_count
    LOG.info("Eliminating wildcard responses (%d tests)", total_queries)
    is_ok = False
    # Setup pool and progress
    pool = gevent.pool.Pool(self.options.concurrency)
    if self.progress:
        self.progress.start(total_queries)
    self.finished = 0
    try:
        for domain in self.domains:
            LOG.debug("Checking wildcard domain: %s", domain)
            names = [rand_name() for _ in range(0, wildcard_count)]
            for name in names:
                pool.add(gevent.spawn(self._test_wildcard, domain, name))
        is_ok = True
    except KeyboardInterrupt:
        print("Ctrl+C caught... stopping")
    pool.join()
    if self.progress:
        self.progress.finish()
    return is_ok

def main(argv):
    if len(argv) < 2 or not os.path.exists(argv[1]):
        print("Usage: ping.py [config]")
        return 1

    # Initialize global settings
    init_settings(argv)

    # Initialize logger
    loglevel = logging.INFO
    if SETTINGS['debug']:
        loglevel = logging.DEBUG

    logformat = ("%(asctime)s,%(msecs)05.1f %(levelname)s (%(funcName)s) "
                 "%(message)s")
    logging.basicConfig(level=loglevel,
                        format=logformat,
                        filename=SETTINGS['logfile'],
                        filemode='w')
    print("Writing output to {}, press CTRL+C to terminate..".format(
        SETTINGS['logfile']))

    logging.info("Removing all keys")
    REDIS_CONN.delete('reachable')
    REDIS_CONN.delete('open')
    REDIS_CONN.delete('opendata')

    # Initialize a pool of workers (greenlets)
    pool = gevent.pool.Pool(SETTINGS['workers'])
    pool.spawn(cron, pool)
    pool.join()

    return 0

def create_project_cache():
    """
    PHASE 3
    Creates a cache of all the individual projects in {category}/proj
    """
    print 'Creating project cache'

    with open(phase1_outfile, 'r') as f:
        projects_info = json.load(f)

    def fetch_project(url):
        #print 'Fetching project at ' + url
        filename = hashlib.md5(url).hexdigest()
        html = _fetch(url)
        #print 'Caching ' + filename
        projects_info[url]['cache_file_name'] = filename
        with open(os.path.join(proj_cache_path, filename), 'w') as f:
            f.write(html)

    pool = gevent.pool.Pool(THREADS)
    threads = []
    for proj_url, info in projects_info.iteritems():
        threads.append(pool.spawn(fetch_project, proj_url))
    pool.join()

    with open(phase1_outfile, 'w') as f:
        json.dump(projects_info, f, sort_keys=True, indent=4, separators=(',', ': '))

def scan():
    sql = "SELECT email, browser, os, country, simple_country, area, ip_first, ip_last, open_total, open_first, open_last FROM active_emails;"
    res = DB.query(REMOTE_PG, sql)
    pool = gevent.pool.Pool(50)
    for data in res:
        pool.spawn(worker, data)
    pool.join()

def schedule_green_jobs(fns, concurrency=DEFAULT_THREADS, progress=None, total=None):
    import gevent.pool

    if total is None:
        try:
            total = len(fns)
        except TypeError:  # generators don't have len
            pass

    pbar = tqdm(total=total, desc=progress, disable=(not progress))
    results = []

    def updatefn(fn):
        def realupdatefn():
            res = fn()
            pbar.update(1)
            results.append(res)
        return realupdatefn

    pool = gevent.pool.Pool(concurrency)
    for fn in fns:
        pool.spawn(updatefn(fn))

    pool.join()
    pool.kill()
    pbar.close()

    return results

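# A minimal usage sketch (an assumption, not from the original project): the jobs must be
# zero-argument callables, so per-task arguments are bound with functools.partial. Note that
# results are collected in completion order, not submission order. `square` is hypothetical.
def _schedule_green_jobs_example():
    import functools

    def square(x):
        return x * x

    jobs = [functools.partial(square, i) for i in range(100)]
    # Runs on a pool of 8 greenlets and shows a tqdm bar labelled "square".
    return schedule_green_jobs(jobs, concurrency=8, progress="square", total=len(jobs))
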
def start():
    sql = 'select # from # where #'
    urls = get_url_from_db(sql)  # fetch the URLs from the database
    pool = gevent.pool.Pool(4)  # set the concurrency level
    for url in urls:
        pool.add(gevent.spawn(get_page_content, url))
    pool.join()

def manager_main():
    """
    Send spam reports to customer administrators.
    :return:
    """
    try:
        t3 = time.time()
        mail_model = get_mail_model(get_mail_date())
        customer_list = mail_model.objects.exclude(customer__gateway_status='disabled') \
            .filter(state='reject', mail_to__isnull=False, review_result='reject',
                    customer__customersetting__m_spamrpt=True,
                    customer__customersetting__is_spamrpt_sendtime=False) \
            .distinct('customer_id') \
            .values_list('customer_id', flat=True)
        pool = gevent.pool.Pool(_MAXTHREAD)
        for customer_id in customer_list:
            pool.spawn(work_send, customer_id, '', 'manager')
        log.info('waiting stop...')
        pool.join()
        t4 = time.time()
        log.info('m_spam_rpt send total time={}'.format(t4 - t3))
        return
    except (DatabaseError, InterfaceError) as e:
        log.error(u'DatabaseError', exc_info=1)
        connection.close()
    except BaseException as e:
        log.error(u'spam_rpt: exception', exc_info=1)
        gevent.sleep(10)

def download_pic():
    conn = pymysql.connect(host='10.10.228.253', user='******', password='******', charset='utf8',
                           db='BaseDataFinal')
    cursor = conn.cursor()
    cursor.execute('''SELECT file_name, source, sid FROM poi_images WHERE source = 'qyer' AND length(file_name) > 32;''')
    start = time.time()
    _count = 0
    for file_name, source, sid in cursor.fetchall():
        _count += 1
        # parent_path = os.path.join(PARENT_PATH, "###".join([source, sid]))
        new_file_name = file_name.split('.')[0]
        # parent_path = os.path.join(PARENT_PATH, new_file_name)
        if not os.path.exists(PARENT_PATH):
            os.makedirs(PARENT_PATH)
        # download("mioji-attr", file_name, PARENT_PATH, new_file_name)
        pool.apply_async(download, ("mioji-attr", file_name, PARENT_PATH, new_file_name))
    pool.join()
    cursor.close()
    conn.close()
    print("[Total: {}][Takes: {}]".format(_count, time.time() - start))

def start(thread_num):
    pool = Pool(processes=thread_num)
    for i in xrange(thread_num):
        pool.apply_async(getContent, args=())
    pool.close()
    pool.join()
    return pool

def woker_imap():
    smtp_dict = getSmtpData()
    pool = gevent.pool.Pool(10)
    for smtp_account_id in smtp_dict:
        smtp_list = smtp_dict[smtp_account_id]
        pool.spawn(do_woker_imap, smtp_account_id, smtp_list)
    pool.join()
    return

def worker_redis(cr):
    pool = gevent.pool.Pool(10)
    for start in range(0, 100, 10):
        for domain in GLB_DOMAINS:
            end = start + 10
            pool.spawn(do_worker_redis, cr, domain, start, end)
    pool.join()
    return

def worker_redis():
    T = ['163.com', 'qq.com', '*']
    pool = gevent.pool.Pool(10)
    for index in xrange(1, 11):
        for domain in T:
            pool.spawn(do_worker_redis, domain, index)
    pool.join()
    return

def scanner():
    pool = gevent.pool.Pool(_MAXTHREAD)
    for t in _TABLES:
        if signal_stop:
            break
        pool.spawn(worker, t)
        gevent.sleep(0.01)
    pool.join()
    return

def scan():
    index = 0
    pool = gevent.pool.Pool(10)
    while True:
        index += 1
        pool.spawn(woker1, index)
        gevent.sleep(1)
    pool.join()

def join_raffle(self, room_id):
    params = {
        'roomid': room_id,
    }
    pool = gevent.pool.Pool(len(self))
    for each_record in self:
        pool.add(gevent.spawn(each_record._join_raffle, params=params))
    pool.join()

def sync_cassandra(simple=False):
    for table, fields in sorted(schemas.items(), key=lambda x: len(x[0])):
        if simple:
            if table in ['ataobao2.top10', 'ataobao2.blacklist', 'ataobao2.agghosts',
                         'ataobao2.cate', 'ataobao2.brand']:
                sync_table(table, fields)
        else:
            sync_table(table, fields)
    pool.join()

def fetch_pages_con():
    pool = gevent.pool.Pool(16)
    gs = []
    for i in range(pages):
        g = gevent.spawn(fetch_page, i)
        gs.append(g)
        pool.add(g)
    pool.join()

def worker_3():
    log.info('start worker_3...')
    pool = gevent.pool.Pool(5)
    while True:
        pool.spawn(do_worker_3, random.randint(1, 100000))
    pool.join()
    log.info('finish worker_3...')
    return

def fetch_all_subitems(db, pool):
    for channel in ('teleplay', 'documentary', 'comic'):
        collection = db[channel]
        jobs = [pool.spawn(fetch_subitems, item) for item in collection.find()]
        pool.join()
        for job in jobs:
            item = job.value
            collection.save(item, save=True)

def dosearch(self):
    for offset in [i * 200 for i in range(40)]:
        pool.spawn(self.getTracks, self.querystr, offset)
    pool.join()
    result = sorted(self.results, key=lambda x: x.tosort, reverse=True)[:20]
    for t in result:
        t.widget = makeWidget(t.id)
        print "(%s) %s - %s" % (t.tosort, t.username.encode('ascii', 'ignore'), t.title.encode('ascii', 'ignore'))
    return result

def fetch_all_channels(db, pool):
    jobs = [pool.spawn(fetch_channel, channel) for channel in CONFIG["MAPPING"]]
    pool.join()
    for job in jobs:
        channel, items = job.value
        collection = db[channel]
        collection.insert(items, save=True)

def resolve_hostname(self):
    """
    Concurrently resolves hostname for the unresolved addresses.
    """
    pool = gevent.pool.Pool(len(self.resolved['hostname']))
    with gevent.Timeout(15, False):
        for address in self.resolved['hostname']:
            pool.spawn(self.set_hostname, address)
        pool.join()

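# A standalone sketch of the same bounded-wait pattern (an assumption: nothing here comes
# from the surrounding project, it only shows how gevent.Timeout(..., False) caps pool.join()).
import gevent
import gevent.pool

def _bounded_join_example():
    pool = gevent.pool.Pool(4)
    for delay in (0.1, 0.2, 30):       # the 30s task is abandoned at the deadline, not killed
        pool.spawn(gevent.sleep, delay)
    with gevent.Timeout(5, False):     # False: suppress the timeout instead of raising it
        pool.join()                    # returns after at most ~5 seconds
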
def test_proxy_list(http_proxies, pool_size, server_port):
    pool = gevent.pool.Pool(pool_size)
    my_ip = whats_my_ip()
    globals()['LOCAL_SERVER'] = 'http://{}:{}/'.format(my_ip, server_port)
    for proxy in http_proxies:
        ip, port = proxy.rsplit(':')
        pool.spawn(test_single_proxy, my_ip, ip, port)
    pool.join()
    queue.put(StopIteration)

def run_with_gevent():
    from qs.misc import call_in_loop

    import gevent.pool

    pool = gevent.pool.Pool()
    for i in range(numgreenlets):
        pool.spawn(call_in_loop(1.0, start_worker))
    pool.join()

def start(self):
    pool = gevent.pool.Pool(size=self.concurrency)
    try:
        for i in xrange(1, self.num_connectors + 1):
            pool.spawn(self.connector)
            time.sleep(self.spawn_interval)
        pool.join()
    except KeyboardInterrupt:
        pass

def download_images(posts):
    """Downloads images for the given posts"""
    pool = gevent.pool.Pool(size=96)
    for post in progress.bar(posts, width=60, every=100):
        if not post.static:
            continue
        pool.spawn(download_image, post)
    # wait for all jobs to finish
    pool.join()

def run(self):
    def update_latest_ids(cid):
        data = get_json(cid, page=1, sort='_oldstart')
        nids = get_ids(data)
        if nids:
            print 'found {} ids in category {}'.format(len(nids), cid)
            ai2.put(*list(nids))

    pool = gevent.pool.Pool(10)
    for cid in fecids:
        pool.spawn(update_latest_ids, cid)
    pool.join()

def runworker(args):
    gevent.monkey.patch_all()
    initlog(optdict.get('-l', 'INFO'))
    app = apps.Application(args[0])
    size = int(optdict.get('-s', '100'))
    pool = gevent.pool.Pool(size)
    for n in xrange(size):
        pool.spawn(worker.BeanstalkWorker(
            app, optdict['-q'],
            optdict.get('-H', 'localhost'), optdict.get('-p', '11300'),
            int(optdict.get('-t', '10'))).run)
    pool.join()

def make_nuwiki(fsdir, metabook, options, podclient=None, status=None):
    id2wiki = {}
    for x in metabook.wikis:
        id2wiki[x.ident] = (x, [])

    for x in metabook.articles():
        assert x.wikiident in id2wiki, "no wikiconf for %r (%s)" % (x.wikiident, x)
        id2wiki[x.wikiident][1].append(x)

    is_multiwiki = len(id2wiki) > 1

    if is_multiwiki:
        progress = fetch.shared_progress(status=status)
    else:
        progress = None

    fetchers = []
    for id, (wikiconf, articles) in id2wiki.items():
        if id is None:
            id = ""
            assert not is_multiwiki, "id must be set in multiwiki"
        if not is_multiwiki:
            id = ""
        assert "/" not in id, "bad id: %r" % (id,)
        my_fsdir = os.path.join(fsdir, id)

        if is_multiwiki:
            my_mb = collection()
            my_mb.items = articles
        else:
            my_mb = metabook

        wikitrust(wikiconf.baseurl, my_mb)

        fetchers.append(start_fetcher(fsdir=my_fsdir, progress=progress,
                                      base_url=wikiconf.baseurl,
                                      metabook=my_mb, options=options,
                                      podclient=podclient, status=status))

    if is_multiwiki:
        if not os.path.exists(fsdir):
            os.makedirs(fsdir)
        open(os.path.join(fsdir, "metabook.json"), "wb").write(metabook.dumps())
        myjson.dump(dict(format="multi-nuwiki"),
                    open(os.path.join(fsdir, "nfo.json"), "wb"))

    pool = gevent.pool.Pool()
    for x in fetchers:
        pool.spawn(x.run)
    pool.join(raise_error=True)

    import signal
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    signal.signal(signal.SIGTERM, signal.SIG_DFL)

def process_group(group):
    group_results = []
    pool = gevent.pool.Pool(urls_group_size)

    for url in group:
        if not is_valid_url(url, allow_no_protocol=True):
            completed_urls[url] = (False, "Invalid URL")
            if len(completed_urls) == len(urls):
                # all done, trigger callback
                return completed_callback(completed_urls)
            else:
                continue
        assert url.startswith('http://') or url.startswith('https://')
        pool.spawn(make_stream_request, url)

    pool.join()

def sync_cassandra(simple=False):
    for table, fields in sorted(schemas.items(), key=lambda x: len(x[0])):
        if simple:
            if table in [
                "ataobao2.top10",
                "ataobao2.blacklist",
                "ataobao2.agghosts",
                "ataobao2.cate",
                "ataobao2.brand",
            ]:
                sync_table(table, fields)
        else:
            sync_table(table, fields)
    pool.join()

def subpool_map(pool_size, func, iterable):
    """ Starts a Gevent pool and run a map. Takes care of setting current_job and cleaning up. """

    if not pool_size:
        return [func(*args) for args in iterable]

    counter = itertools_count()

    current_job = get_current_job()

    def inner_func(*args):
        """ As each call to 'func' will be done in a random greenlet of the subpool, we need to
            register their IDs with set_current_job() to make get_current_job() calls work properly
            inside 'func'. """
        next(counter)
        if current_job:
            set_current_job(current_job)

        try:
            ret = func(*args)
        except Exception as exc:
            trace = traceback.format_exc()
            log.error("Error in subpool: %s \n%s" % (exc, trace))
            raise

        if current_job:
            set_current_job(None)
        return ret

    def inner_iterable():
        """ This will be called inside the pool's main greenlet, which ID also needs to be registered """
        if current_job:
            set_current_job(current_job)

        for x in iterable:
            yield x

        if current_job:
            set_current_job(None)

    start_time = time.time()

    pool = gevent.pool.Pool(size=pool_size)
    ret = pool.map(inner_func, inner_iterable())
    pool.join(raise_error=True)

    total_time = time.time() - start_time

    log.debug("SubPool ran %s greenlets in %0.6fs" % (counter, total_time))

    return ret

def test_contest(self):
    ns = NamespaceSemaphore()
    ns.acquire('/ex', 3, 0)
    self.q = gevent.queue.Queue()
    pool = gevent.pool.Pool(size=5)
    for i in xrange(5):
        pool.spawn(self.acquire, ns)
    pool.join()
    self.assertEquals(self.q.qsize(), 1)
    self.q.get()
    for i in xrange(5):
        pool.spawn(self.acquire, ns)
    pool.join()
    self.assertEquals(self.q.qsize(), 1)

def _s3_upload_pg_archive(self, archive_filename, pool_size, rate_limit=None):
    """
    Upload archive_filename to s3_url_prefix.
    """
    backup_s3_prefix = ('{0}/basebackups_{1}/base_{2}'
                        .format(self.s3_prefix, FILE_STRUCTURE_VERSION,
                                os.path.basename(archive_filename)))

    # absolute upload paths are used for telling lzop what to compress
    local_abspath = os.path.abspath(archive_filename)

    partitions = tar_partition.archive_partitions_plan(
        local_abspath,
        # 1610612736 bytes == 1.5 gigabytes, per partition, non-tunable
        1610612736)

    if rate_limit is None:
        per_process_limit = None
    else:
        per_process_limit = int(rate_limit / pool_size)

    # Reject tiny per-process rate limits.  They should be
    # rejected more nicely elsewhere.
    assert per_process_limit > 0 or per_process_limit is None

    # a list to accumulate async upload jobs
    uploads = []

    total_size = os.path.getsize(local_abspath)

    pool = gevent.pool.Pool(size=pool_size)

    # Enqueue uploads for parallel execution
    try:
        for part in partitions:
            uploads.append(pool.apply_async(
                s3_worker.do_archive_partition_put,
                [backup_s3_prefix, part, per_process_limit, self.gpg_key_id]))
    finally:
        while uploads:
            uploads.pop().get()
        pool.join()

    return backup_s3_prefix, total_size

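# A generic sketch of the enqueue-then-drain pattern used above (an assumption: plain gevent
# only, none of the wal-e/s3_worker specifics; `slow_square` is a hypothetical task).
import gevent
import gevent.pool

def _apply_async_example():
    pool = gevent.pool.Pool(size=4)

    def slow_square(x):
        gevent.sleep(0.01)
        return x * x

    # apply_async returns a Greenlet per task; .get() blocks for and returns its result,
    # re-raising any exception the task raised.
    jobs = [pool.apply_async(slow_square, (i,)) for i in range(16)]
    results = [job.get() for job in jobs]
    pool.join()
    return results
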
def fetch_used(self, name, lst, expanded=False):
    limit = self.api.api_request_limit
    pool = gevent.pool.Pool()
    blocks = splitblocks(lst, limit)
    self.count_total += len(blocks)
    for bl in blocks:
        pool.add(self._refcall_noinc(self.fetch_used_block, name, bl, expanded))
    pool.join()

    if conf.noedits:
        return

    items = self.title2latest.items()
    self.title2latest = {}
    self.count_total += len(items)
    for title, rev in items:
        self._refcall_noinc(self.get_edits, title, rev)

def main():
    parser = argparse.ArgumentParser(
        description='Image downloader.')
    parser.add_argument('-f', '--file', metavar='INPUT_FILE', required=True,
                        help='Path to input file with image links')
    parser.add_argument('-d', '--dir', metavar='DIR', required=True,
                        help='Download directory')
    args = parser.parse_args()

    # Parse input file and form list with the URL paths
    urls = [url.rstrip('\n') for url in open(args.file) if url != '\n']

    # Make pool with two greenlets
    pool = gevent.pool.Pool(2)
    [pool.spawn(download_image, url, args.dir) for url in urls]
    pool.join()