def upload_photos_in_pending(with_failed=True): q_filter = ['pending'] if with_failed: q_filter.append('failed') photos = (Photo.select(Photo.local_path, Photo.ext_album_key).where( (Photo.status << q_filter))) photos = list(photos) def worker(): logger.info('[New worker started]') while True: item = q.get() try: upload_photo(item) finally: q.task_done() q = JoinableQueue(maxsize=10) for i in range(UPLOADING_WORKERS_COUNT): gevent.spawn(worker) for p in photos: q.put((p.local_path, p.ext_album_key)) q.join()
def update_keywords(): sm_api = SmugmugAPI() def worker(): logger.info('[Worker started]') while True: item = q.get() try: sm_api.update_image_keywords(*item) finally: q.task_done() q = JoinableQueue(maxsize=100) for i in range(50): gevent.spawn(worker) photos = (Photo.select(Photo.local_path, Photo.ext_key).where( (Photo.status == 'uploaded'))) photos = list(photos) print("Total photos to update:", len(photos)) cnt = 0 for p in photos: cnt += 1 print(cnt) keywords = get_keywords(p.local_path) q.put((p.ext_key, keywords)) q.join()
def on_search(self, query): log.debug('search for %r', query) queue = JoinableQueue() task_group = g.api.search(query, queue) while True: finished = all( [t.ready() for t in task_group] ) try: item = queue.get(timeout=1.0) except Empty: if finished: break continue try: self.emit('result', item._asdict()) finally: queue.task_done() queue.join() task_group.join() self.emit('done', query)
def process_24_network(net, port): q = JoinableQueue() r = JoinableQueue() gevent.spawn(prepare_list, q, net) tasks = [] for x in range(0, CONCURRENT_GROUPS): #print "spawning %i" % x tasks += [gevent.spawn(scan_network, q, r, port)] q.join() gevent.joinall(tasks) if not r.empty(): with open(str(net.ip) + '_' + str(port) + ".m3u", "w+") as f: f.write("#EXTM3U\n") while not r.empty(): try: group = r.get(timeout=10) f.write( '#EXTINF:-1 tvg-logo="" tvg-name="" group-title="",ChannelName' + "\n") f.write('udp://@' + str(group) + ':' + str(port) + "\n") logging.info("Ok ====> %s" % group) except gevent.queue.Empty: break
class GeventPoolExecutor2(LoggerMixin): def __init__( self, max_works, ): self._q = JoinableQueue(maxsize=max_works) # self._q = Queue(maxsize=max_works) for _ in range(max_works): gevent.spawn(self.__worker) # atexit.register(self.__atexit) self._q.join(timeout=100) def __worker(self): while True: fn, args, kwargs = self._q.get() try: fn(*args, **kwargs) except Exception as exc: self.logger.exception( f'函数 {fn.__name__} 中发生错误,错误原因是 {type(exc)} {exc} ') finally: pass self._q.task_done() def submit(self, fn: Callable, *args, **kwargs): self._q.put((fn, args, kwargs)) def __atexit(self): self.logger.critical('想即将退出程序。') self._q.join()
class GQueue(object): def __init__(self): self.__QUEUE = JoinableQueue() def job(self, func): @functools.wraps(func) def f(*args, **kwargs): self.__QUEUE.put([func, args, kwargs]) return f def join(self): self.__QUEUE.join() def work(self): while True: func, args, kwargs = self.__QUEUE.get() try: func(*args, **kwargs) finally: self.__QUEUE.task_done() def run_worker(self, num=1): for i in range(num): gevent.spawn(self.work)
def test_api(self): queue = JoinableQueue() task_group = self.api.search('terminator', queue) while True: finished = all( [greenlet.ready() for greenlet in task_group.greenlets] ) try: item = queue.get(timeout=1.0) except Empty: if finished: log.info('queue is empty and all jobs are done, quitting') break log.info( 'queue was empty and jobs are still running, retrying' ) continue try: log.info('%r', item) finally: queue.task_done() task_group.join() queue.join() log.info('joined everything')
def extract(input_dir, output_path, func): with open(output_path, 'w') as output: tasks = JoinableQueue() for file_name in os.listdir(input_dir): tasks.put(file_name) def _extract(file_name): file_path = os.path.join(input_dir, file_name) with open(file_path) as f: try: json = simplejson.load(f) except Exception as e: print(str(e)) print('Failed to load json file {}'.format(file_path)) for pair in func(json): output.write('\t'.join([str(x) for x in pair]) + '\n') def worker(): while True: file_name = tasks.get() _extract(file_name) print(file_name) tasks.task_done() for i in range(10): gevent.spawn(worker) tasks.join()
def handle(): connection = create_postgresql_connection() cursor = connection.cursor() cursor.execute("BEGIN;") cursor.execute("DELETE FROM core_ratequery;") cursor.execute("COMMIT;") cursor.close() queue = JoinableQueue() event = Event() age_ids = age_map(connection).values() + [None] sex_ids = sex_map(connection).values() + [None] education_ids = education_map(connection).values() + [None] province_ids = province_map(connection).values() + [None] cursor = connection.cursor() cursor.execute("SELECT DISTINCT cycle FROM core_microdata;"); cycles = [row[0] for row in cursor] cursor.close() greenlets = [] for i in range(50): gv = gevent.spawn(worker, queue, event) greenlets.append(gv) combs = itertools.product(age_ids, sex_ids, province_ids, education_ids, cycles) for c in combs: queue.put(c) queue.join() event.set() gevent.joinall(greenlets)
def processor(data): """ Each launched process(=NUM_CORES) executes 1 item in the list map_data as data. For given start_id and batch_size, launches gevent consumers to scrape data for the given ID Also, the main thread acts as a producer to produce the data for the workers to use """ try: NUM_GREENLETS = 8 # Depending on how much I/O block is expected. Varies for each problem. process_id = multiprocessing.current_process() monkey.patch_all( ) # Patch all the libraries to support non-IO blocking start_id = data["start_id"] batch_size = data["batch_size"] joinable_queue = JoinableQueue() # Launch NUM_GREENLETS workers for i in range(NUM_GREENLETS): gevent.spawn(worker, joinable_queue=joinable_queue, greenlet_id=i, process_id=process_id) # Producer for id in range(start_id, start_id + batch_size): joinable_queue.put(id) joinable_queue.join() except: # If the processes have any uncaptured error, it'd not redirect to stderr, # as it's a different Pipe for each process fork spawned print(traceback.format_exc())
class GeventPoolExecutor2(LoggerMixin): def __init__( self, max_works, ): check_gevent_monkey_patch() self._q = JoinableQueue(maxsize=max_works) # self._q = Queue(maxsize=max_works) for _ in range(max_works): # self.logger.debug('yyyyyy') gevent.spawn(self.__worker) atexit.register(self.__atexit) def __worker(self): while True: fn, args, kwargs = self._q.get() # noinspection PyBroadException try: fn(*args, **kwargs) except Exception as exc: self.logger.exception( f'函数 {fn.__name__} 中发生错误,错误原因是 {type(exc)} {exc} ') finally: pass self._q.task_done() def submit(self, fn: Callable, *args, **kwargs): # self.logger.debug(self._q.qsize()) self._q.put((fn, args, kwargs)) def __atexit(self): self.logger.critical('想即将退出程序。') self._q.join()
def test_api(self): queue = JoinableQueue() task_group = self.api.search('terminator', queue) while True: finished = all( [greenlet.ready() for greenlet in task_group.greenlets]) try: item = queue.get(timeout=1.0) except Empty: if finished: log.info('queue is empty and all jobs are done, quitting') break log.info( 'queue was empty and jobs are still running, retrying') continue try: log.info('%r', item) finally: queue.task_done() task_group.join() queue.join() log.info('joined everything')
def start(self): if not self.__threads: self.__threads = len(IPNetwork(self.__ip)) if len(IPNetwork(self.__ip)) <= 10 else 10 if len(IPNetwork(self.__ip)) < int(self.__threads): print "Please decrease number of threads to number of hosts <= %s" % len(IPNetwork(self.__ip)) exit() queue = JoinableQueue() [queue.put(str(ip)) for ip in IPNetwork(self.__ip)] workers = [spawn(self.get_ip_info, queue, self.__apis) for t in range(int(self.__threads))] queue.join()
def start(self): if not self.__threads: self.__threads = len(IPNetwork( self.__ip)) if len(IPNetwork(self.__ip)) <= 10 else 10 if len(IPNetwork(self.__ip)) < int(self.__threads): print "Please decrease number of threads to number of hosts <= %s" % len( IPNetwork(self.__ip)) exit() queue = JoinableQueue() [queue.put(str(ip)) for ip in IPNetwork(self.__ip)] workers = [ spawn(self.get_ip_info, queue, self.__apis) for t in range(int(self.__threads)) ] queue.join()
def main(): if "-v" in argv: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) logging.info("Starting up") q = JoinableQueue() gevent.spawn(feeder, q) tasks = [] for x in range(0, CONCURRENT_GROUPS): #print "spawning %i" % x tasks += [gevent.spawn(poolworker, q)] q.join() gevent.joinall(tasks) logging.info("Finished.")
def spider(start_url, max_depth=1, no_of_workers=10, page_fn=check_page_for_profanities): """ Concurrently spider the web, starting from web page, executing page_fn on each page. start_url specifies the document the spider starts from. max_depth specifies the maximum link depth from the start_url that processing will occur. no_of_workers specifies how many concurrent workers process the job queue. page_fn is a function that takes BeautifulSoup parsed html and a url and processes them as required """ seen_urls = set((start_url,)) job_queue = JoinableQueue() job_queue.put((start_url, max_depth)) for i in range(no_of_workers): gevent.spawn(job_worker, job_queue, seen_urls, page_fn) job_queue.join()
def handle(): #The expected format is: #ciclo edad sexo nforma prov aoi factorel csv_path = sys.argv[1] queue = JoinableQueue() event = Event() greenlets = [] for i in range(90): gv = gevent.spawn(worker, queue, event) greenlets.append(gv) with io.open(csv_path, 'r') as f: for line in f: queue.put(line) queue.join() event.set() gevent.joinall(greenlets)
def recursive_crawl(url): all_urls = set() processed_urls = set() task_queue = JoinableQueue() def add_to_all(url): if url not in all_urls: print("Record url {}".format(url)) all_urls.add(url) task_queue.put_nowait(url) # Start workers workers = [] for i in xrange(10): workers.append(gevent.spawn(url_worker, i, processed_urls, add_to_all, task_queue)) print("workers", len(workers)) task_queue.join() print("Processed", len(processed_urls), "All", len(all_urls)) print("Total latency", demo_helpers.TOTAL_LATENCY)
class TaskList: def __init__(self): self.queue = JoinableQueue() self.all_tasks = {} def add_task(self, task): self.all_tasks[task.get_id()] = task self.queue.put(task) def get_queue(self): return self.queue def join(self, timeout=None): return self.queue.join(timeout)
class MassGet(FastGet): def __init__(self, urls, dic, threads=10, report_db=False, keepalive=None, each_threads=10): self.dic = dic self.report_db = report_db self.table = None if report_db: self.sql_conn(report_db) self.keepalive = keepalive self.each_threads = each_threads self.queue = JoinableQueue() [self.queue.put(x.strip()) for x in urls] [spawn(self.worker) for _ in xrange(threads)] self.queue.join() def worker(self): while not self.queue.empty(): url = self.queue.get() try: FastGet(url, self.dic, self.each_threads, self.report_db, self.keepalive, self.table) except Exception as e: logging.error('Worker global exception for %s: %s' % (url, e)) finally: self.queue.task_done()
def handle(): connection = create_postgresql_connection() cursor = connection.cursor() cursor.execute("BEGIN;") cursor.execute("DELETE FROM core_ratequery;") cursor.execute("COMMIT;") cursor.close() queue = JoinableQueue() event = Event() age_ids = age_map(connection).values() + [None] sex_ids = sex_map(connection).values() + [None] education_ids = education_map(connection).values() + [None] province_ids = province_map(connection).values() + [None] cursor = connection.cursor() cursor.execute("SELECT DISTINCT cycle FROM core_microdata;") cycles = [row[0] for row in cursor] cursor.close() greenlets = [] for i in range(50): gv = gevent.spawn(worker, queue, event) greenlets.append(gv) combs = itertools.product(age_ids, sex_ids, province_ids, education_ids, cycles) for c in combs: queue.put(c) queue.join() event.set() gevent.joinall(greenlets)
else: sleep(5) if __name__ == '__main__': t_status = spawn_link_exception(status_thread) t_item_queue = spawn_link_exception(add_to_item_queue) for i in range(80): spawn_link_exception(run_find_item) #t_index_items = spawn_link_exception(index_items) for i in range(8): spawn_link_exception(run_solr_queue, i) #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr]) sleep(1) print('join item_queue thread') t_item_queue.join() print('item_queue thread complete') #print 'join item_and_host_queue:', item_and_host_queue.qsize() #item_and_host_queue.join() #print 'item_and_host_queue complete' for host, host_queue in host_queues.items(): qsize = host_queue.qsize() print('host:', host, qsize) host_queue.join() print('join solr_queue:', solr_queue.qsize()) solr_queue.join() print('solr_queue complete')
class FastGet: def __init__(self, url, dic, threads=100, report_db=False, keepalive=None, table_name=None): self.url = url parts = urlparse(url) self.scheme, self.host, self.port = parts.scheme, parts.hostname, parts.port if not self.port: self.port = 443 if self.scheme == 'https' else 80 self.keepalive = keepalive try: instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive) except Exception as e: logging.error('Init exception for %s: %s' % (self.url, e)) return if not keepalive: self.keepalive = instance.detect_keepalive() if self.keepalive == 0: logging.error('Keep-Alive value for %s appears to be 0, check the connection' % url) return logging.warning('Calculated Keep-Alive for %s: %s' % (url, self.keepalive)) self.report_db = report_db if report_db: self.table = table_name self.sql_conn(report_db) self.queue = JoinableQueue() [self.queue.put(dic[i:i + self.keepalive]) for i in xrange(0, len(dic), self.keepalive)] [spawn(self.worker) for _ in xrange(threads)] self.queue.join() def sql_conn(self, report_db): self.conn = MySQLdb.connect(report_db['host'], report_db['user'], report_db['passwd'], report_db['db']) self.cur = self.conn.cursor() if not self.table: self.table = 'scan_%s' % datetime.strftime(datetime.now(), '%Y_%m_%d_%H%M%S') self.cur.execute( 'create table %s(scheme varchar(16), host varchar(128), port smallint, uri varchar(128),\ code smallint, size int, type varchar(128))' % self.table) def report(self, result): if result[1] not in [302, 404]: logging.warning('Path %s://%s:%s/%s, response code %s, content-length %s, content-type %s' % ( self.scheme, self.host, self.port, result[0], result[1], result[2], result[3])) if self.report_db: p = [self.scheme, self.host, self.port] + list(result) self.cur.execute('insert into %s values(%%s,%%s,%%s,%%s,%%s,%%s,%%s)' % self.table, p) def worker(self): try: instance = HehReq(self.host, int(self.port), self.scheme, self.keepalive) except Exception as e: logging.error('Worker init exception for %s: %s' % (self.url, e)) return while not self.queue.empty(): paths = self.queue.get() try: for x in instance.bulk_get(paths): self.report(x) except Exception as e: logging.error('Worker loop exception for %s: %s' % (self.url, e)) finally: if self.report_db: self.conn.commit() self.queue.task_done()
class Importer(object): def __init__(self, creds, pool_size=POOL_SIZE): self.client = get_session(creds['host'], creds['key'], creds['secret']) self.queue = JoinableQueue(maxsize=POOL_SIZE*2) for i in range(pool_size): gevent.spawn(self.worker) def worker(self): while True: job = self.queue.get() typ = job.get('type') try: if typ == 'device': self._process_device(job['data']) elif typ == 'datapoints': self._process_datapoints(job['data']) finally: self.queue.task_done() def write_devices(self, devices): for device in devices: self.queue.put({'type': 'device', 'data': device}) self.queue.join() def write_datapoints_from_file(self, infile): points = {} lineno = 0 for line in infile: lineno += 1 (device, sensor, ts, val) = line.split('\t') pts = points.setdefault(device, {}).setdefault(sensor, []) pts.append({'t': ts, 'v': float(val)}) if lineno % 1000 == 0: self.queue.put({'type': 'datapoints', 'data': points}) points = {} if points: self.queue.put({'type': 'datapoints', 'data': points}) self.queue.join() def _process_device(self, device, retries=5): res = self.client.create_device(device) if res.successful != tempoiq.response.SUCCESS: if 'A device with that key already exists' in res.body: print("Skipping creating existing device {}" .format(device['key'])) return if retries > 0: print("Retrying device create {}, error {}" .format(device['key'], res.body)) self._process_device(device, retries - 1) else: print("Retries exceeded; couldn't create device {}" .format(device['key'])) def _process_datapoints(self, write_request, retries=5): try: res = self.client.write(write_request) except Exception, e: print("ERROR with request: --->") print(json.dumps(write_request, default=WriteEncoder().default)) raise e if res.successful != tempoiq.response.SUCCESS: if retries > 0: print("Retrying write, error was: {}".format(res.body)) return self._process_datapoints(write_request, retries - 1) else: print("Retries exceeded; lost data!") print(json.dumps(write_request, default=WriteEncoder().default)) return True return False
class Worker(object): # http://www.gevent.org/gevent.wsgi.html # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/ # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html DEFAULT_PORT = "9311" def __init__(self, port=DEFAULT_PORT): # REST services monkey.patch_all() signal(SIGQUIT, shutdown) self.is_config = False self.server = wsgi.WSGIServer(('', int(port)), self._response_handler, log=None) # sharding self.prefix = None self.shard_id = None self.ring = None # concurrency based on message passing / barrier pattern self._task_event = None self._task_queue = None # UnitOfWork self._uow = None def shard_start(self): """start the worker service for this shard""" self.server.serve_forever() def shard_stop(self, *args, **kwargs): """stop the worker service for this shard""" payload = args[0] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): logging.info( "worker service stopping... you can safely ignore any exceptions that follow" ) self.server.stop() else: # returns incorrect response in this case, to avoid exception logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"]) ###################################################################### ## authentication methods def auth_request(self, payload, start_response, body): """test the authentication credentials for a REST call""" if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): return True else: # UoW caller did not provide correct credentials to access shard start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put("Forbidden, incorrect credentials for this shard\r\n") body.put(StopIteration) logging.error("incorrect credentials shard %s prefix %s", payload["shard_id"], payload["prefix"]) return False def shard_config(self, *args, **kwargs): """configure the service to run a shard""" payload, start_response, body = self.get_response_context(args) if self.is_config: # hey, somebody call security... 
start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put("Forbidden, shard is already in a configured state\r\n") body.put(StopIteration) logging.warning("denied configuring shard %s prefix %s", self.shard_id, self.prefix) else: self.is_config = True self.prefix = payload["prefix"] self.shard_id = payload["shard_id"] # dependency injection for UnitOfWork uow_name = payload["uow_name"] logging.info("initializing unit of work based on %s", uow_name) ff = instantiate_class(uow_name) self._uow = ff.instantiate_uow(uow_name, self.prefix) start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("configuring shard %s prefix %s", self.shard_id, self.prefix) ###################################################################### ## barrier pattern methods @contextmanager def wrap_task_event(self): """initialize a gevent.Event, to which the UnitOfWork will wait as a listener""" self._task_event = Event() yield # complete the Event, notifying the UnitOfWork which waited self._task_event.set() self._task_event = None def _consume_task_queue(self): """consume/serve requests until the task_queue empties""" while True: payload = self._task_queue.get() try: self._uow.perform_task(payload) finally: self._task_queue.task_done() def prep_task_queue(self): """prepare task_queue for another set of distributed tasks""" self._task_queue = JoinableQueue() spawn(self._consume_task_queue) def put_task_queue(self, payload): """put the given task definition into the task_queue""" self._task_queue.put_nowait(payload) def queue_wait(self, *args, **kwargs): """wait until all shards finished sending task_queue requests""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): if self._task_event: self._task_event.wait() # HTTP response first, then initiate long-running task start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) def queue_join(self, *args, **kwargs): """join on the task_queue, as a barrier to wait until it empties""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("join queue...\r\n") ## NB: TODO this step of emptying out the task_queue on ## shards could take a while on a large run... perhaps use ## a long-polling HTTP request or websocket instead? 
self._task_queue.join() body.put("done\r\n") body.put(StopIteration) ###################################################################### ## hash ring methods def ring_init(self, *args, **kwargs): """initialize the HashRing""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): self.ring = payload["ring"] start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("setting hash ring %s", self.ring) ###################################################################### ## WSGI handler for REST endpoints def get_response_context(self, args): """decode the WSGI response context from the Greenlet args""" env = args[0] msg = env["wsgi.input"].read() payload = loads(msg) start_response = args[1] body = args[2] return payload, start_response, body def _response_handler(self, env, start_response): """handle HTTP request/response""" uri_path = env["PATH_INFO"] body = JoinableQueue() if self._uow and self._uow.handle_endpoints(self, uri_path, env, start_response, body): pass ########################################## # Worker endpoints elif uri_path == '/shard/config': # configure the service to run a shard Greenlet(self.shard_config, env, start_response, body).start() elif uri_path == '/shard/stop': # shutdown the service ## NB: must parse POST data specially, to avoid exception payload = loads(env["wsgi.input"].read()) Greenlet(self.shard_stop, payload).start_later(1) # HTTP response starts first, to avoid error after server stops start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Goodbye\r\n") body.put(StopIteration) elif uri_path == '/queue/wait': # wait until all shards have finished sending task_queue requests Greenlet(self.queue_wait, env, start_response, body).start() elif uri_path == '/queue/join': # join on the task_queue, as a barrier to wait until it empties Greenlet(self.queue_join, env, start_response, body).start() elif uri_path == '/check/persist': ## NB: TODO checkpoint the service state to durable storage start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/check/recover': ## NB: TODO restart the service, recovering from most recent checkpoint start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # HashRing endpoints elif uri_path == '/ring/init': # initialize the HashRing Greenlet(self.ring_init, env, start_response, body).start() elif uri_path == '/ring/add': ## NB: TODO add a node to the HashRing start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/ring/del': ## NB: TODO delete a node from the HashRing start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # utility endpoints elif uri_path == '/': # dump info about the service in general start_response('200 OK', [('Content-Type', 'text/plain')]) body.put(str(env) + "\r\n") body.put(StopIteration) else: # ne znayu start_response('404 Not Found', [('Content-Type', 'text/plain')]) body.put('Not Found\r\n') body.put(StopIteration) return body
class Worker (object): # http://www.gevent.org/gevent.wsgi.html # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/ # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html DEFAULT_PORT = "9311" def __init__ (self, port=DEFAULT_PORT): monkey.patch_all() self.server = wsgi.WSGIServer(('', int(port)), self._response_handler) self.is_config = False self.prefix = None self.shard_id = None self.ring = None self.ff_name = None self.pop = None self.evt = None self.reify_queue = None def start (self): """start the service""" self.server.serve_forever() def stop (self, *args, **kwargs): """stop the service""" payload = args[0] body = args[1] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): logging.info("executor service stopping... you can safely ignore any exceptions that follow") self.server.stop() else: # NB: you have dialed a wrong number! # returns incorrect response in this case, to avoid exception logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"]) def _bad_auth (self, payload, body, start_response): """Framework did not provide the correct credentials to access this shard""" start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put('Forbidden\r\n') body.put(StopIteration) logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"]) def reify_consumer (self): """consume/serve reify requests until the queue empties""" while True: payload = self.reify_queue.get() try: key = payload["key"] gen = payload["gen"] feature_set = payload["feature_set"] self.pop.receive_reify(key, gen, feature_set) finally: self.reify_queue.task_done() def shard_config (self, *args, **kwargs): """configure the service to run a shard""" payload = args[0] body = args[1] start_response = args[2] if self.is_config: # somebody contact security... 
start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put("Forbidden, executor already in a configured state\r\n") body.put(StopIteration) logging.warning("denied configuring shard %s prefix %s", self.shard_id, self.prefix) else: self.is_config = True self.prefix = payload["prefix"] self.shard_id = payload["shard_id"] start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("configuring shard %s prefix %s", self.shard_id, self.prefix) def ring_init (self, *args, **kwargs): """initialize the HashRing""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.ring = payload["ring"] start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("setting hash ring %s", self.ring) else: self._bad_auth(payload, body, start_response) def pop_init (self, *args, **kwargs): """initialize a Population of unique Individuals on this shard""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.ff_name = payload["ff_name"] logging.info("initializing population based on %s", self.ff_name) self.pop = Population(Individual(), self.ff_name, self.prefix) self.pop.set_ring(self.shard_id, self.ring) self.reify_queue = JoinableQueue() spawn(self.reify_consumer) start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def pop_gen (self, *args, **kwargs): """create generation 0 of Individuals in this shard of the Population""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.evt = Event() # HTTP response first, then initiate long-running task start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) self.pop.populate(0) self.evt.set() self.evt = None else: self._bad_auth(payload, body, start_response) def pop_wait (self, *args, **kwargs): """wait until all shards finished sending reify requests""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): if self.evt: self.evt.wait() # HTTP response first, then initiate long-running task start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def pop_join (self, *args, **kwargs): """join on the reify queue, to wait until it empties""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.reify_queue.join() ## NB: perhaps use a long-polling HTTP request or websocket instead? 
start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def pop_hist (self, *args, **kwargs): """calculate a partial histogram for the fitness distribution""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): start_response('200 OK', [('Content-Type', 'application/json')]) body.put(dumps(self.pop.get_part_hist())) body.put("\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def pop_next (self, *args, **kwargs): """iterate N times or until a 'good enough' solution is found""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.evt = Event() # HTTP response first, then initiate long-running task start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) current_gen = payload["current_gen"] fitness_cutoff = payload["fitness_cutoff"] self.pop.next_generation(current_gen, fitness_cutoff) self.evt.set() self.evt = None else: self._bad_auth(payload, body, start_response) def pop_enum (self, *args, **kwargs): """enumerate the Individuals in this shard of the Population""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): fitness_cutoff = payload["fitness_cutoff"] start_response('200 OK', [('Content-Type', 'application/json')]) body.put(dumps(self.pop.enum(fitness_cutoff))) body.put("\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def pop_reify (self, *args, **kwargs): """test/add a newly generated Individual into the Population (birth)""" payload = args[0] body = args[1] start_response = args[2] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): self.reify_queue.put_nowait(payload) start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) else: self._bad_auth(payload, body, start_response) def _response_handler (self, env, start_response): """handle HTTP request/response""" uri_path = env['PATH_INFO'] body = Queue() ## NB: these handler cases can be collapsed into a common pattern ## except for config/stop -- later ########################################## # shard lifecycle endpoints if uri_path == '/shard/config': # configure the service to run a shard payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.shard_config, payload, body, start_response) gl.start() elif uri_path == '/shard/persist': # checkpoint the service state to durable storage payload = loads(env['wsgi.input'].read()) print "POST", payload ## TODO start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/shard/recover': # restart the service, recovering from the most recent checkpoint payload = loads(env['wsgi.input'].read()) print "POST", payload ## TODO start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # HashRing endpoints elif uri_path == '/ring/init': # initialize the HashRing payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.ring_init, payload, body, start_response) gl.start() elif uri_path == '/ring/add': # add a node to the HashRing payload = loads(env['wsgi.input'].read()) print "POST", 
payload ## TODO start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/ring/del': # delete a node from the HashRing payload = loads(env['wsgi.input'].read()) print "POST", payload ## TODO start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # evolution endpoints elif uri_path == '/pop/init': # initialize the Population subset on this shard payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_init, payload, body, start_response) gl.start() elif uri_path == '/pop/gen': # create generation 0 of Individuals in this shard of the Population payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_gen, payload, body, start_response) gl.start() elif uri_path == '/pop/wait': # wait until all shards have finished sending reify requests payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_wait, payload, body, start_response) gl.start() elif uri_path == '/pop/join': # join on the reify queue, to wait until it empties payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_join, payload, body, start_response) gl.start() elif uri_path == '/pop/hist': # calculate a partial histogram for the fitness distribution payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_hist, payload, body, start_response) gl.start() elif uri_path == '/pop/next': # attempt to run another generation payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_next, payload, body, start_response) gl.start() elif uri_path == '/pop/enum': # enumerate the Individuals in this shard of the Population payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_enum, payload, body, start_response) gl.start() elif uri_path == '/pop/reify': # test/add a newly generated Individual into the Population (birth) payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.pop_reify, payload, body, start_response) gl.start() elif uri_path == '/pop/evict': # remove an Individual from the Population (death) payload = loads(env['wsgi.input'].read()) print "POST", payload ## TODO start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # utility endpoints elif uri_path == '/': # dump info about the service in general start_response('200 OK', [('Content-Type', 'text/plain')]) body.put(str(env) + "\r\n") body.put(StopIteration) elif uri_path == '/stop': # shutdown the service payload = loads(env['wsgi.input'].read()) gl = Greenlet(self.stop, payload, body) gl.start_later(1) # HTTP response must start here, to avoid failure when server stops start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Goodbye\r\n") body.put(StopIteration) else: # ne znayu start_response('404 Not Found', [('Content-Type', 'text/plain')]) body.put('Not Found\r\n') body.put(StopIteration) return body
class Migrator: def __init__(self, scheme, create_devices=True, write_data=True, start_date="2000-01-01T00:00:00Z", end_date="2014-12-31T00:00:00Z", pool_size=3): self.scheme = scheme self.create_devices = create_devices self.should_write_data = write_data self.start_date = start_date self.end_date = end_date self.tdb = TDBClient(scheme.db_key, scheme.db_key, scheme.db_secret, base_url=scheme.db_baseurl) iq_endpoint = HTTPEndpoint(scheme.iq_baseurl, scheme.iq_key, scheme.iq_secret) self.tiq = TIQClient(iq_endpoint) self.queue = JoinableQueue() self.lock = Lock() self.dp_count = 0 self.req_count = 0 self.dp_reset = time.time() for i in range(pool_size): gevent.spawn(self.worker) def worker(self): while True: series = self.queue.get() try: self.migrate_series(series) finally: self.queue.task_done() def migrate_all_series(self, start_key="", limit=None): start_time = time.time() (keys, tags, attrs) = self.scheme.identity_series_filter() series_set = self.tdb.list_series(keys, tags, attrs) # Keep our own state of whether we passed the resume point, so we don't # need to assume client and server sort strings the same. found_first_series = False series_count = 0 for series in series_set: if not found_first_series and series.key < start_key: continue else: found_first_series = True if limit and series_count >= limit: print("Reached limit of %d devices, stopping." % (limit)) break if self.scheme.identity_series_client_filter(series): # If the series looks like an identity series, # queue it to be processed by the threadpool self.queue.put(series) series_count += 1 self.queue.join() end_time = time.time() print("Exporting {} devices took {} seconds".format( series_count, end_time - start_time)) def migrate_series(self, series): print(" Beginning to migrate series: %s" % (series.key)) error = False try: if self.create_devices: error = self.create_device(series) if self.should_write_data and not error: error = self.write_data(series) except Exception, e: logging.exception(e) error = True if not error: print("COMPLETED migrating for series %s" % (series.key)) else: print("ERROR migrating series %s" % (series.key))
class HttpScanner(object): def __init__(self, args): """ Initialise HTTP scanner :param args: :return: """ self.args = args self.output = HttpScannerOutput(args) self._init_scan_options() # Reading files self.output.write_log("Reading files and deduplicating.", logging.INFO) self.hosts = self._file_to_list(args.hosts) self.urls = self._file_to_list(args.urls) # self._calc_urls() out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count) if self.args.ports is not None: out += ' %i ports' % len(self.args.ports) self.output.print_and_log(out) if self.args.ports is not None and not self.args.syn: new_hosts = [] for host in self.hosts: for port in self.args.ports: # print(host, port) new_hosts.append(helper.generate_url(host, port)) self.hosts = new_hosts # self._calc_urls() self.output.print_and_log('%i full urls to scan' % self.full_urls_count) # Queue and workers self.hosts_queue = JoinableQueue() self.workers = [] def _file_to_list(self, filename, dedup=True): """ Get list from file :param filename: file to read :return: list of lines """ if not path.exists(filename) or not path.isfile(filename): self.output.print_and_log('File %s not found!' % filename, logging.ERROR) exit(-1) # Preparing lines list lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n')) if len(lines) == 0: self.output.print_and_log('File %s is empty!' % filename, logging.ERROR) exit(-1) return helper.deduplicate(lines) if dedup else lines def _init_scan_options(self): # Session self.session = session() self.session.timeout = self.args.timeout self.session.verify = False # TODO: debug and check # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries)) # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries)) # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request # Max retries adapters.DEFAULT_RETRIES = self.args.max_retries # TOR if self.args.tor: self.output.write_log("TOR usage detected. Making some checks.") self.session.proxies = { 'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050' } url = 'http://ifconfig.me/ip' real_ip, tor_ip = None, None # Ger real IP address try: real_ip = get(url).text.strip() except Exception as exception: self.output.print_and_log( "Couldn't get real IP address. Check yout internet connection.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Get TOR IP address try: tor_ip = self.session.get(url).text.strip() except Exception as exception: self.output.print_and_log( "TOR socks proxy doesn't seem to be working.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Show IP addresses self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip)) if real_ip == tor_ip: self.output.print_and_log( "TOR doesn't work! 
Stop to be secure.", logging.ERROR) exit(-1) # Proxy if self.args.proxy is not None: self.session.proxies = { "https": self.args.proxy, "http": self.args.proxy } # Auth if self.args.auth is not None: items = self.args.auth.split(':') self.session.auth = (items[0], items[1]) # Cookies self.cookies = {} if self.args.cookies is not None: self.cookies = Cookies.from_request(self.args.cookies) # Cookies from file if self.args.load_cookies is not None: if not path.exists(self.args.load_cookies) or not path.isfile( self.args.load_cookies): self.output.print_and_log( 'Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) exit(-1) self.cookies = MozillaCookieJar(self.args.load_cookies) self.cookies.load() self.session.cookies = self.cookies # User-Agent self.ua = UserAgent() if self.args.random_agent else None def worker(self, worker_id): self.output.write_log('Worker %i started.' % worker_id) while not self.hosts_queue.empty(): host = self.hosts_queue.get() try: self.scan_host(worker_id, host) finally: self.output.write_log('Worker %i finished.' % worker_id) self.hosts_queue.task_done() def _head_available(self, host): """ Determine if HEAD requests is allowed :param host: :return: """ # Trying to use OPTIONS request try: response = self.session.options(host, headers=self._fill_headers()) o = response.headers[ 'allow'] if 'allow' in response.headers else None if o is not None and o.find('HEAD') != -1: return True except: # TODO: fix pass try: return False if self.session.head( host, headers=self._fill_headers()).status_code == 405 else True except: # TODO: fix return False def scan_host(self, worker_id, host): # check if resolvable ip = helper.url_to_ip(host) if ip is None: self.output.write_log('Could not resolve %s Skipping...' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) return # Check for HEAD host_url = helper.host_to_url(host) head_available = False if self.args.head: head_available = self._head_available(host) if head_available: self.output.write_log('HEAD is supported for %s' % host) errors_count, urls_scanned = 0, 0 for url in self.urls: full_url = urljoin(host_url, url) r = self.scan_url(full_url, head_available) urls_scanned += 1 self.output.urls_scanned += 1 # Output r['worker'] = worker_id self.output.write(**r) if r['exception'] is not None: errors_count += 1 # Skip host on errors if self.args.skip is not None and errors_count == self.args.skip: self.output.write_log( 'Errors limit reached on %s Skipping other urls.' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) - urls_scanned break # cookies bugfix? 
self.session.cookies.clear() def _fill_headers(self): # Fill UserAgent in headers headers = {} if self.args.user_agent is not None: headers['User-agent'] = self.args.user_agent elif self.args.random_agent: headers['User-agent'] = self.ua.random # Fill Referer in headers if self.args.referer is not None: headers['Referer'] = self.args.referer return headers def _parse_response(self, url, response, exception): res = {'url': url, 'response': response, 'exception': exception} if response is None or exception is not None: res.update({ 'status': -1, 'length': -1, }) return res try: length = int(response.headers['content-length'] ) if 'content-length' in response.headers else len( response.text) except Exception as exception: self.output.write_log( "Exception while getting content length for URL: %s Exception: %s" % (url, str(exception)), logging.ERROR) length = 0 res.update({ 'status': response.status_code, 'length': length, }) return res def scan_url(self, url, use_head=False): self.output.write_log('Scanning %s' % url, logging.DEBUG) # Query URL and handle exceptions response, exception = None, None method = 'HEAD' if use_head else 'GET' try: # TODO: add support for user:password in URL response = self.session.request( method, url, headers=self._fill_headers(), allow_redirects=self.args.allow_redirects) except ConnectionError as ex: self.output.write_log('Connection error while quering %s' % url, logging.ERROR) exception = ex except HTTPError as ex: self.output.write_log('HTTP error while quering %s' % url, logging.ERROR) exception = ex except Timeout as ex: self.output.write_log('Timeout while quering %s' % url, logging.ERROR) exception = ex except TooManyRedirects as ex: self.output.write_log('Too many redirects while quering %s' % url, logging.ERROR) exception = ex except Exception as ex: self.output.write_log('Unknown exception while quering %s' % url, logging.ERROR) exception = ex # print('cookies: %s' % self.cookies) print('session.cookies: %s' % self.session.cookies) # self.session.cookies = self.cookies return self._parse_response(url, response, exception) def signal_handler(self): """ Signal hdndler :return: """ # TODO: add saving status via pickle self.output.print_and_log('Signal caught. Stopping...', logging.WARNING) self.stop() exit(signal.SIGINT) def _calc_urls(self): # Calculations self.urls_count = len(self.urls) self.hosts_count = len(self.hosts) self.full_urls_count = len(self.urls) * len(self.hosts) self.output.args.urls_count = self.full_urls_count def start(self): """ Start mulithreaded scan :return: """ # Set signal handler gevent.signal(signal.SIGTERM, self.signal_handler) gevent.signal(signal.SIGINT, self.signal_handler) gevent.signal(signal.SIGQUIT, self.signal_handler) # ICMP scan if self.args.icmp: if geteuid() != 0: self.output.print_and_log( 'To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING) else: self.output.print_and_log('Starting ICMP scan.') self.hosts = helper.icmp_scan(self.hosts, self.args.timeout) self._calc_urls() self.output.print_and_log( 'After ICMP scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # SYN scan if self.args.syn: if self.args.tor or self.args.proxy is not None: self.output.print_and_log( 'SYN scan via tor or proxy is impossible!', logging.WARNING) self.output.print_and_log( 'Stopping to prevent deanonymization!', logging.WARNING) exit(-1) if geteuid() != 0: self.output.print_and_log( 'To use SYN scan option you must run as root. 
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log( 'After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threds count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log( 'Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
words = sys.argv[1:] queue_start = JoinableQueue() queue_to_search = JoinableQueue() queue_to_download = JoinableQueue() pbar = ProgressBar(maxval=len(words) * 3).start() for word in words: queue_start.put_nowait(word) CheckWord(queue_start, queue_to_search, pbar).start() SearchWord(queue_to_search, queue_to_download, pbar).start() Downloading(queue_to_download, pbar).start() queue_start.join() queue_to_search.join() queue_to_download.join() pbar.finish() exists = ', '.join(WORDS_STATUS['exists']) not_found = ', '.join(WORDS_STATUS['not_found']) downloaded = ', '.join(WORDS_STATUS['downloaded']) if exists: cprint('Files exists: {0}'.format(exists), 'green') if not_found: cprint('Files not_found: {0}'.format(not_found), 'red')
class GeventConsumer(object): def __init__( self, consumer_config=None, topic=None, parse_func=None, num=8, auto_commit_offset=False, is_debug=False, ): if not parse_func: raise Exception("not parse func, system exit") self.parse = parse_func self.queue = Queue(100) self.stop_flag = Event() self.num = num self.debug = is_debug if not self.debug: self.auto_commit_offset = auto_commit_offset if isinstance(consumer_config, dict): consumer_config.update({'enable.auto.commit':self.auto_commit_offset}) self.consumer = Consumer(consumer_config) self.topic = topic self.consumer.subscribe(self.topic) def sign_handler(self, sig, frame): print(" >>> Termination_signal:[{}] to stop".format(sig)) self.stop_flag.set() def kafka_to_queue(self): logger.info("Start Producer thread") m = 0 time_diff = 0 start_time = time.time() while not self.stop_flag.is_set(): msg = self.consumer.poll(1) if msg is None: time.sleep(0.001) return err = msg.error() if err: if err.code() == KafkaError._PARTITION_EOF: logger.debug( '%s [%s] reached end at offset %s', msg.topic(), msg.partition(), msg.offset() ) else: logger.error('kafka failed, system exit') self.stop_flag.set() self.queue.put(msg) # 消费速度统计 m += 1 current_time = time.time() time_diff = current_time - start_time if time_diff > 10: rate = m / time_diff start_time = current_time m = 0 logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize())) logger.info("Producer thread has stopped") def consume(self): logger.info('Start Thread To Consumer') data = dict() stop = False while True: stop = self.stop_flag.is_set() if stop and self.queue.empty(): break msg = self.queue.get() try: data = self.parse(msg.value()) if data: self.handle_data(data, stop) finally: self.queue.task_done() if not stop and not self.auto_commit_offset: self.consumer.commit(msg) logger.info('Thread Consumer has stopped') def handle_data(self, data, stop): raise NotImplementedError def consume_forever(self): """ start consume forever """ signal(SIGTERM, self.sign_handler) signal(SIGINT, self.sign_handler) if self.debug: consume_func = self.mock_consume produce_func = self.mock_kafka else: consume_func = self.consume produce_func = self.kafka_to_queue task_list = [] for _ in range(self.num): task_list.append(gevent.spawn(consume_func)) produce_func() self.queue.join() if not self.debug: logger.info("closing kafka...") self.consumer.close() gevent.joinall(task_list, timeout=5) logger.info('Exiting with qsize:%d' % self.queue.qsize()) # ===========mock kafka and consumer======================= def mock_kafka(self): logger.info("Start Producer thread") m = 0 time_diff = 0 start_time = time.time() # jing5 msg msg = "23230254455354325631393046433232323232320101008e14080b0e0c38426e0101008422551354455354325631393046433232323232323131313131313131313131313131313131313131313131313131313131313131313130010000000002803365818a91eb00010002fffe050018fffe2eeb596f50830005e91efd02649c6b7eb1ac0d80000043c497fd0022f90a3d057b2403032581373635343332310082e99f008a06".decode('hex') while not self.stop_flag.is_set(): self.queue.put(msg) m += 1 # 消费速度统计 current_time = time.time() time_diff = current_time - start_time if time_diff > 5: rate = m / time_diff start_time = current_time m = 0 logger.info('consumer_rate:[%.2f]p/s, queue_size:[%d]' % (rate, self.queue.qsize())) logger.info("closing produce...") logger.info("Producer thread has stopped") def mock_consume(self): logger.info('Start Thread To Consumer') data = dict() stop = False while True: stop = self.stop_flag.is_set() if stop and 
self.queue.empty(): break msg = self.queue.get() try: data = self.parse(msg) self.handle_data(data, stop) except Exception as err: logger.error("consumer:{}".format(getcurrent())) finally: self.queue.task_done() logger.info('Thread Consumer has stopped')
class ConcurrentBase(object): """ Provides the following useful methods to its inheriting classes: + _debug() + _notify() + _put() + finish() """ def __init__(self, monitor, workers=1): self.klass = type(self) self.klass_name = self.klass.__name__ self.FINISHED_PROCESSING = '{0}: finished processing'.format(self.klass_name) self._monitor = monitor self._workers_to_start = workers self._read_commands_q, self._write_commands_q = None, None self._setup_command_system() gevent.sleep(0) def _debug(self, msg, debug_level=None): self._monitor.debug('{0}: {1}'.format(self.klass_name, msg), debug_level) def finish(self): self._prevent_new_requests_from_being_processed() gevent.spawn(self._wait_for_processing_to_finish) gevent.sleep(0) def _notify(self, notification_msg): self._monitor.notify(self.klass, notification_msg) def _prevent_new_requests_from_being_processed(self): # don't accept new commands after receiving a finish command self._write_commands_q = ThrowawayCommandsQueue() def _process_commands(self): while True: try: ## do arbitrary command func, args = self._read_commands_q.get() func(args) finally: self._read_commands_q.task_done() def _put(self, method, args): ## tell some worker to do arbitrary command self._write_commands_q.put((method, args)) gevent.sleep(0) def _setup_command_system(self): # we have two refs to the commands queue, # but write_commands_q will switch to throwaway # after we receive a finish command self._read_commands_q = JoinableQueue(None) self._write_commands_q = self._read_commands_q for x in range(self._workers_to_start): gevent.spawn(self._process_commands) def _wait_for_processing_to_finish(self): self._read_commands_q.join() self._monitor.notify(self.klass, self.FINISHED_PROCESSING)
class Migrator: def __init__(self, scheme, create_devices=True, write_data=True, start_date="2000-01-01T00:00:00Z", end_date="2014-12-31T00:00:00Z", pool_size=3): self.scheme = scheme self.create_devices = create_devices self.should_write_data = write_data self.start_date = start_date self.end_date = end_date self.tdb = TDBClient(scheme.db_key, scheme.db_key, scheme.db_secret, base_url=scheme.db_baseurl) iq_endpoint = HTTPEndpoint(scheme.iq_baseurl, scheme.iq_key, scheme.iq_secret) self.tiq = TIQClient(iq_endpoint) self.queue = JoinableQueue() self.lock = Lock() self.dp_count = 0 self.req_count = 0 self.dp_reset = time.time() for i in range(pool_size): gevent.spawn(self.worker) def worker(self): while True: series = self.queue.get() try: self.migrate_series(series) finally: self.queue.task_done() def migrate_all_series(self, start_key="", limit=None): start_time = time.time() (keys, tags, attrs) = self.scheme.identity_series_filter() series_set = self.tdb.list_series(keys, tags, attrs) # Keep our own state of whether we passed the resume point, so we don't # need to assume client and server sort strings the same. found_first_series = False series_count = 0 for series in series_set: if not found_first_series and series.key < start_key: continue else: found_first_series = True if limit and series_count >= limit: print("Reached limit of %d devices, stopping." % (limit)) break if self.scheme.identity_series_client_filter(series): # If the series looks like an identity series, # queue it to be processed by the threadpool self.queue.put(series) series_count += 1 self.queue.join() end_time = time.time() print("Exporting {} devices took {} seconds".format(series_count, end_time - start_time)) def migrate_series(self, series): print(" Beginning to migrate series: %s" % (series.key)) error = False try: if self.create_devices: error = self.create_device(series) if self.should_write_data and not error: error = self.write_data(series) except Exception, e: logging.exception(e) error = True if not error: print("COMPLETED migrating for series %s" % (series.key)) else: print("ERROR migrating series %s" % (series.key))
class InterceptedStreamsMixin(object): """ Mixin class for GethProcess instances that feeds all of the stdout and stderr lines into some set of provided callback functions. """ stdout_callbacks = None stderr_callbacks = None def __init__(self, *args, **kwargs): super(InterceptedStreamsMixin, self).__init__(*args, **kwargs) self.stdout_callbacks = [] self.stdout_queue = JoinableQueue() self.stderr_callbacks = [] self.stderr_queue = JoinableQueue() def register_stdout_callback(self, callback_fn): self.stdout_callbacks.append(callback_fn) def register_stderr_callback(self, callback_fn): self.stderr_callbacks.append(callback_fn) def produce_stdout_queue(self): for line in iter(self.proc.stdout.readline, b''): self.stdout_queue.put(line) gevent.sleep(0) def produce_stderr_queue(self): for line in iter(self.proc.stderr.readline, b''): self.stderr_queue.put(line) gevent.sleep(0) def consume_stdout_queue(self): while True: line = self.stdout_queue.get() for fn in self.stdout_callbacks: fn(line.strip()) gevent.sleep(0) def consume_stderr_queue(self): while True: line = self.stderr_queue.get() for fn in self.stderr_callbacks: fn(line.strip()) gevent.sleep(0) def start(self): super(InterceptedStreamsMixin, self).start() gevent.spawn(self.produce_stdout_queue) gevent.spawn(self.produce_stderr_queue) gevent.spawn(self.consume_stdout_queue) gevent.spawn(self.consume_stderr_queue) def stop(self): super(InterceptedStreamsMixin, self).stop() try: self.stdout_queue.join(5) except Timeout: pass try: self.stderr_queue.join(5) except Timeout: pass
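A quick way to exercise the mixin without a real geth binary is to pair it with a stub process class. The harness below is purely illustrative: StubProcess stands in for the project's actual GethProcess base class and only provides the pieces the mixin touches (a proc attribute with stdout/stderr pipes, plus start() and stop()).

# Hypothetical harness for InterceptedStreamsMixin; everything except the
# mixin itself is an assumption made for the sketch.
from gevent import monkey
monkey.patch_all()  # so the blocking readline() calls cooperate with gevent

import subprocess
import gevent


class StubProcess(object):
    def __init__(self, *args, **kwargs):
        self.proc = None

    def start(self):
        self.proc = subprocess.Popen(
            ['sh', '-c', 'echo hello; echo oops 1>&2'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    def stop(self):
        self.proc.wait()


class LoggedProcess(InterceptedStreamsMixin, StubProcess):
    pass


proc = LoggedProcess()
proc.register_stdout_callback(lambda line: print('stdout:', line))
proc.register_stderr_callback(lambda line: print('stderr:', line))
proc.start()
gevent.sleep(0.5)  # give the producer/consumer greenlets a chance to run
proc.stop()        # may wait up to the queue-join timeouts in the mixin's stop()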
log.error("database initialization error: %s" % (e, )) exit() #--------------------------------- # module initialization #--------------------------------- try: #the update manager maintains a queue of messages to be sent to connected clients. um = UpdateManager() pm = ProcessingModule(datadb, resourcedb, um) im = InstallationModule(RESOURCE_NAME, RESOURCE_URI, datadb) gevent.signal(signal.SIGQUIT, gevent.shutdown) gevent.spawn(worker) pqueue.join() log.info("module initialization completed... [SUCCESS]") except Exception, e: log.error("module initialization error: %s" % (e, )) #--------------------------------- # Web Server initialization #--------------------------------- try: debug(True) run(host=HOST, port=PORT, server='gevent') except Exception, e: log.error("Web Server Exception: %s" % (e, )) exit()
#arbitrage_dg = async_result_opportunity.get() #print 'transactionTracking() received arbitrage polygon:', arbitrage_dg, '\n' print ' Ready to begin transaction....' # Input Tracking while 1: gevent.joinall([ gevent.spawn(ratesTracking), gevent.spawn(arbiterTracking), #gevent.spawn(transactionTracking), ]) stdscr.addstr(0, 1, 'Arbitrage Program created by Glen Baker', curses.A_REVERSE) stdscr.addstr(1, 1, "- Press any key to continue, 'q' to quit") c = stdscr.getch() if c == ord('q'): # Shutdown Curses UI curses.nocbreak(); stdscr.keypad(0); curses.echo() curses.endwin() rates_q = JoinableQueue() # Clear Queue break #if c == ord('r'): # if rates_tracking: # rates_tracking = False # else: # rates_tracking = True # gevent.spawn(ratesTracking) rates_q.join()
class BaseLogger(Collected,Jobber): """\ This class implements one particular way to log things. """ storage = Loggers.storage q = None job = None ready = False _in_flush = False def __init__(self, level): self.level = level global logger_nr logger_nr += 1 if not hasattr(self,"name") or self.name is None: self.name = Name(self.__class__.__name__, "x"+str(logger_nr)) super(BaseLogger,self).__init__() self._init() def _init(self): """Fork off the writer thread. Override this to do nothing if you don't have one.""" self.q = JoinableQueue(100) self.start_job("job",self._writer) self.job.link(self.delete) if self.ready is False: self.ready = True else: self.stop_job("job") # concurrency issues? def _writer(self): errs = 0 for r in self.q: try: if r is FlushMe: self._flush() else: self._log(*r) except Exception as ex: errs += 1 fix_exception(ex) from moat.run import process_failure process_failure(ex) if errs > 10: reraise(ex) else: if errs: errs -= 1 finally: self.q.task_done() self.q.task_done() # for the StopIter # Collection stuff def list(self): yield super(BaseLogger,self) yield ("Type",self.__class__.__name__) yield ("Level",LogNames[self.level]) yield ("Queue",self.q.qsize()) def info(self): return LogNames[self.level]+": "+self.__class__.__name__ def delete(self, ctx=None): if self.ready: self.ready = None super(BaseLogger,self).delete(ctx) try: if self.q: self.q.put(StopIteration,block=False) except Full: ## panic? pass if self.job is not None: self.job.join(timeout=1) self.stop_job("job") def _wlog(self, *a): try: self.q.put(a, block=False) except Full: ## panic? self.delete() def _log(self, level, *a): a=" ".join(( x if isinstance(x,six.string_types) else str(x) for x in a)) self._slog(level,a) def _slog(self, a): raise NotImplementedError("You need to override %s._log or ._slog" % (self.__class__.__name__,)) def _flush(self): pass def log(self, level, *a): if LogLevels[level] >= self.level: self._wlog(level,*a) if TESTING and not (hasattr(a[0],"startswith") and a[0].startswith("TEST")): self.flush() else: gevent.sleep(0) def log_event(self, event, level): if level >= self.level: for r in report_(event,99): self._wlog(LogNames[level],r) if TESTING: self.flush() def log_failure(self, err, level=WARN): if level >= self.level: self._wlog(LogNames[level],format_exception(err)) if TESTING: self.flush() def flush(self): if self._in_flush: return if self.q is not None: try: self._in_flush = True self.q.put(FlushMe) self.q.join() finally: self._in_flush = False def end_logging(self): self.flush() self.delete()
def start_fluud(): parser = argparse.ArgumentParser() parser.add_argument('host', help='mongo host') parser.add_argument('port', help='mongo port') parser.add_argument('--login', help='mongo login') parser.add_argument('--password', help='mongo password') args = parser.parse_args() if args.login and args.password: login = urllib.quote_plus(args.login) password = urllib.quote_plus(args.password) uri = 'mongodb://{}:{}@{}:{}/'.format(login, password, args.host, args.port) else: uri = 'mongodb://{}:{}/'.format(args.host, args.port) client = MongoClient(uri) template = { "first_sample_timestamp": dateutil.parser.parse("2015-09-02T13:08:20.314Z"), "last_sample_timestamp": dateutil.parser.parse("2015-09-02T13:08:20.314Z"), "metadata": { "typeURI": "http://schemas.dmtf.org/cloud/audit/1.0/event", "initiator": { "typeURI": "service/security/account/user", "host": { "address": "192.168.0.2" }, "id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4", "name": "8b07b49216d243d2b49561759bd104f4" }, "target": { "typeURI": "service/security/account/user", "id": "openstack:fc43ddcf-d147-466c-adfe-d60bd2b773ba" }, "observer": { "typeURI": "service/security", "id": "openstack:a256def4-0a36-472e-95e5-e456db4e0681" }, "eventType": "activity", "eventTime": "2015-09-02T13:08:20.256770+0000", "host": "identity.node-1", "action": "authenticate", "outcome": "success", "id": "openstack:00244b9a-1a43-48a5-b75e-9d68dd647487", "event_type": "identity.authenticate" }, "meter": [ { "counter_name": "identity.authenticate.success", "counter_unit": "user", "counter_type": "delta" } ], "project_id": None, "source": "openstack", "user_id": "openstack:610e7d74-16af-4358-9b77-5275194fa6e4" } data = [copy.deepcopy(template) for _ in range(10000)] def progress(): while True: print client.ceilometer.resource.count() sys.stdout.flush() sleep(2) spawn(progress) def worker(): while True: q.get() try: client.ceilometer.resource.insert_many(copy.deepcopy(data), False) finally: q.task_done() q = JoinableQueue() for i in range(10): spawn(worker) for i in range(100): q.put(0) q.join()
class Worker (object): # http://www.gevent.org/gevent.wsgi.html # http://toastdriven.com/blog/2011/jul/31/gevent-long-polling-you/ # http://blog.pythonisito.com/2012/07/gevent-and-greenlets.html DEFAULT_PORT = "9311" def __init__ (self, port=DEFAULT_PORT): # REST services monkey.patch_all() signal(SIGQUIT, shutdown) self.is_config = False self.server = wsgi.WSGIServer(('', int(port)), self._response_handler, log=None) # sharding self.prefix = None self.shard_id = None self.ring = None # concurrency based on message passing / barrier pattern self._task_event = None self._task_queue = None # UnitOfWork self._uow = None def shard_start (self): """start the worker service for this shard""" self.server.serve_forever() def shard_stop (self, *args, **kwargs): """stop the worker service for this shard""" payload = args[0] if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): logging.info("worker service stopping... you can safely ignore any exceptions that follow") self.server.stop() else: # returns incorrect response in this case, to avoid exception logging.error("incorrect shard %s prefix %s", payload["shard_id"], payload["prefix"]) ###################################################################### ## authentication methods def auth_request (self, payload, start_response, body): """test the authentication credentials for a REST call""" if (self.prefix == payload["prefix"]) and (self.shard_id == payload["shard_id"]): return True else: # UoW caller did not provide correct credentials to access shard start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put("Forbidden, incorrect credentials for this shard\r\n") body.put(StopIteration) logging.error("incorrect credentials shard %s prefix %s", payload["shard_id"], payload["prefix"]) return False def shard_config (self, *args, **kwargs): """configure the service to run a shard""" payload, start_response, body = self.get_response_context(args) if self.is_config: # hey, somebody call security... 
start_response('403 Forbidden', [('Content-Type', 'text/plain')]) body.put("Forbidden, shard is already in a configured state\r\n") body.put(StopIteration) logging.warning("denied configuring shard %s prefix %s", self.shard_id, self.prefix) else: self.is_config = True self.prefix = payload["prefix"] self.shard_id = payload["shard_id"] # dependency injection for UnitOfWork uow_name = payload["uow_name"] logging.info("initializing unit of work based on %s", uow_name) ff = instantiate_class(uow_name) self._uow = ff.instantiate_uow(uow_name, self.prefix) start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("configuring shard %s prefix %s", self.shard_id, self.prefix) ###################################################################### ## barrier pattern methods @contextmanager def wrap_task_event (self): """initialize a gevent.Event, to which the UnitOfWork will wait as a listener""" self._task_event = Event() yield # complete the Event, notifying the UnitOfWork which waited self._task_event.set() self._task_event = None def _consume_task_queue (self): """consume/serve requests until the task_queue empties""" while True: payload = self._task_queue.get() try: self._uow.perform_task(payload) finally: self._task_queue.task_done() def prep_task_queue (self): """prepare task_queue for another set of distributed tasks""" self._task_queue = JoinableQueue() spawn(self._consume_task_queue) def put_task_queue (self, payload): """put the given task definition into the task_queue""" self._task_queue.put_nowait(payload) def queue_wait (self, *args, **kwargs): """wait until all shards finished sending task_queue requests""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): if self._task_event: self._task_event.wait() # HTTP response first, then initiate long-running task start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) def queue_join (self, *args, **kwargs): """join on the task_queue, as a barrier to wait until it empties""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("join queue...\r\n") ## NB: TODO this step of emptying out the task_queue on ## shards could take a while on a large run... perhaps use ## a long-polling HTTP request or websocket instead? 
self._task_queue.join() body.put("done\r\n") body.put(StopIteration) ###################################################################### ## hash ring methods def ring_init (self, *args, **kwargs): """initialize the HashRing""" payload, start_response, body = self.get_response_context(args) if self.auth_request(payload, start_response, body): self.ring = payload["ring"] start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) logging.info("setting hash ring %s", self.ring) ###################################################################### ## WSGI handler for REST endpoints def get_response_context (self, args): """decode the WSGI response context from the Greenlet args""" env = args[0] msg = env["wsgi.input"].read() payload = loads(msg) start_response = args[1] body = args[2] return payload, start_response, body def _response_handler (self, env, start_response): """handle HTTP request/response""" uri_path = env["PATH_INFO"] body = JoinableQueue() if self._uow and self._uow.handle_endpoints(self, uri_path, env, start_response, body): pass ########################################## # Worker endpoints elif uri_path == '/shard/config': # configure the service to run a shard Greenlet(self.shard_config, env, start_response, body).start() elif uri_path == '/shard/stop': # shutdown the service ## NB: must parse POST data specially, to avoid exception payload = loads(env["wsgi.input"].read()) Greenlet(self.shard_stop, payload).start_later(1) # HTTP response starts first, to avoid error after server stops start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Goodbye\r\n") body.put(StopIteration) elif uri_path == '/queue/wait': # wait until all shards have finished sending task_queue requests Greenlet(self.queue_wait, env, start_response, body).start() elif uri_path == '/queue/join': # join on the task_queue, as a barrier to wait until it empties Greenlet(self.queue_join, env, start_response, body).start() elif uri_path == '/check/persist': ## NB: TODO checkpoint the service state to durable storage start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/check/recover': ## NB: TODO restart the service, recovering from most recent checkpoint start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # HashRing endpoints elif uri_path == '/ring/init': # initialize the HashRing Greenlet(self.ring_init, env, start_response, body).start() elif uri_path == '/ring/add': ## NB: TODO add a node to the HashRing start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) elif uri_path == '/ring/del': ## NB: TODO delete a node from the HashRing start_response('200 OK', [('Content-Type', 'text/plain')]) body.put("Bokay\r\n") body.put(StopIteration) ########################################## # utility endpoints elif uri_path == '/': # dump info about the service in general start_response('200 OK', [('Content-Type', 'text/plain')]) body.put(str(env) + "\r\n") body.put(StopIteration) else: # ne znayu start_response('404 Not Found', [('Content-Type', 'text/plain')]) body.put('Not Found\r\n') body.put(StopIteration) return body
class LeakQueue(object):
    def __init__(self, maxsize=0, workers=10):
        """ Set up the gevent queue and the workers.

        :param int maxsize: the max length of the queue; by default the queue size is infinite.
        :param int workers: the number of workers, default=10.
        """
        self.queue = JoinableQueue(maxsize=maxsize)
        [spawn(self.worker) for x in xrange(workers)]

    def __repr__(self):
        return u'{} items in queue'.format(self.queue.qsize())

    def put(self, operation, item, date=None):
        """ Each item is queued for later processing.

        :param str operation: the operation name.
        :param item: the item to queue.
        :param date date: when the item is triggered.
        :returns: True if the insertion succeeds, False otherwise.
        """
        try:
            self.queue.put({
                "operation": operation,
                "item": item,
                "date": date or datetime.utcnow()
            })
            self.flush()
        except Exception as e:
            logger.critical(
                'unable to put an item in the queue :: {}'.format(e))
            return False
        else:
            return True

    def flush(self, force=False):
        """ Flush the queue and block until all tasks are done.

        :param boolean force: force the queue flushing
        :returns: True if the flush occurs, False otherwise.
        """
        if self.queue.full() or force:
            logger.info('queue is full ({} items) :: flush it !'.format(
                self.queue.qsize()))
            self.queue.join()
            return True
        return False

    def worker(self):
        while True:
            try:
                # Note: a blocking get() with no timeout never raises Empty,
                # so the except branch below only fires if a timeout is added.
                item = self.queue.get()
                logger.info('get item :: {}'.format(item))
                if not self.worker_process(item):
                    logger.info('re-queue item :: {}'.format(item))
                    self.queue.put(item)
            except Empty:
                logger.info('queue is empty')
            else:
                self.queue.task_done()

    def worker_process(self, item):
        """ Default action executed by each worker.

        Must return True to drop the item; otherwise the worker puts
        the item back into the queue.
        """
        g_sleep()
        return item
sleep(1) else: sleep(5) if __name__ == '__main__': t_status = spawn_link_exception(status_thread) t_item_queue = spawn_link_exception(add_to_item_queue) for i in range(80): spawn_link_exception(run_find_item) #t_index_items = spawn_link_exception(index_items) for i in range(8): spawn_link_exception(run_solr_queue, i) #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr]) sleep(1) print 'join item_queue thread' t_item_queue.join() print 'item_queue thread complete' #print 'join item_and_host_queue:', item_and_host_queue.qsize() #item_and_host_queue.join() #print 'item_and_host_queue complete' for host, host_queue in host_queues.items(): qsize = host_queue.qsize() print 'host:', host, qsize host_queue.join() print 'join solr_queue:', solr_queue.qsize() solr_queue.join() print 'solr_queue complete'
signal.setitimer(signal.ITIMER_REAL,delay + delay * threshold) gevent.sleep(delay) if __name__ == '__main__': if len(sys.argv) < 3: sys.exit('Usage: %s worker_id concurrency' % sys.argv[0]) wd = gevent.spawn(watchdog) worker_id = sys.argv[1] concurrency = int(sys.argv[2]) queue = JoinableQueue(maxsize=concurrency) pool = Pool(concurrency) context = zmq.Context() # Socket to receive ssh hosts on receiver = context.socket(zmq.PULL) #receiver.setsockopt(zmq.RCVHWM, concurrency) receiver.connect("tcp://localhost:5557") # Socket to send uptime results to sender = context.socket(zmq.PUSH) sender.connect("tcp://localhost:5558") ssh_workers = [ pool.spawn(uptime, i, worker_id, queue, sender) for i in xrange(concurrency) ] recv_hosts(receiver, queue) queue.join()
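recv_hosts is not included in this fragment; assuming the upstream ventilator pushes one host per ZeroMQ message, a minimal version would simply forward hosts into the bounded queue so that maxsize provides back-pressure for the uptime workers:

# Hypothetical recv_hosts: pull host strings off the ZeroMQ PULL socket and
# feed them into the JoinableQueue consumed by the workers. Treating an empty
# string as an end-of-stream marker is an assumption, not the original logic.
def recv_hosts(receiver, queue):
    while True:
        host = receiver.recv_string()
        if not host:
            break
        queue.put(host)  # blocks once `concurrency` items are outstanding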
class HttpScanner(object): def __init__(self, args): """ Initialise HTTP scanner :param args: :return: """ self.args = args self.output = HttpScannerOutput(args) self._init_scan_options() # Reading files self.output.write_log("Reading files and deduplicating.", logging.INFO) self.hosts = self._file_to_list(args.hosts) self.urls = self._file_to_list(args.urls) # self._calc_urls() out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count) if self.args.ports is not None: out += ' %i ports' % len(self.args.ports) self.output.print_and_log(out) if self.args.ports is not None and not self.args.syn: new_hosts = [] for host in self.hosts: for port in self.args.ports: # print(host, port) new_hosts.append(helper.generate_url(host, port)) self.hosts = new_hosts # self._calc_urls() self.output.print_and_log('%i full urls to scan' % self.full_urls_count) # Queue and workers self.hosts_queue = JoinableQueue() self.workers = [] def _file_to_list(self, filename, dedup=True): """ Get list from file :param filename: file to read :return: list of lines """ if not path.exists(filename) or not path.isfile(filename): self.output.print_and_log('File %s not found!' % filename, logging.ERROR) exit(-1) # Preparing lines list lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n')) if len(lines) == 0: self.output.print_and_log('File %s is empty!' % filename, logging.ERROR) exit(-1) return helper.deduplicate(lines) if dedup else lines def _init_scan_options(self): # Session self.session = session() self.session.timeout = self.args.timeout self.session.verify = False # TODO: debug and check # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries)) # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries)) # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request # Max retries adapters.DEFAULT_RETRIES = self.args.max_retries # TOR if self.args.tor: self.output.write_log("TOR usage detected. Making some checks.") self.session.proxies = { 'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050' } url = 'http://ifconfig.me/ip' real_ip, tor_ip = None, None # Ger real IP address try: real_ip = get(url).text.strip() except Exception as exception: self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Get TOR IP address try: tor_ip = self.session.get(url).text.strip() except Exception as exception: self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Show IP addresses self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip)) if real_ip == tor_ip: self.output.print_and_log("TOR doesn't work! 
Stop to be secure.", logging.ERROR) exit(-1) # Proxy if self.args.proxy is not None: self.session.proxies = {"https": self.args.proxy, "http": self.args.proxy} # Auth if self.args.auth is not None: items = self.args.auth.split(':') self.session.auth = (items[0], items[1]) # Cookies self.cookies = {} if self.args.cookies is not None: self.cookies = Cookies.from_request(self.args.cookies) # Cookies from file if self.args.load_cookies is not None: if not path.exists(self.args.load_cookies) or not path.isfile(self.args.load_cookies): self.output.print_and_log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) exit(-1) self.cookies = MozillaCookieJar(self.args.load_cookies) self.cookies.load() self.session.cookies = self.cookies # User-Agent self.ua = UserAgent() if self.args.random_agent else None def worker(self, worker_id): self.output.write_log('Worker %i started.' % worker_id) while not self.hosts_queue.empty(): host = self.hosts_queue.get() try: self.scan_host(worker_id, host) finally: self.output.write_log('Worker %i finished.' % worker_id) self.hosts_queue.task_done() def _head_available(self, host): """ Determine if HEAD requests is allowed :param host: :return: """ # Trying to use OPTIONS request try: response = self.session.options(host, headers=self._fill_headers()) o = response.headers['allow'] if 'allow' in response.headers else None if o is not None and o.find('HEAD') != -1: return True except: # TODO: fix pass try: return False if self.session.head(host, headers=self._fill_headers()).status_code == 405 else True except: # TODO: fix return False def scan_host(self, worker_id, host): # check if resolvable ip = helper.url_to_ip(host) if ip is None: self.output.write_log('Could not resolve %s Skipping...' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) return # Check for HEAD host_url = helper.host_to_url(host) head_available = False if self.args.head: head_available = self._head_available(host) if head_available: self.output.write_log('HEAD is supported for %s' % host) errors_count, urls_scanned = 0, 0 for url in self.urls: full_url = urljoin(host_url, url) r = self.scan_url(full_url, head_available) urls_scanned += 1 self.output.urls_scanned += 1 # Output r['worker'] = worker_id self.output.write(**r) if r['exception'] is not None: errors_count += 1 # Skip host on errors if self.args.skip is not None and errors_count == self.args.skip: self.output.write_log('Errors limit reached on %s Skipping other urls.' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) - urls_scanned break # cookies bugfix? 
self.session.cookies.clear() def _fill_headers(self): # Fill UserAgent in headers headers = {} if self.args.user_agent is not None: headers['User-agent'] = self.args.user_agent elif self.args.random_agent: headers['User-agent'] = self.ua.random # Fill Referer in headers if self.args.referer is not None: headers['Referer'] = self.args.referer return headers def _parse_response(self, url, response, exception): res = {'url': url, 'response': response, 'exception': exception} if response is None or exception is not None: res.update({ 'status': -1, 'length': -1, }) return res try: length = int(response.headers['content-length']) if 'content-length' in response.headers else len( response.text) except Exception as exception: self.output.write_log( "Exception while getting content length for URL: %s Exception: %s" % (url, str(exception)), logging.ERROR) length = 0 res.update({ 'status': response.status_code, 'length': length, }) return res def scan_url(self, url, use_head=False): self.output.write_log('Scanning %s' % url, logging.DEBUG) # Query URL and handle exceptions response, exception = None, None method = 'HEAD' if use_head else 'GET' try: # TODO: add support for user:password in URL response = self.session.request(method, url, headers=self._fill_headers(), allow_redirects=self.args.allow_redirects) except ConnectionError as ex: self.output.write_log('Connection error while quering %s' % url, logging.ERROR) exception = ex except HTTPError as ex: self.output.write_log('HTTP error while quering %s' % url, logging.ERROR) exception = ex except Timeout as ex: self.output.write_log('Timeout while quering %s' % url, logging.ERROR) exception = ex except TooManyRedirects as ex: self.output.write_log('Too many redirects while quering %s' % url, logging.ERROR) exception = ex except Exception as ex: self.output.write_log('Unknown exception while quering %s' % url, logging.ERROR) exception = ex # print('cookies: %s' % self.cookies) print('session.cookies: %s' % self.session.cookies) # self.session.cookies = self.cookies return self._parse_response(url, response, exception) def signal_handler(self): """ Signal hdndler :return: """ # TODO: add saving status via pickle self.output.print_and_log('Signal caught. Stopping...', logging.WARNING) self.stop() exit(signal.SIGINT) def _calc_urls(self): # Calculations self.urls_count = len(self.urls) self.hosts_count = len(self.hosts) self.full_urls_count = len(self.urls) * len(self.hosts) self.output.args.urls_count = self.full_urls_count def start(self): """ Start mulithreaded scan :return: """ # Set signal handler gevent.signal(signal.SIGTERM, self.signal_handler) gevent.signal(signal.SIGINT, self.signal_handler) gevent.signal(signal.SIGQUIT, self.signal_handler) # ICMP scan if self.args.icmp: if geteuid() != 0: self.output.print_and_log('To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING) else: self.output.print_and_log('Starting ICMP scan.') self.hosts = helper.icmp_scan(self.hosts, self.args.timeout) self._calc_urls() self.output.print_and_log('After ICMP scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # SYN scan if self.args.syn: if self.args.tor or self.args.proxy is not None: self.output.print_and_log('SYN scan via tor or proxy is impossible!', logging.WARNING) self.output.print_and_log('Stopping to prevent deanonymization!', logging.WARNING) exit(-1) if geteuid() != 0: self.output.print_and_log('To use SYN scan option you must run as root. 
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log('After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threds count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log('Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
class AsynSpiderWithGevent(MySpider):
    def __init__(self, out=BasicAnalysis(), **kwargs):
        super(AsynSpiderWithGevent, self).__init__(out, **kwargs)
        self.q = JoinableQueue()
        self.fetching, self.fetched = set(), set()

    def assign_jobs(self, jobs):
        for job in jobs:
            self.q.put(job)

    def run(self):
        if self.q.empty():
            url = LIST_URL + urllib.urlencode(self.list_query)
            self.q.put(url)
        for _ in range(CONCURRENCY):
            gevent.spawn(self.worker)
        self.q.join()
        assert self.fetching == self.fetched
        self._out.finish()

    def worker(self):
        while True:
            self.fetch_url()

    def fetch_url(self):
        current_url = self.q.get()
        try:
            if current_url in self.fetching:
                return
            self.fetching.add(current_url)
            resp = requests.get(current_url, headers=HEADERS)
            self.fetched.add(current_url)
            xml = etree.fromstring(resp.content)
            has_total_count = xml.xpath("//totalcount/text()")
            if has_total_count:  # non-empty means a list page, otherwise a detail page
                total_count = int(has_total_count[0])
                if total_count == 0:
                    return  # list page out of range
                if self.list_query["pageno"] == 1:
                    pageno = 2
                    # while pageno < 10:
                    while pageno <= total_count / PAGE_SIZE:
                        self.list_query["pageno"] = pageno
                        next_list_url = LIST_URL + urllib.urlencode(self.list_query)
                        self.q.put(next_list_url)
                        # logging.info(next_list_url)
                        pageno += 1
                job_ids = xml.xpath("//jobid/text()")
                job_detail_urls = []
                for ID in job_ids:
                    new_detail_query = DETAIL_QUERY.copy()
                    new_detail_query["jobid"] = ID
                    job_detail_urls.append(DETAIL_URL + urllib.urlencode(new_detail_query))
                for detail_url in job_detail_urls:
                    self.q.put(detail_url)
                    # logging.info(detail_url)
            else:
                self._out.collect(xml)
        finally:
            self.q.task_done()
    else:
        print name, 'exists'


def filewalk(path):
    # return ['file', ... ]
    w = os.walk(path)
    files = []
    # Can't be written as `for x in i for i in w`; that just returns a pile of None.
    [files.append(x) for i in w for x in i[2]]
    # for i in w:
    #     for x in i[2]:
    #         files.append(x)
    return files


START = time.time()
FILEEXISTS = filewalk('./')

while True:
    tasks = JoinableQueue(maxsize=10)  # switched while debugging
    # tasks = Queue()
    tasks.join()
    # tag = u"rina+aizawa"  # tag
    recs = [gevent.spawn(tr, tag) for tag in tags]
    workers = [gevent.spawn(worker, n) for n in xrange(20)]
    gevent.joinall(workers)
    gevent.joinall(recs)
log.error( "database initialization error: %s" % ( e, ) ) exit() #--------------------------------- # module initialization #--------------------------------- try: #the update manager maintains a queue of messages to be sent to connected clients. um = UpdateManager() pm = ProcessingModule( datadb, resourcedb, um ) im = InstallationModule( RESOURCE_NAME, RESOURCE_URI, datadb ) gevent.signal(signal.SIGQUIT, gevent.shutdown) gevent.spawn(worker) pqueue.join() log.info( "module initialization completed... [SUCCESS]" ); except Exception, e: log.error( "module initialization error: %s" % ( e, ) ) #--------------------------------- # Web Server initialization #--------------------------------- try: debug( True ) run( host=HOST, port=PORT, server='gevent') except Exception, e: log.error( "Web Server Exception: %s" % ( e, ) ) exit()
save_queue.put((word, direction, data)) conn.close() arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--dict', type=str, help='dict path') arg_parser.add_argument('--direction', type=str, help='direction') args = arg_parser.parse_args() word_list = get_words(args.dict) print('word list size = %d' % (len(word_list))) print(word_list[0:10]) fetch_queue = JoinableQueue() save_queue = JoinableQueue() for i in range(100): gevent.spawn(fetch_worker, fetch_queue, save_queue, args.direction) gevent.spawn(save_worker, DSN, save_queue) for word in word_list: fetch_queue.put(word) fetch_queue.join() save_queue.join()
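The script above assumes fetch_worker and save_worker helpers (and a get_words loader) defined earlier; a rough sketch of the worker pair, with the lookup and storage steps stubbed out so only the queue plumbing is real, might look like this:

# Hypothetical worker pair for the dictionary fetcher above; lookup_word and
# store_entry are stubs standing in for the real HTTP lookup and DB insert.
def lookup_word(word, direction):
    return '<definition of %s (%s)>' % (word, direction)   # stub

def store_entry(dsn, word, direction, data):
    print(word, direction, data)                            # stub

def fetch_worker(fetch_queue, save_queue, direction):
    while True:
        word = fetch_queue.get()
        try:
            save_queue.put((word, direction, lookup_word(word, direction)))
        finally:
            fetch_queue.task_done()

def save_worker(dsn, save_queue):
    while True:
        word, direction, data = save_queue.get()
        try:
            store_entry(dsn, word, direction, data)
        finally:
            save_queue.task_done()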
import gevent
from gevent.queue import JoinableQueue


def worker():
    while True:
        item = q.get()
        try:
            do_work(item)
        finally:
            q.task_done()


num_worker_threads = 3

q = JoinableQueue()
for i in range(num_worker_threads):
    gevent.spawn(worker)

for item in source():
    q.put(item)

q.join()  # block until all tasks are done
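The minimal pattern above never stops its workers; when the producer is finite, one common extension (shown here as a sketch, not part of the original example) is to send one sentinel per worker after join() so each greenlet can exit cleanly:

import gevent
from gevent.queue import JoinableQueue

SENTINEL = object()

def worker(q):
    while True:
        item = q.get()
        try:
            if item is SENTINEL:
                break            # clean shutdown signal
            print('processing', item)
        finally:
            q.task_done()

q = JoinableQueue()
workers = [gevent.spawn(worker, q) for _ in range(3)]
for item in range(10):
    q.put(item)
q.join()                         # wait for the real work to finish
for _ in workers:
    q.put(SENTINEL)              # one sentinel per worker
gevent.joinall(workers)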
def scrape(self, url=None, scraper_name='index', session=None, burst_limit=None, rate_limit=None, receivers=[], initial_scope={}, exception_handler=None): pool = Pool(10000) # almost no limit, limit connections instead job_queue = JoinableQueue() data_queue = JoinableQueue() scope = Scope() scope['log'] = logbook.Logger(self.name) scope['push_data'] = lambda name, data:\ data_queue.put((name, data)) rs = session or requests.Session() rs.hooks['response'] = lambda r: glocal.log.info(r.url) cticket_gen = TicketGenerator(rate_limit, burst_limit) adapter = TicketBoundHTTPAdapter(cticket_gen) rs.mount('http://', adapter) rs.mount('https://', adapter) scope['requests'] = rs scope.update(initial_scope) job_queue.put(Job(self, scraper_name, url, scope)) aborted = False def run_job(job): # runs a single job in the current greenlet try: # setup new log for val in job.run(): job_queue.put(job.from_yield(val)) except CapacityError as e: job.log.warning('CapacityError: %s, backing off') job.log.debug(traceback.format_exc()) # FIXME: throttle except TemporaryError as e: job.log.warning('Temporary failure on %s, ' 'rescheduling') job.log.debug(traceback.format_exc()) job_queue.put(job.retry()) # FIXME: add limit for retries except PermanentError as e: job.log.error(e) job.log.debug(traceback.format_exc()) except CriticalError as e: job.log.critical(e) job.log.debug(traceback.format_exc()) job.log.debug('Aborting scrape...') except Exception as e: job.log.error('Error handling job "%s" "%s": %s' % (scraper_name, url, e)) job.log.debug(traceback.format_exc()) if exception_handler: exception_handler(sys.exc_info()) finally: job_queue.task_done() def job_spawner(): # using the pool, spawns a new job for every job in the queue while not aborted: job = job_queue.get() if job is None: break pool.spawn(run_job, job) def receiver_spawner(): while not aborted: record = data_queue.get() if record is None: break for receiver in receivers: pool.spawn(receiver.process, record, scope) data_queue.task_done() spawner_greenlet = pool.spawn(job_spawner) receiver_greenlet = pool.spawn(receiver_spawner) # join queue job_queue.join() data_queue.join() # tell spawner to exit job_queue.put(None) data_queue.put(None) pool.join() # now perform all post-processing for receiver in receivers: if receiver._post_process: post_scope = scope.new_child() post_scope['log'] = logbook.Logger('%s-post_process') post_scope.inject_and_call(receiver._post_process)
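The receivers passed to scrape() only need a process(record, scope) method plus an optional _post_process hook; a minimal, hypothetical receiver compatible with the loop above could be as small as:

# Hypothetical receiver for the scrape() loop above. Records arrive as the
# (name, data) tuples produced by scope['push_data']; _post_process is left
# falsy so the post-processing phase is skipped for this receiver.
class PrintReceiver(object):
    _post_process = None

    def process(self, record, scope):
        name, data = record
        scope['log'].info('{}: {!r}'.format(name, data))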
zfile = '%s.zip' % site_id with zipfile.ZipFile(zfile) as z: z.extractall('tmp') file_workers = [ pool.spawn(upload_files, i, worker_id, file_queue) for i in xrange(concurrency) ] for dirname, dirnames, filenames in os.walk('tmp/%s' % site_id): # print path to all subdirectories first. files = [] for filename in filenames: files.append(os.path.join(dirname, filename)) for f in files: file_queue.put(f, block=False) print "START_DIRS" dirs = [] for subdirname in dirnames: dirs.append(os.path.join(dirname, subdirname)) if dirs: print "POOLING:", dirs dir_pool.imap(mkdirs, dirs) print "END" #joinall(dir_jobs) #joinall([ # spawn([s_dir] + dirs) for s_dir, dirs in skel_dirs.iteritems() #]) file_queue.join()
for i in range(NUM_THEME_WORKER_THREADS): gevent.spawn(theme_worker) for i in range(NUM_PROJECT_WORKER_THREADS): gevent.spawn(project_worker) # i = 0 for item in get_themes(): q.put(item) # i += 1 # if i >= 1: # break try: q.join() # block until all tasks are done project_queue.join() except KeyboardInterrupt: logging.info('CTRL-C: save before exit') raise length_queue.put(StopIteration) max_length = 0 for length in length_queue: if max_length < length: max_length = length out_queue.put(StopIteration) data = None headers = ["Theme", "Activities (research area)", "Project Acronym", "Start Date", "End Date", "Project Cost", "Project Funding", "Project Status", "Contract Type", "Coordinator", "Project Reference", "Record"]