def test_fullduplex(self):

    def server():
        (client, addr) = self.listener.accept()
        # start reading, then, while reading, start writing. the reader should not hang forever
        N = 100000  # must be a big enough number so that sendall calls trampoline
        sender = gevent.spawn_link_exception(client.sendall, 't' * N)
        result = client.recv(1000)
        assert result == 'hello world', result
        sender.join(0.2)
        sender.kill()
        if client.__class__.__name__ == 'SSLObject':
            # if sslold.SSLObject is not closed then the other end will receive sslerror: (8, 'Unexpected EOF')
            # Not sure if it must be fixed but I don't want to waste time on that since
            # the preferred way to do ssl now is via gevent.ssl which works OK without explicit close
            client.close()

    #print '%s: client' % getcurrent()
    server_proc = gevent.spawn_link_exception(server)
    client = self.create_connection()
    client_reader = gevent.spawn_link_exception(client.makefile().read)
    gevent.sleep(0.001)
    client.send('hello world')
    # close() used to hang; this tests the "full duplex" bug
    client.close()
    server_proc.get()
    client_reader.get()
def start(self, checkpoint=None):
    for gl in self._greenlets:
        gl.kill()
    self.load_config()
    self._greenlets = [gevent.spawn_link_exception(self.periodic_checkpoint, 5)]
    for master_uri in self._config:
        self._greenlets.append(
            gevent.spawn_link_exception(self.replicate, master_uri, checkpoint))
def fetch_job(self):
    try:
        generator = gevent.spawn_link_exception(self.data_fetcher)
        handler = gevent.spawn_link_exception(self.event_handler)
        gevent.joinall([generator, handler])
    except gevent.GreenletExit, err:
        logging.warn(err)
def start(config, basedir, db, is_compressed_file_present, updater):
    logging.warn("transforming logs")
    repo = config["repo"]
    zmq_context = zmq.Context()
    col_out = wiring.Wire('collector_out', zmq_context=zmq_context)
    grouped_files, ips, estimator = get_grouped_files(basedir)
    device_router.add(db, ips, config)
    updater.update_stat('running')
    gevent.spawn_link_exception(progress.responder, estimator, zmq_context, repo)
    gevent.sleep(2)  # allow the config regeneration triggered by device_router.add to complete
    transform(grouped_files, col_out, estimator, config, updater)
    # delete the uploaded compressed file and extracted dir
    if is_compressed_file_present:
        dir_path = os.path.dirname(basedir)
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
    # Stopping immediately would remove the sid in norm_front and store_handler,
    # so sleep 1 hour, because events may still be queued in both.
    time.sleep(3600)
    updater.update_stat('finished')
def run(self):
    from SimpleSeer.OLAPUtils import ScheduledOLAP
    from SimpleSeer.states import Core
    import Pyro4

    core = Core(self.session)
    found_statemachine = False
    with open(self.options.program) as fp:
        exec fp in dict(core=core)
        found_statemachine = True
    if not found_statemachine:
        raise Exception("State machine " + self.options.program + " not found!")

    so = ScheduledOLAP()
    gevent.spawn_link_exception(so.runSked)

    core.start_socket_communication()

    if not self.options.disable_pyro:
        gevent.spawn_link_exception(core.run)
        Pyro4.Daemon.serveSimple(
            {core: "sightmachine.seer"},
            ns=True)
    else:
        core.run()
def run(self):
    from SimpleSeer.Backup import Backup

    listen = self.options.listen
    Backup.exportAll()
    if listen.upper() == 'TRUE':
        # spawn the bound method itself; calling Backup.listen() here would
        # run it inline instead of in the new greenlet
        gevent.spawn_link_exception(Backup.listen)
def __init__(self, src_sock, sink_sock, outstanding_chunks, max_timeout=600):
    self._instances.append(self)
    self.jobs = {}
    self._src_sock = src_sock
    self._sink_sock = sink_sock
    self._outstanding_chunks = 0
    self._outstanding_chunk_limit = outstanding_chunks
    self.max_timeout = max_timeout
    gevent.spawn_link_exception(self._g_sink_handler)
    self._pool = gevent.pool.Pool(outstanding_chunks)
def start():
    """Starts the agent core."""
    identity_cache = {}
    zmq_context = zmq.Context()
    ag_ws_in = AgentWsInWire(zmq_context)
    ag_push = AgentPushWire(zmq_context)
    ag_a_in = AgentAnalyticsInWire(zmq_context)
    gevent.spawn_link_exception(_webserver_handler, ag_ws_in, ag_push,
                                ag_a_in, identity_cache)
    _analyzer_handler(ag_a_in, ag_ws_in, identity_cache)
def subscribe(self, name):
    # Spawn a greenlet that listens for the event specified by name;
    # when a message is received, trigger that event.
    def callback(msg):
        data = jsondecode(msg.body)
        self.trigger(name, data)

    def listener():
        self._channel_manager.subscribe(name, callback)

    gevent.spawn_link_exception(listener)
def test_file2pipe(self):
    with gevent.Timeout(1):
        file = open(self.TESTFILE, "r")
        pipe = Pipe()

        gevent.spawn_link_exception(file_to_pipe, file, pipe).join()
        result = "".join(chunk for chunk in pipe)

        self.assertTrue(file.closed)
        self.assertTrue(pipe.finished())
        self.assertEqual(result, self.get_content(self.TESTFILE))
def test(self):
    def test_client(message):
        conn = socket.create_connection(('127.0.0.1', 6000)).makefile(bufsize=1)
        welcome = conn.readline()
        assert 'Welcome' in welcome, repr(welcome)
        conn.write(message)
        received = conn.read(len(message))
        self.assertEqual(received, message)
        conn._sock.settimeout(0.1)
        self.assertRaises(socket.timeout, conn.read, 1)

    client1 = gevent.spawn_link_exception(test_client, 'hello\r\n')
    client2 = gevent.spawn_link_exception(test_client, 'world\r\n')
    gevent.joinall([client1, client2])
def start(self):
    self.event_handler = EventHandler(self)
    self.event_handler.register_callback(
        self.context.send_with_norm_policy_and_repo)
    job_generator = JobGenerator(self)
    try:
        joblet = gevent.spawn_link_exception(job_generator.job_updater)
        eventlet = gevent.spawn_link_exception(
            self.event_handler.event_queue_handler)
        gevent.joinall([joblet, eventlet])
    except gevent.GreenletExit, err:
        logging.warn(err)
def start(self, job):
    q = None
    for p in self.parts:
        p.input = q
        p.output = q = Queue(1000)
    for p in self.parts:
        p._greenlet = gevent.spawn_link_exception(
            util.ending_request(job._mongodb), p.run)
def test_conditional_inspections(self):
    threshold = 20
    inspections = dict(
        motion=mock.Mock(),
        blob=mock.Mock())
    featuredata = dict(motion=10)
    self.core.get_inspection = lambda name: inspections[name]
    self.core.cameras = [mock.Mock()]

    def capture():
        print time.time(), 'Capture'
        capture.call_count += 1
    capture.call_count = 0

    def blob_ex(frame):
        print 'Blob'
        blob_ex.call_count += 1
    blob_ex.call_count = 0

    def motion_ex(frame):
        print 'Motion'
        motion_ex.call_count += 1
        result = mock.Mock()
        result.featuredata = featuredata
        return [result]
    motion_ex.call_count = 0

    inspections['motion'].execute = motion_ex
    inspections['blob'].execute = blob_ex
    self.core.cameras[0].capture = capture

    @self.core.state('start')
    def start_state(state):
        core = state.core
        core.set_rate(10.0)
        motion = core.get_inspection('motion')
        blob = core.get_inspection('blob')
        while True:
            core.tick()
            frame = core.capture()[0]
            features = motion.execute(frame)
            if features[0].featuredata['motion'] > threshold:
                blob.execute(frame)

    @self.core.on('start', 'terminate')
    @self.core.on('fast', 'terminate')
    def terminate(state, name, data):
        state.transition(None)

    gl = gevent.spawn_link_exception(self.core.run)
    gevent.sleep(0.2)
    print '===> Set motion to 55'
    featuredata['motion'] = 55
    gevent.sleep(0.2)
    print '===> Set motion to 4'
    featuredata['motion'] = 4
    gevent.sleep(0.2)
    print '===> Call terminate'
    self.core.trigger('terminate')
    gl.join()

    motion_calls = inspections['motion'].execute.call_count
    blob_calls = inspections['blob'].execute.call_count
    self.assertGreater(motion_calls, blob_calls)
    self.assertGreater(blob_calls, 0)
def run(self):
    from SimpleSeer.Backup import Backup

    if self.options.subsubcommand != 'import' and self.options.subsubcommand != 'export':
        self.log.info("Valid subcommands are import and export. Ignoring \"{}\".".format(self.options.subsubcommand))
    if self.options.subsubcommand == "export" and self.options.clean:
        self.log.info("Clean option not applicable when exporting. Ignoring.")
    if self.options.subsubcommand == "import" and self.options.listen:
        self.log.info("Listen option not applicable when importing. Ignoring.")

    if self.options.subsubcommand == "export":
        Backup.exportAll()
        if self.options.listen:
            # spawn the bound method itself; calling Backup.listen() here
            # would run it inline instead of in the new greenlet
            gevent.spawn_link_exception(Backup.listen)
    elif self.options.subsubcommand == "import":
        Backup.importAll(self.options.file, self.options.clean, self.options.skipbackfill)
def test_pipe2file(self):
    with gevent.Timeout(1):
        pipe = Pipe()
        file = tempfile.NamedTemporaryFile(delete=False)
        try:
            pf = gevent.spawn_link_exception(pipe_to_file, pipe, file)

            with open(self.TESTFILE, "r") as infile:
                for line in infile:
                    pipe.put(line)
            pipe.close()
            pf.join()

            self.assertTrue(file.closed)
            self.assertTrue(pipe.finished())
            self.assertEqualFiles(self.TESTFILE, file.name)
        finally:
            try:
                os.remove(file.name)
            except:
                pass
def spawn(self, handler, *args, **kwargs):
    """Spawn a new greenlet and link it to the current greenlet when an
    exception is raised. This causes the current process to stop if any
    of the spawned greenlets fails with an unhandled exception. Successful
    completion is also linked to the :meth:`remove` method, which removes
    the greenlet from the list of greenlets.

    :Parameters:
        handler : callable
            the function or object to run in the greenlet
        args : args
            positional arguments
        kwargs : keyword args
            keyword arguments
    """
    import types
    assert handler is not None
    greenlet = gevent.spawn_link_exception(self.__spawn_handler__(handler),
                                           *args, **kwargs)
    greenlet.link(lambda x: self.remove(greenlet))
    self._greenlets.append(greenlet)
    if isinstance(handler, types.FunctionType) or \
            isinstance(handler, types.MethodType):
        name = handler.__name__
    else:
        name = handler.__class__
    self._log.debug('spawn function %s in greenlet %s' % (name, str(greenlet)))
    assert greenlet is not None
    return greenlet
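# Note: gevent.spawn_link_exception, used throughout these snippets, is a
# gevent 0.13-era helper that was removed in gevent 1.0. A minimal shim
# sketch on modern gevent, built only from the documented gevent.spawn and
# Greenlet.link_exception APIs; the helper and callback names below are ours,
# not part of any of the projects above:

import gevent


def spawn_link_exception(func, *args, **kwargs):
    # Spawn normally; link_exception fires the callback only if the greenlet
    # dies with an unhandled exception (a kill/GreenletExit does not count).
    g = gevent.spawn(func, *args, **kwargs)
    g.link_exception(_reraise)
    return g


def _reraise(greenlet):
    # Greenlet.exception holds the exception the greenlet died with.
    # Raising it here hands it to the hub's error handler, which prints a
    # traceback; the 0.13 helper instead raised LinkedFailed in the greenlet
    # that created the link, so this shim is approximate, not a drop-in.
    raise greenlet.exception

# With this shim in scope, calls such as gevent.spawn_link_exception(server)
# in the snippets above should keep working in spirit on gevent 1.x.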
def listener_loop_runner():
    """Runs the loop that listens for incoming requests and dispatches
    each task to another engine.

    Args:
        None
    Returns:
        None
    """
    zmq_context = zmq.Context()
    pdf2textconverter_in = _create_pdf2textconverter_in(zmq_context)
    pdf2textconverter_out = _create_pdf2textconverter_out(zmq_context)
    while True:
        request = pdf2textconverter_in.recv_json()
        logging.debug('PREPROCESSOR: Received request %s' % request)
        gevent.spawn_link_exception(_process_request, pdf2textconverter_out,
                                    request)
def OlapCommand(self):
    try:
        from SeerCloud.OLAPUtils import ScheduledOLAP, RealtimeOLAP
    except:
        print 'Error starting OLAP schedules. This requires Seer Cloud'
        return 0

    from SimpleSeer.models.Inspection import Inspection, Measurement
    Inspection.register_plugins('seer.plugins.inspection')
    Measurement.register_plugins('seer.plugins.measurement')

    so = ScheduledOLAP()
    gevent.spawn_link_exception(so.runSked)

    ro = RealtimeOLAP()
    ro.monitorRealtime()
def run(self):
    if self.subsock and len(self.buttons):
        gevent.spawn_link_exception(Controls.checkSubscription, self)
    elif self.subsock:
        self.checkSubscription()
        return

    while True:
        while self.board.bytes_available():
            self.board.iterate()
        # make sure we have a clean buffer before bouncing the buttons
        for b in self.buttons:
            b.read()
        for p in self.potentiometers:
            p.read()
        gevent.sleep(0)
def listener_loop_runner():
    """Runs the loop that listens for incoming requests and dispatches
    each task to another engine.

    Args:
        None
    Returns:
        None
    """
    zmq_context = zmq.Context()
    analyzer_in = _create_analyzer_in(zmq_context)
    analyzer_out = _create_analyzer_out(zmq_context)
    while True:
        request = analyzer_in.recv_json()
        logging.debug('ANALYZER: Received request ...')
        gevent.spawn_link_exception(_process_request, analyzer_out, request)
def _test_serve_forever(self):
    g = gevent.spawn_link_exception(self.server.serve_forever)
    try:
        gevent.sleep(0.01)
        self.assertRequestSucceeded()
        self.server.stop()
        assert not self.server.started
        self.assertConnectionRefused()
    finally:
        g.kill()
def _test_serve_forever(self):
    g = gevent.spawn_link_exception(self.server.serve_forever)
    try:
        gevent.sleep(0.01)
        self.assertRequestSucceeded()
        self.server.stop()
        assert not self.server.started
        self.assertConnectionRefused()
    finally:
        g.kill(block=True)
def start(self):
    """Start the event queue handler and the different protocol socket
    listeners."""
    try:
        servlets = self.__spawn_servers()
        eventlet = gevent.spawn_link_exception(self.__event_queue_handler)
        servlets.extend([eventlet])
        gevent.joinall(servlets)
    except gevent.GreenletExit, err:
        logging.warn(err)
def start_socket_communication(self):
    '''Listens to ALL messages and trigger()s on them'''
    context = zmq.Context.instance()

    # Setup subscriber
    sub_sock = context.socket(zmq.SUB)
    sub_sock.connect(self._config.sub_uri)
    sub_sock.setsockopt(zmq.SUBSCRIBE, '')

    def g_listener():
        while True:
            name = sub_sock.recv()
            raw_data = sub_sock.recv()
            try:
                data = jsondecode(raw_data)
            except:
                continue
            self.trigger(name, data)

    gevent.spawn_link_exception(g_listener)

    # Setup publisher
    self._pub_sock = context.socket(zmq.PUB)
    self._pub_sock.connect(self._config.pub_uri)
def run(self):
    for cname, values in self.iter:
        ci = MRCommitter(
            self._job, self._output_type, cname, self._job.reducefun(cname))
        ci.input = Queue(1000)
        ci._greenlet = gevent.spawn_link_exception(
            util.ending_request(self._job._mongodb), ci.run_wrapper)
        for v in values:
            ci.input.put(v[1])
        ci.input.put(StopIteration)
        ci._greenlet.join()
def test_recv_timeout(self):
    acceptor = gevent.spawn_link_exception(self.listener.accept)
    client = self.create_connection()
    client.settimeout(0.1)
    start = time.time()
    try:
        data = client.recv(1024)
    except socket.timeout:
        assert 0.1 - 0.01 <= time.time() - start <= 0.1 + 0.1, (time.time() - start)
    else:
        raise AssertionError('socket.timeout should have been raised, instead recv returned %r' % (data, ))
    acceptor.get()
def __call__(self, job, command, chunk_iter):
    q_greenlet = Queue()

    def g_submitter():
        for args in chunk_iter:
            g = self._pool.spawn_link_exception(
                self._do_chunk, command, job, *args)
            q_greenlet.put((g, args))
            gevent.sleep(0)
        q_greenlet.put(StopIteration)

    gevent.spawn_link_exception(g_submitter)
    error = None
    for g, args in q_greenlet:
        header, payload = g.get()
        if error:
            continue
        elif 'error' in header:
            log.exception('Got error')
            error = header['error']
        else:
            yield header, payload
    if error:
        raise zexc.WorkerError, error
def __spawn_servers(self):
    """
    Spawn the servers whose turn_on FLAGS are ON, each in an individual
    greenlet. Return the greenlet objects.
    """
    servers_started = []
    server_greenlets = []
    if self.__tcp_server_on:
        servers_started.append("TCP SERVER")
        servlet_tcp = gevent.spawn_link_exception(self.__start_tcp_server)
        server_greenlets.append(servlet_tcp)
    if self.__tcp_ssl_server_on:
        if self.__tcp_ssl_port:
            servers_started.append("TCP/SSL SERVER")
            servlet_tcp_ssl = gevent.spawn_link_exception(self.__start_tcp_ssl_server)
            server_greenlets.append(servlet_tcp_ssl)
        else:
            logging.error("SSL port not defined in config file")
    if self.__udp_server_on:
        servers_started.append("UDP SERVER")
        servlet_udp = gevent.spawn_link_exception(self.__start_udp_server)
        server_greenlets.append(servlet_udp)
    if self.__ftp_server_on:
        servers_started.append("FTP SERVER")
        servlet_ftp = gevent.spawn_link_exception(self.__start_ftp_server)
        server_greenlets.append(servlet_ftp)
    if not servers_started:
        logging.warn("No servers started! Was that what you wanted to do?")
        logging.warn("Use e.g. turn_tcp_server_on() to start the TCP server")
    else:
        logging.warn("Started servers: %s", ", ".join(servers_started))
    return server_greenlets
def test_file2file(self):
    with gevent.Timeout(1):
        srcfile = open(self.TESTFILE, "r")
        pipe = Pipe(0)
        dstfile = tempfile.NamedTemporaryFile(delete=False)
        try:
            pf = gevent.spawn_link_exception(pipe_to_file, pipe, dstfile)
            fp = gevent.spawn_link_exception(file_to_pipe, srcfile, pipe, 64)
            gevent.joinall([fp, pf])

            self.assertTrue(srcfile.closed)
            self.assertTrue(dstfile.closed)
            self.assertTrue(pipe.finished())
            self.assertEqualFiles(self.TESTFILE, dstfile.name)
        finally:
            try:
                os.remove(dstfile.name)
            except:
                pass
def test_recv_timeout(self):
    acceptor = gevent.spawn_link_exception(self.listener.accept)
    try:
        client = self.create_connection()
        client.settimeout(0.1)
        start = time.time()
        try:
            data = client.recv(1024)
        except self.TIMEOUT_ERROR:
            assert 0.1 - 0.01 <= time.time() - start <= 0.1 + 0.1, (
                time.time() - start)
        else:
            raise AssertionError(
                '%s should have been raised, instead recv returned %r' % (
                    self.TIMEOUT_ERROR, data, ))
    finally:
        acceptor.get()
def test_sendall_timeout(self):
    acceptor = gevent.spawn_link_exception(self.listener.accept)
    client = self.create_connection()
    client.settimeout(0.1)
    start = time.time()
    send_succeed = False
    data_sent = 'h' * 100000
    try:
        result = client.sendall(data_sent)
    except socket.timeout:
        assert 0.1 - 0.01 <= time.time() - start <= 0.1 + 0.1, (time.time() - start)
    else:
        assert time.time() - start <= 0.1 + 0.01, (time.time() - start)
        send_succeed = True
    conn, addr = acceptor.get()
    if send_succeed:
        client.close()
        data_read = conn.makefile().read()
        self.assertEqual(len(data_sent), len(data_read))
        self.assertEqual(data_sent, data_read)
        print 'WARNING: read the data instead of failing with timeout'
def test_put_to_closed_file(self):
    with gevent.Timeout(1):
        pipe = Pipe()
        file = open(os.devnull, "w")
        pf = gevent.spawn_link_exception(pipe_to_file, pipe, file)

        # Wait for pipe_to_file to block
        while not pipe.getters:
            gevent.sleep(0)

        # Close file
        file.close()

        # Write something to pipe
        pipe.put(".")

        # Wait for pipe_to_file process to end
        pf.join()

        # Pipe should be closed now
        self.assertTrue(pipe.closed())
def test_sendall_timeout(self):
    acceptor = gevent.spawn_link_exception(self.listener.accept)
    try:
        client = self.create_connection()
        client.settimeout(0.1)
        start = time.time()
        send_succeed = False
        data_sent = 'h' * 100000
        try:
            client.sendall(data_sent)
        except self.TIMEOUT_ERROR:
            assert 0.1 - 0.01 <= time.time() - start <= 0.1 + 0.1, (
                time.time() - start)
        else:
            assert time.time() - start <= 0.1 + 0.01, (time.time() - start)
            send_succeed = True
    finally:
        conn, addr = acceptor.get()
    if send_succeed:
        client.close()
        data_read = conn.makefile().read()
        self.assertEqual(len(data_sent), len(data_read))
        self.assertEqual(data_sent, data_read)
        print '%s: WARNING: read the data instead of failing with timeout' % self.__class__.__name__
def test11():
    try:
        jobs = [gevent.spawn_link_exception(test10) for _ in range(10)]
        gevent.joinall(jobs)
    except Exception, e:
        print 'liushuaikobe~~!!'
def main():
    # parse options
    (options, args) = parse_options()

    if os.isatty(sys.stdin.fileno()):
        raise RuntimeError('Need configuration in stdin.')
    config = common.read_config(sys.stdin)
    conn = common.connect(config.s3)
    bucket = None

    try:
        # setup
        real_stdout = sys.stdout
        sys.stdout = sys.stderr

        # verify all required config items are present
        if 'roundtrip' not in config:
            raise RuntimeError('roundtrip section not found in config')
        for item in ['readers', 'writers', 'duration', 'files', 'bucket']:
            if item not in config.roundtrip:
                raise RuntimeError(
                    "Missing roundtrip config item: {item}".format(item=item))
        for item in ['num', 'size', 'stddev']:
            if item not in config.roundtrip.files:
                raise RuntimeError(
                    "Missing roundtrip config item: files.{item}".format(
                        item=item))

        seeds = dict(config.roundtrip.get('random_seed', {}))
        seeds.setdefault('main', random.randrange(2**32))

        rand = random.Random(seeds['main'])

        for name in ['names', 'contents', 'writer', 'reader']:
            seeds.setdefault(name, rand.randrange(2**32))

        print 'Using random seeds: {seeds}'.format(seeds=seeds)

        # setup bucket and other objects
        bucket_name = common.choose_bucket_prefix(config.roundtrip.bucket,
                                                  max_len=30)
        bucket = conn.create_bucket(bucket_name)
        print "Created bucket: {name}".format(name=bucket.name)
        objnames = realistic.names(
            mean=15,
            stddev=4,
            seed=seeds['names'],
            )
        objnames = itertools.islice(objnames, config.roundtrip.files.num)
        objnames = list(objnames)
        files = realistic.files(
            mean=1024 * config.roundtrip.files.size,
            stddev=1024 * config.roundtrip.files.stddev,
            seed=seeds['contents'],
            )
        q = gevent.queue.Queue()

        logger_g = gevent.spawn_link_exception(yaml.safe_dump_all, q,
                                               stream=real_stdout)

        print "Writing {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.writers,
            )
        pool = gevent.pool.Pool(size=config.roundtrip.writers)
        start = time.time()
        for objname in objnames:
            fp = next(files)
            pool.spawn_link_exception(
                writer,
                bucket=bucket,
                objname=objname,
                fp=fp,
                queue=q,
                )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(dict(
                type='write_done',
                duration=int(round(elapsed * NANOSECOND)),
                ))

        print "Reading {num} objects with {w} workers...".format(
            num=config.roundtrip.files.num,
            w=config.roundtrip.readers,
            )
        # avoid accessing them in the same order as the writing
        rand.shuffle(objnames)
        pool = gevent.pool.Pool(size=config.roundtrip.readers)
        start = time.time()
        for objname in objnames:
            pool.spawn_link_exception(
                reader,
                bucket=bucket,
                objname=objname,
                queue=q,
                )
        pool.join()
        stop = time.time()
        elapsed = stop - start
        q.put(dict(
                type='read_done',
                duration=int(round(elapsed * NANOSECOND)),
                ))

        q.put(StopIteration)
        logger_g.get()

    finally:
        # cleanup
        if options.cleanup:
            if bucket is not None:
                common.nuke_bucket(bucket)
def test_motion_threshold(self):
    slow_rate = 10.0
    fast_rate = 50.0
    threshold0 = 50
    threshold1 = 5

    motion = mock.Mock()
    result = mock.Mock()
    result.featuredata = dict(motion=10)
    motion.execute = mock.Mock(return_value=[result])
    self.core.get_inspection = mock.Mock(return_value=motion)

    def capture():
        print time.time(), 'Capture'
        capture.call_count += 1
        return [mock.Mock()]
    capture.call_count = 0
    self.core.capture = capture

    @self.core.state('start')
    def slow_state(state):
        core = state.core
        core.set_rate(slow_rate)
        motion = core.get_inspection('motion')
        while True:
            core.tick()
            frame = core.capture()[0]
            features = motion.execute(frame)
            if features[0].featuredata['motion'] > threshold0:
                return core.state('fast')

    @self.core.state('fast')
    def fast_state(state):
        core = state.core
        core.set_rate(fast_rate)
        motion = core.get_inspection('motion')
        while True:
            core.tick()
            frame = core.capture()[0]
            features = motion.execute(frame)
            if features[0].featuredata['motion'] < threshold1:
                return core.state('start')

    @self.core.on('start', 'terminate')
    @self.core.on('fast', 'terminate')
    def terminate(state, name, data):
        state.transition(None)

    gl = gevent.spawn_link_exception(self.core.run, audit=True)
    gevent.sleep(0.2)
    print 'Set motion to 55'
    result.featuredata['motion'] = 55
    gevent.sleep(0.2)
    print 'Set motion to 4'
    result.featuredata['motion'] = 4
    gevent.sleep(0.2)
    print 'Call terminate'
    self.core.trigger('terminate')
    trail = gl.get()

    self.assertEqual(trail, ['start', 'fast', 'start', None])
    num_frames = self.core.capture.call_count
    print num_frames
    self.assertGreater(num_frames, 12)
    self.assertLess(num_frames, 18)
def __schedule(self, sid, client_map, interval, instance):
    return gevent.spawn_link_exception(self.__run, sid, client_map, interval,
                                       instance)
def schedule(func, args, seconds):
    return gevent.spawn_link_exception(_run, func, args, seconds)
def schedule(func, args, seconds):
    log.debug("Inside the schedule")
    return gevent.spawn_link_exception(_run, func, args, seconds)
def run_find_item():
    global find_item_book
    while True:
        (num, ia) = item_queue.get()
        find_item_book = ia
        #print 'find_item:', ia
        t0_find_item = time()
        try:
            (host, path) = find_item(ia)
        except FindItemError:
            t1_find_item = time() - t0_find_item
            #print 'fail find_item:', ia, t1_find_item
            item_queue.task_done()
            done(ia, False)
            continue
        t1_find_item = time() - t0_find_item
        #print 'find_item:', ia, t1_find_item
        if len(locator_times) == 100:
            locator_times.pop(0)
        locator_times.append((t1_find_item, host))
        body = None
        if False:
            url = 'http://' + solr_src_host + '/solr/inside/select?wt=json&rows=10&q=ia:' + ia
            response = json.load(urllib2.urlopen(url))['response']
            if response['numFound']:
                doc = response['docs'][0]
                for doc_lang in ['eng', 'fre', 'deu', 'spa', 'other']:
                    if doc.get('body_' + doc_lang):
                        body = doc['body_' + doc_lang]
                        break
                assert body
        filename = '/1/abbyy_text/data/' + ia[:2] + '/' + ia
        if os.path.exists(filename):
            body = codecs.open(filename, 'r', 'utf-8').read()
        if body:
            try:
                meta_xml = urlread_keep_trying('http://%s%s/%s_meta.xml' % (host, path, ia))
            except urllib2.HTTPError as error:
                if error.code != 403:
                    raise
                print('403 on meta XML for:', ia)
                item_queue.task_done()  # skip
                done(ia, False)
                continue
            try:
                root = fromstring(meta_xml)
            except:
                print('identifier:', ia)
            collection = [e.text for e in root.findall('collection')]
            elem_noindex = root.find('noindex')
            if elem_noindex is not None and elem_noindex.text == 'true' and (
                    'printdisabled' not in collection
                    and 'lendinglibrary' not in collection):
                item_queue.task_done()  # skip
                done(ia, False)
                continue
            lang_elem = root.find('language')
            if lang_elem is None:
                print(meta_xml)
            if lang_elem is not None:
                lang = tidy_lang(lang_elem.text) or 'other'
            else:
                lang = 'other'
            #print 'solr_queue.put((ia, body, page_count))'
            solr_queue.put((ia, body, lang, page_counts[ia], collection))
            #print 'solr_queue.put() done'
        else:
            host_queues[host].put((num, ia, path))
            if host not in host_threads:
                host_threads[host] = spawn_link_exception(
                    read_text_from_node, host)
        item_queue.task_done()
    print('host queues: %8d' % host_count)
    print('items queued: %8d' % queued_items)
    if locator_times:
        print('average locator time: %8.2f secs'
              % (float(sum(t[0] for t in locator_times)) / len(locator_times)))
    #print sorted(locator_times, key=lambda t:t[0], reverse=True)[:10]
    print()
    if run_time < 120:
        sleep(1)
    else:
        sleep(5)


if __name__ == '__main__':
    t_status = spawn_link_exception(status_thread)
    t_item_queue = spawn_link_exception(add_to_item_queue)
    for i in range(80):
        spawn_link_exception(run_find_item)
    #t_index_items = spawn_link_exception(index_items)
    for i in range(8):
        spawn_link_exception(run_solr_queue, i)
    #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr])
    sleep(1)
    print('join item_queue thread')
    t_item_queue.join()
    print('item_queue thread complete')
    #print 'join item_and_host_queue:', item_and_host_queue.qsize()
    #item_and_host_queue.join()
        host_count += 1
        qsize = host_queue.qsize()
        queued_items += qsize
    print 'host queues: %8d' % host_count
    print 'items queued: %8d' % queued_items
    if locator_times:
        print 'average locator time: %8.2f secs' % (float(sum(t[0] for t in locator_times)) / len(locator_times))
    #print sorted(locator_times, key=lambda t:t[0], reverse=True)[:10]
    print
    if run_time < 120:
        sleep(1)
    else:
        sleep(5)


if __name__ == '__main__':
    t_status = spawn_link_exception(status_thread)
    t_item_queue = spawn_link_exception(add_to_item_queue)
    for i in range(80):
        spawn_link_exception(run_find_item)
    #t_index_items = spawn_link_exception(index_items)
    for i in range(8):
        spawn_link_exception(run_solr_queue, i)
    #joinall([t_run_find_item, t_item_queue, t_index_items, t_solr])
    sleep(1)
    print 'join item_queue thread'
    t_item_queue.join()
    print 'item_queue thread complete'
    #print 'join item_and_host_queue:', item_and_host_queue.qsize()
    #item_and_host_queue.join()
def run_find_item():
    global find_item_book
    while True:
        (num, ia) = item_queue.get()
        find_item_book = ia
        #print 'find_item:', ia
        t0_find_item = time()
        try:
            (host, path) = find_item(ia)
        except FindItemError:
            t1_find_item = time() - t0_find_item
            #print 'fail find_item:', ia, t1_find_item
            item_queue.task_done()
            done(ia, False)
            continue
        t1_find_item = time() - t0_find_item
        #print 'find_item:', ia, t1_find_item
        if len(locator_times) == 100:
            locator_times.pop(0)
        locator_times.append((t1_find_item, host))
        body = None
        if False:
            url = 'http://' + solr_src_host + '/solr/inside/select?wt=json&rows=10&q=ia:' + ia
            response = json.load(urllib2.urlopen(url))['response']
            if response['numFound']:
                doc = response['docs'][0]
                for doc_lang in ['eng', 'fre', 'deu', 'spa', 'other']:
                    if doc.get('body_' + doc_lang):
                        body = doc['body_' + doc_lang]
                        break
                assert body
        filename = '/1/abbyy_text/data/' + ia[:2] + '/' + ia
        if os.path.exists(filename):
            body = codecs.open(filename, 'r', 'utf-8').read()
        if body:
            try:
                meta_xml = urlread_keep_trying('http://%s%s/%s_meta.xml' % (host, path, ia))
            except urllib2.HTTPError, error:
                if error.code != 403:
                    raise
                print '403 on meta XML for:', ia
                item_queue.task_done()  # skip
                done(ia, False)
                continue
            try:
                root = fromstring(meta_xml)
            except:
                print 'identifier:', ia
            collection = [e.text for e in root.findall('collection')]
            elem_noindex = root.find('noindex')
            if elem_noindex is not None and elem_noindex.text == 'true' and (
                    'printdisabled' not in collection
                    and 'lendinglibrary' not in collection):
                item_queue.task_done()  # skip
                done(ia, False)
                continue
            lang_elem = root.find('language')
            if lang_elem is None:
                print meta_xml
            if lang_elem is not None:
                lang = tidy_lang(lang_elem.text) or 'other'
            else:
                lang = 'other'
            #print 'solr_queue.put((ia, body, page_count))'
            solr_queue.put((ia, body, lang, page_counts[ia], collection))
            #print 'solr_queue.put() done'
        else:
            host_queues[host].put((num, ia, path))
            if host not in host_threads:
                host_threads[host] = spawn_link_exception(read_text_from_node, host)
        item_queue.task_done()
def schedule(func, host, config, wmi_out):
    return gevent.spawn_link_exception(func, host, config, wmi_out)
def start(self, checkpoint=None):
    gevent.spawn_link_exception(self.triggers.run)
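# Several snippets above, like this last start(), discard the handle returned
# by spawn_link_exception, so the greenlet can never be joined or killed
# later. The replicator start() near the top of this collection instead keeps
# its greenlets in self._greenlets so a restart can kill the previous
# generation first. A minimal sketch of that pattern; the class and attribute
# names here are illustrative, not taken from any project above:

import gevent


class TriggerService(object):
    def __init__(self, triggers):
        self.triggers = triggers
        self._greenlets = []

    def start(self, checkpoint=None):
        # kill greenlets left over from a previous start() before respawning
        for gl in self._greenlets:
            gl.kill()
        self._greenlets = [gevent.spawn(self.triggers.run)]

    def stop(self):
        # gevent.killall kills every greenlet in the list
        gevent.killall(self._greenlets)
        self._greenlets = []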