def direct_delete_container_entry(container_ring, account_name, container_name,
                                  object_name, headers=None):
    """
    Talk directly to the primary container servers to delete a particular
    object listing. Does not talk to object servers; use this only when a
    container entry does not actually have a corresponding object.
    """
    if headers is None:
        headers = {}
    headers[USE_REPLICATION_NETWORK_HEADER] = 'true'

    pool = GreenPool()
    part, nodes = container_ring.get_nodes(account_name, container_name)
    for node in nodes:
        pool.spawn_n(direct_delete_container_object, node, part, account_name,
                     container_name, object_name, headers=headers)

    # This either worked or it didn't; if it didn't, we'll retry on the next
    # reconciler loop when we see the queue entry again.
    pool.waitall()
def reconcile(self):
    """
    Main entry point for concurrent processing of misplaced objects.

    Iterate over all queue entries and delegate processing to spawned
    workers in the pool.
    """
    self.logger.debug('pulling items from the queue')
    pool = GreenPool(self.concurrency)
    for container in self._iter_containers():
        self.logger.debug('checking container %s', container)
        for raw_obj in self._iter_objects(container):
            try:
                queue_item = parse_raw_obj(raw_obj)
            except Exception:
                self.stats_log('invalid_record',
                               'invalid queue record: %r', raw_obj,
                               level=logging.ERROR, exc_info=True)
                continue
            if self.should_process(queue_item):
                pool.spawn_n(self.process_queue_item,
                             container, raw_obj['name'], queue_item)
        self.log_stats()
    pool.waitall()
def run(self):
    signal.signal(signal.SIGINT, self.signal_handler)
    pool = GreenPool()
    with open("beka.yaml") as file:
        # safe_load avoids executing arbitrary YAML tags
        config = yaml.safe_load(file)
    for router in config["routers"]:
        printmsg("Starting Beka on %s" % router["local_address"])
        beka = Beka(
            router["local_address"],
            router["bgp_port"],
            router["local_as"],
            router["router_id"],
            self.peer_up_handler,
            self.peer_down_handler,
            self.route_handler,
            self.error_handler
        )
        for peer in router["peers"]:
            beka.add_neighbor(
                "passive",
                peer["peer_ip"],
                peer["peer_as"],
            )
        if "routes" in router:
            for route in router["routes"]:
                beka.add_route(
                    route["prefix"],
                    route["next_hop"]
                )
        self.bekas.append(beka)
        pool.spawn_n(beka.run)
    pool.waitall()
    printmsg("All greenlets gone, exiting")
def run_fd_server(self):
    server = eventlet.listen(('127.0.0.1', consts.GLOBAL_FB_PORT))
    pool = GreenPool(10000)
    while True:
        # accept() returns (connection, address), so fd is a connection
        fd, addr = server.accept()
        self.logger.info("global receives a connection")
        # self.global_conn(fd)
        pool.spawn_n(self.global_conn, fd)
def runtestsmulti(self, envlist):
    pool = GreenPool(size=self._toxconfig.option.numproc)
    for env in envlist:
        pool.spawn_n(self.runtests, env)
    pool.waitall()
    if not self.toxsession.config.option.sdistonly:
        retcode = self._toxsession._summary()
        return retcode
class Concurrency(object):
    """
    Convenience class to support concurrency, if Eventlet is available;
    otherwise it just performs at single concurrency.

    :param concurrency: The level of concurrency desired. Default: 10
    """

    def __init__(self, concurrency=10):
        if concurrency and GreenPool:
            self._pool = GreenPool(concurrency)
        else:
            self._pool = None
        self._queue = Queue()
        self._results = {}

    def _spawner(self, ident, func, *args, **kwargs):
        self._queue.put((ident, func(*args, **kwargs)))

    def spawn(self, ident, func, *args, **kwargs):
        """
        Returns immediately to the caller and begins executing the func in
        the background. Use get_results and the ident given to retrieve the
        results of the func.

        :param ident: An identifier to find the results of the func from
                      get_results. This identifier can be anything unique
                      to the Concurrency instance.
        :param func: The function to execute concurrently.
        :param args: The args to give the func.
        :param kwargs: The keyword args to give the func.
        :returns: None
        """
        if self._pool:
            self._pool.spawn_n(self._spawner, ident, func, *args, **kwargs)
        else:
            self._spawner(ident, func, *args, **kwargs)

    def get_results(self):
        """
        Returns a dict of the results currently available. The keys are the
        ident values given with the calls to spawn. The values are the
        return values of the funcs.
        """
        try:
            while True:
                ident, value = self._queue.get(block=False)
                self._results[ident] = value
        except Empty:
            pass
        return self._results

    def join(self):
        """
        Blocks until all currently pending functions have finished.
        """
        if self._pool:
            self._pool.waitall()
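# A minimal, hypothetical demo of the Concurrency class above; `slow_double`
# is an assumption, not part of the original module. It shows the
# spawn/join/get_results cycle with idents keying the results.
from eventlet import sleep as green_sleep

def slow_double(x):
    green_sleep(0.01)  # yield to other greenthreads, as real I/O would
    return x * 2

conc = Concurrency(concurrency=4)
for i in range(8):
    conc.spawn(i, slow_double, i)  # ident=i keys the result
conc.join()                        # blocks until all spawned funcs finish
print(conc.get_results())          # e.g. {0: 0, 1: 2, ..., 7: 14}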
def run(self, *args, **kwargs):
    try:
        self.logger.info('event agent: starting')

        pool = GreenPool(len(self.workers))
        for worker in self.workers:
            pool.spawn(worker.start)

        def front(server, backend):
            while True:
                msg = server.recv_multipart()
                if validate_msg(msg):
                    try:
                        event_id = sqlite3.Binary(msg[2])
                        data = msg[3]
                        self.queue.put(event_id, data)
                        event = ['', msg[2], msg[3]]
                        backend.send_multipart(event)
                    except Exception:
                        pass
                    finally:
                        ack = msg[0:3]
                        server.send_multipart(ack)

        def back(backend):
            while True:
                msg = backend.recv_multipart()
                event_id = msg[1]
                event_id = sqlite3.Binary(event_id)
                self.queue.delete(event_id)

        boss_pool = GreenPool(2)
        boss_pool.spawn_n(front, self.server, self.backend)
        boss_pool.spawn_n(back, self.backend)

        while True:
            sleep(1)

            now = time.time()
            if now - self.last_retry > self.retry_interval:
                self.retry()
                self.last_retry = now

            # iterate over a copy so failed workers can be replaced in place
            for w in list(self.workers):
                if w.failed:
                    self.workers.remove(w)
                    self.logger.warn('restart worker "%s"', w.name)
                    new_w = EventWorker(self.conf, w.name, self.context)
                    self.workers.append(new_w)
                    pool.spawn(new_w.start)
    except Exception as e:
        self.logger.error('ERROR in main loop %s', e)
        raise
    finally:
        self.logger.warn('event agent: stopping')
        self.stop_workers()
def calculate_similar_movies(n=10, similarity=sim_pearson):
    """
    Calculate and save similarity scores for all movies in the database.
    This will take a long time. The algorithm is parallelized using a
    greenlet pool.
    """
    pool = GreenPool(size=30)
    movies = db.get_movies()
    movie_count = len(movies)
    sys.stdout.write("Processing {0} movies\n".format(movie_count))
    for movie in movies:
        # pass the caller-supplied similarity function through
        pool.spawn_n(do_movie_similarity_calculation, movie,
                     n=n, similarity=similarity)
    pool.waitall()
def test_connection(self):
    """
    conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                      client_id=str(uuid.uuid4()),
                      endpoint="http://localhost:8888/v1/12345",
                      user="", key="")
    """
    conn = Connection(auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
                      client_id=str(uuid.uuid4()),
                      endpoint="http://166.78.143.130/v1/12345",
                      user="", key="")
    conn.connect(token='blah')

    def create_worker(queue_name):
        return conn.create_queue(queue_name, 100)

    def post_worker(queue):
        return queue.post_message('test_message', 10)

    pool = GreenPool(1000)

    def on_message_posted(greenthread):
        msg = greenthread.wait()
        print msg._href

    def on_queue_created(greenthread):
        queue = greenthread.wait()
        print queue.name

    # Create the queues first; the posting loop below needs a queue object.
    queue_names = ["queue-" + str(x) for x in xrange(0, 5)]
    for queue_name in queue_names:
        gt = pool.spawn(create_worker, queue_name)
        gt.link(on_queue_created)
    pool.waitall()

    queue = conn.create_queue(queue_names[0], 100)
    for x in range(0, 10):
        gt = pool.spawn(post_worker, queue)
        gt.link(on_message_posted)
    pool.waitall()

    def delete_worker(queue_name):
        conn.delete_queue(queue_name)
        print "Queue:", queue_name, " deleted"

    for queue in conn.get_queues():
        pool.spawn_n(delete_worker, queue.name)

    print "Waiting for everything to finish"
    pool.waitall()
    print "Done"
def direct_delete_container_entry(container_ring, account_name, container_name,
                                  object_name, headers=None):
    """
    Talk directly to the primary container servers to delete a particular
    object listing. Does not talk to object servers; use this only when a
    container entry does not actually have a corresponding object.
    """
    pool = GreenPool()
    part, nodes = container_ring.get_nodes(account_name, container_name)
    for node in nodes:
        pool.spawn_n(direct_delete_container_object, node, part, account_name,
                     container_name, object_name, headers=headers)

    # This either worked or it didn't; if it didn't, we'll retry on the next
    # reconciler loop when we see the queue entry again.
    pool.waitall()
class ZerovmDaemon:

    def __init__(self, socket_name):
        self.server_address = socket_name
        self.zerovm_exename = ['zerovm']
        self.pool = GreenPool()
        self.jobs = set()
        self.stats_dir = '/tmp'

    def parse_command(self, fd):
        try:
            size = int(fd.read(8), 0)
            data = fd.read(size)
            return data
        except IOError:
            return None

    def handle(self, fd):
        data = self.parse_command(fd)
        manifest = data
        report = self.execute(manifest)
        self.send_response(fd, report)

    def serve(self):
        try:
            os.remove(self.server_address)
        except OSError:
            pass
        server = listen(self.server_address, family=socket.AF_UNIX)
        while True:
            try:
                new_sock, address = server.accept()
                self.pool.spawn_n(self.handle, new_sock.makefile('rw'))
            except (SystemExit, KeyboardInterrupt):
                break

    def send_response(self, fd, report):
        data = '0x%06x%s' % (len(report), report)
        try:
            fd.write(data)
        except IOError:
            pass

    def execute(self, manifest):
        pass
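# A hypothetical client for ZerovmDaemon above, assuming the framing it
# already uses: an 8-character '0x%06x' hex length prefix, then the payload.
# `send_manifest` is an illustration, not part of the original daemon.
import socket as _socket

def send_manifest(socket_path, manifest):
    sock = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM)
    sock.connect(socket_path)
    fd = sock.makefile('rw')
    # mirror parse_command(): 8-byte hex size header, then the body
    fd.write('0x%06x%s' % (len(manifest), manifest))
    fd.flush()
    # the reply from send_response() uses the same framing
    size = int(fd.read(8), 0)
    report = fd.read(size)
    sock.close()
    return report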
def run(self, run_command):
    """
    Run the crawlers of a code project.
    """
    crawler = import_user_module("crawlers")
    models = import_user_module("models")
    pool = GreenPool()
    for crawler_class in user_crawlers:
        spider = crawler_class(sessions=run_command.syncdb.sessions,
                               debug=run_command.settings.SHOW_DEBUG_INFO)
        pool.spawn_n(spider.start)
    pool.waitall()
    for session in run_command.syncdb.sessions:
        session.close()
class Replicator(Daemon):
    """
    Implements the logic for directing db replication.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='replicator')
        self.root = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.port = int(conf.get('bind_port', self.default_port))
        concurrency = int(conf.get('concurrency', 8))
        self.cpool = GreenPool(size=concurrency)
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.ring = ring.Ring(swift_dir, ring_name=self.server_type)
        self._local_device_ids = set()
        self.per_diff = int(conf.get('per_diff', 1000))
        self.max_diffs = int(conf.get('max_diffs') or 100)
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        self.vm_test_mode = config_true_value(conf.get('vm_test_mode', 'no'))
        self.node_timeout = int(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self._zero_stats()
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.recon_replicator = '%s.recon' % self.server_type
        self.rcache = os.path.join(self.recon_cache_path,
                                   self.recon_replicator)
        self.extract_device_re = re.compile('%s%s([^%s]+)' % (
            self.root, os.path.sep, os.path.sep))

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {'attempted': 0, 'success': 0, 'failure': 0,
                      'ts_repl': 0, 'no_change': 0, 'hashmatch': 0,
                      'rsync': 0, 'diff': 0, 'remove': 0, 'empty': 0,
                      'remote_merge': 0, 'start': time.time(),
                      'diff_capped': 0}

    def _report_stats(self):
        """Report the current stats to the logs."""
        now = time.time()
        self.logger.info(
            _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
              '(%(rate).5f/s)'),
            {'count': self.stats['attempted'],
             'time': now - self.stats['start'],
             'rate': self.stats['attempted'] /
                (now - self.stats['start'] + 0.0000001)})
        self.logger.info(_('Removed %(remove)d dbs') % self.stats)
        self.logger.info(_('%(success)s successes, %(failure)s failures')
                         % self.stats)
        dump_recon_cache(
            {'replication_stats': self.stats,
             'replication_time': now - self.stats['start'],
             'replication_last': now},
            self.rcache, self.logger)
        self.logger.info(' '.join(['%s:%s' % item for item in
                                   self.stats.items() if item[0] in
                                   ('no_change', 'hashmatch', 'rsync', 'diff',
                                    'ts_repl', 'empty', 'diff_capped')]))

    def _rsync_file(self, db_file, remote_file, whole_file=True):
        """
        Sync a single file using rsync. Used by _rsync_db to handle syncing.

        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag
        :returns: True if the sync was successful, False otherwise
        """
        popen_args = ['rsync', '--quiet', '--no-motd',
                      '--timeout=%s' % int(math.ceil(self.node_timeout)),
                      '--contimeout=%s' % int(math.ceil(self.conn_timeout))]
        if whole_file:
            popen_args.append('--whole-file')
        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {'code': proc.returncode, 'args': popen_args})
        return proc.returncode == 0

    def _rsync_db(self, broker, device, http, local_id,
                  replicate_method='complete_rsync', replicate_timeout=None):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        """
        device_ip = rsync_ip(device['replication_ip'])
        if self.vm_test_mode:
            remote_file = '%s::%s%s/%s/tmp/%s' % (
                device_ip, self.server_type, device['replication_port'],
                device['device'], local_id)
        else:
            remote_file = '%s::%s/%s/tmp/%s' % (
                device_ip, self.server_type, device['device'], local_id)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file, remote_file):
            return False
        # perform block-level sync if the db was modified during the first
        # sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file, remote_file, False):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id)
        return response and response.status >= 200 and response.status < 300

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.

        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica
        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('Syncing chunks with %s, starting at %s',
                          http.host, point)
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            with Timeout(self.node_timeout):
                response = http.replicate('merge_items', objects, local_id)
            if not response or response.status >= 300 or \
                    response.status < 200:
                if response:
                    self.logger.error(_('ERROR Bad response %(status)s from '
                                        '%(host)s'),
                                      {'status': response.status,
                                       'host': http.host})
                return False
            # replication relies on db order to send the next merge batch in
            # order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)
        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and response.status >= 200 and \
                    response.status < 300:
                broker.merge_syncs([{'remote_id': remote_id,
                                     'sync_point': point}], incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas
        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{'remote_id': rinfo['id'],
                                 'sync_point': rinfo['point']}],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the url
        :param db_file: DB file
        :returns: ReplConnection object
        """
        return ReplConnection(node, partition,
                              os.path.basename(db_file).split('.', 1)[0],
                              self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self, node, broker, partition, info):
        """
        Replicate a database to a node.

        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
                     'created_at', 'put_timestamp', 'delete_timestamp',
                     'metadata'}
        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node, response, info, broker, http)

    def _handle_sync_response(self, node, response, info, broker, http):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker, node, http, info['id'])
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif response.status >= 200 and response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if self._in_sync(rinfo, info, broker, local_sync):
                return True
            # if the difference in rowids between the two differs by
            # more than 50%, rsync then do a remote merge.
            if rinfo['max_row'] / float(info['max_row']) < 0.5:
                self.stats['remote_merge'] += 1
                self.logger.increment('remote_merges')
                return self._rsync_db(broker, node, http, info['id'],
                                      replicate_method='rsync_then_merge',
                                      replicate_timeout=(info['count'] / 2000))
            # else send diffs over to the remote server
            return self._usync_db(max(rinfo['point'], local_sync),
                                  broker, http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: the container that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(
                info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            self.stats['failure'] += 1
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is
        # greater than the put_timestamp, and there are no objects.
        delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
        put_timestamp = Timestamp(info.get('put_timestamp') or 0)
        if delete_timestamp < (now - self.reclaim_age) and \
                delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        nodes = self.ring.get_part_nodes(int(partition))
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        i = 0
        while i < len(nodes) and nodes[i]['id'] != node_id:
            i += 1
        repl_nodes = nodes[i + 1:] + nodes[:i]
        more_nodes = self.ring.get_more_nodes(int(partition))
        for node in repl_nodes:
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info)
            except DriveNotMounted:
                repl_nodes.append(more_nodes.next())
                self.logger.error(_('ERROR Remote drive not mounted %s'),
                                  node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            self.stats['success' if success else 'failure'] += 1
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
                                  'hook for %s', broker.db_file)
        if not shouldbehere and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
            self.delete_db(broker)
        self.logger.timing_since('timing', start_time)

    def delete_db(self, broker):
        object_file = broker.db_file
        hash_dir = os.path.dirname(object_file)
        suf_dir = os.path.dirname(hash_dir)
        with lock_parent_directory(object_file):
            shutil.rmtree(hash_dir, True)
        try:
            os.rmdir(suf_dir)
        except OSError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                self.logger.exception(
                    _('ERROR while trying to clean up %s') % suf_dir)
        self.stats['remove'] += 1
        device_name = self.extract_device(object_file)
        self.logger.increment('removes.' + device_name)

    def extract_device(self, object_file):
        """
        Extract the device name from an object path. Returns "UNKNOWN" if the
        path could not be extracted successfully for some reason.

        :param object_file: the path to a database file.
        """
        match = self.extract_device_re.match(object_file)
        if match:
            return match.groups()[0]
        return "UNKNOWN"

    def report_up_to_date(self, full_info):
        return True

    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        self._zero_stats()
        dirs = []
        ips = whataremyips()
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return
        self._local_device_ids = set()
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                if self.mount_check and not ismount(
                        os.path.join(self.root, node['device'])):
                    self.logger.warn(
                        _('Skipping %(device)s as it is not mounted') % node)
                    continue
                unlink_older_than(
                    os.path.join(self.root, node['device'], 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'],
                                       self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    dirs.append((datadir, node['id']))
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        self._report_stats()

    def run_forever(self, *args, **kwargs):
        """
        Replicate dbs under the given root in an infinite loop.
        """
        sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            try:
                self.run_once()
            except (Exception, Timeout):
                self.logger.exception(_('ERROR trying to replicate'))
            elapsed = time.time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)
class btclient(Thread):

    def __init__(self, infohash_queue):
        Thread.__init__(self)
        self.setDaemon(True)
        self.infohash_queue = infohash_queue
        self.metadata_queue = Queue()
        self.downloaded = set()
        self.pool = GreenPool()
        self.running = False

    def run(self):
        self.running = True
        while self.running:
            if self.infohash_queue.empty():
                sleep(3)
            else:
                infohash, address = self.infohash_queue.get()
                self.pool.spawn_n(self.download_metadata, address, infohash,
                                  self.metadata_queue)

    def stop(self):
        self.running = False

    def get_metadata_queue(self):
        # renamed from metadata_queue: the attribute set in __init__ would
        # otherwise shadow a method of the same name
        return self.metadata_queue

    def download_metadata(self, address, infohash, metadata_queue, timeout=5):
        metadata = []
        start_time = time()
        if infohash in self.downloaded:
            return
        try:
            the_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # the_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # the_socket.bind(('0.0.0.0', 9000))
            the_socket.settimeout(timeout)
            the_socket.connect(address)
            # handshake
            send_handshake(the_socket, infohash)
            packet = the_socket.recv(4096)
            # handshake error
            if not check_handshake(packet, infohash):
                return
            # ext handshake
            send_ext_handshake(the_socket)
            packet = the_socket.recv(4096)
            # get ut_metadata and metadata_size
            ut_metadata, metadata_size = get_ut_metadata(
                packet), get_metadata_size(packet)
            # request each piece of metadata
            for piece in range(int(math.ceil(metadata_size / (16.0 * 1024)))):
                if infohash in self.downloaded:
                    break
                request_metadata(the_socket, ut_metadata, piece)
                packet = recvall(the_socket, timeout)  # the_socket.recv(1024*17)
                metadata.append(packet[packet.index("ee") + 2:])
                if '6:pieces' in packet:
                    break
        except socket.timeout:
            logger.debug('Connect timeout to %s:%d' % address)
            # TODO: Maybe need NAT Traversal
        except socket.error as error:
            err_code, err_msg = error.args  # avoid shadowing the errno module
            if err_code == 10052:
                logger.debug(
                    'Network dropped connection on reset(10052) %s:%d'
                    % address)
            elif err_code == 10061:
                logger.debug('Connection refused(10061) %s:%d' % address)
            else:
                logger.error(err_msg)
        except Exception:
            pass
        finally:
            the_socket.close()
        metadata = "".join(metadata)
        if metadata.startswith('d') and '6:pieces' in metadata:
            metadata = metadata[:metadata.index('6:pieces')] + 'e'
            try:
                d_metadata = bdecode(metadata)
            except Exception as e:
                logger.error(str(e) + ' metadata: ' + metadata)
            else:
                self.downloaded.add(infohash)
                metadata_queue.put(
                    (infohash, address, d_metadata, time() - start_time))
def test_connection(self):
    """
    conn = Connection(
        auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
        client_id=str(uuid.uuid4()),
        endpoint="http://localhost:8888/v1/12345",
        user="", key="")
    """
    conn = Connection(
        auth_endpoint="https://identity.api.rackspacecloud.com/v2.0",
        client_id=str(uuid.uuid4()),
        endpoint="http://166.78.143.130/v1/12345",
        user="", key="")
    conn.connect(token='blah')

    def create_worker(queue_name):
        return conn.create_queue(queue_name)

    def post_worker(queue):
        return queue.post_message('test_message', 10)

    pool = GreenPool(100)

    def on_message_posted(greenthread):
        msg = greenthread.wait()
        print msg._href

    def on_queue_created(greenthread):
        queue = greenthread.wait()
        print queue.name

    # Create the queues first; the posting loop below needs a queue object.
    queue_names = ["queue-" + str(x) for x in xrange(0, 5)]
    for queue_name in queue_names:
        gt = pool.spawn(create_worker, queue_name)
        gt.link(on_queue_created)
    pool.waitall()

    queue = conn.create_queue(queue_names[0])
    for x in range(0, 10):
        gt = pool.spawn(post_worker, queue)
        gt.link(on_message_posted)
    pool.waitall()

    def delete_worker(queue_name):
        conn.delete_queue(queue_name)
        print "Queue:", queue_name, " deleted"

    for queue in conn.get_queues():
        pool.spawn_n(delete_worker, queue.name)

    print "Waiting for everything to finish"
    pool.waitall()
    print "Done"
def run(self, *args, **kwargs):
    try:
        self.logger.info('event agent: starting')

        pool = GreenPool(len(self.workers))
        for worker in self.workers:
            pool.spawn(worker.start)

        def front(server, backend):
            while True:
                msg = server.recv_multipart()
                if validate_msg(msg):
                    try:
                        event_id = sqlite3.Binary(msg[2])
                        data = msg[3]
                        self.queue.put(event_id, data)
                        event = ['', msg[2], msg[3]]
                        backend.send_multipart(event)
                    except Exception:
                        pass
                    finally:
                        ack = msg[0:3]
                        server.send_multipart(ack)

        def back(backend):
            while True:
                msg = backend.recv_multipart()
                event_id = msg[1]
                success = msg[2]
                event_id = sqlite3.Binary(event_id)
                if not success:
                    self.queue.failed(event_id)
                else:
                    self.queue.delete(event_id)

        boss_pool = GreenPool(2)
        boss_pool.spawn_n(front, self.server, self.backend)
        boss_pool.spawn_n(back, self.backend)

        while True:
            sleep(1)

            now = time.time()
            if now - self.last_retry > self.retry_interval:
                self.retry()
                self.last_retry = now

            # iterate over a copy so failed workers can be replaced in place
            for w in list(self.workers):
                if w.failed:
                    self.workers.remove(w)
                    self.logger.warn('restart worker "%s"', w.name)
                    new_w = EventWorker(self.conf, w.name, self.context)
                    self.workers.append(new_w)
                    pool.spawn(new_w.start)
    except Exception as e:
        self.logger.error('ERROR in main loop %s', e)
        raise
    finally:
        self.logger.warn('event agent: stopping')
        self.stop_workers()
class Replicator(Daemon):
    """
    Implements the logic for directing db replication.
    """

    def __init__(self, conf, logger=None):
        self.conf = conf
        self.logger = logger or get_logger(conf, log_route='replicator')
        self.root = conf.get('devices', '/srv/node')
        self.mount_check = config_true_value(conf.get('mount_check', 'true'))
        self.bind_ip = conf.get('bind_ip', '0.0.0.0')
        self.port = int(conf.get('bind_port', self.default_port))
        concurrency = int(conf.get('concurrency', 8))
        self.cpool = GreenPool(size=concurrency)
        swift_dir = conf.get('swift_dir', '/etc/swift')
        self.ring = ring.Ring(swift_dir, ring_name=self.server_type)
        self._local_device_ids = set()
        self.per_diff = int(conf.get('per_diff', 1000))
        self.max_diffs = int(conf.get('max_diffs') or 100)
        self.interval = int(conf.get('interval') or
                            conf.get('run_pause') or 30)
        if 'run_pause' in conf and 'interval' not in conf:
            self.logger.warning('Option %(type)s-replicator/run_pause '
                                'is deprecated and will be removed in a '
                                'future version. Update your configuration'
                                ' to use option %(type)s-replicator/'
                                'interval.' % {'type': self.server_type})
        self.databases_per_second = int(
            conf.get('databases_per_second', 50))
        self.node_timeout = float(conf.get('node_timeout', 10))
        self.conn_timeout = float(conf.get('conn_timeout', 0.5))
        self.rsync_compress = config_true_value(
            conf.get('rsync_compress', 'no'))
        self.rsync_module = conf.get('rsync_module', '').rstrip('/')
        if not self.rsync_module:
            self.rsync_module = '{replication_ip}::%s' % self.server_type
        self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7))
        swift.common.db.DB_PREALLOCATION = \
            config_true_value(conf.get('db_preallocation', 'f'))
        self._zero_stats()
        self.recon_cache_path = conf.get('recon_cache_path',
                                         '/var/cache/swift')
        self.recon_replicator = '%s.recon' % self.server_type
        self.rcache = os.path.join(self.recon_cache_path,
                                   self.recon_replicator)
        self.extract_device_re = re.compile('%s%s([^%s]+)' % (
            self.root, os.path.sep, os.path.sep))
        self.handoffs_only = config_true_value(conf.get('handoffs_only', 'no'))

    def _zero_stats(self):
        """Zero out the stats."""
        self.stats = {'attempted': 0, 'success': 0, 'failure': 0,
                      'ts_repl': 0, 'no_change': 0, 'hashmatch': 0,
                      'rsync': 0, 'diff': 0, 'remove': 0, 'empty': 0,
                      'remote_merge': 0, 'start': time.time(),
                      'diff_capped': 0, 'deferred': 0,
                      'failure_nodes': {}}

    def _report_stats(self):
        """Report the current stats to the logs."""
        now = time.time()
        self.logger.info(
            _('Attempted to replicate %(count)d dbs in %(time).5f seconds '
              '(%(rate).5f/s)'),
            {'count': self.stats['attempted'],
             'time': now - self.stats['start'],
             'rate': self.stats['attempted'] /
                (now - self.stats['start'] + 0.0000001)})
        self.logger.info(_('Removed %(remove)d dbs') % self.stats)
        self.logger.info(_('%(success)s successes, %(failure)s failures')
                         % self.stats)
        dump_recon_cache(
            {'replication_stats': self.stats,
             'replication_time': now - self.stats['start'],
             'replication_last': now},
            self.rcache, self.logger)
        self.logger.info(' '.join(['%s:%s' % item for item in
                                   sorted(self.stats.items()) if item[0] in
                                   ('no_change', 'hashmatch', 'rsync', 'diff',
                                    'ts_repl', 'empty', 'diff_capped',
                                    'remote_merge')]))

    def _add_failure_stats(self, failure_devs_info):
        for node, dev in failure_devs_info:
            self.stats['failure'] += 1
            failure_devs = self.stats['failure_nodes'].setdefault(node, {})
            failure_devs.setdefault(dev, 0)
            failure_devs[dev] += 1

    def _rsync_file(self, db_file, remote_file, whole_file=True,
                    different_region=False):
        """
        Sync a single file using rsync. Used by _rsync_db to handle syncing.

        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag
        :param different_region: if True, the destination node is in a
                                 different region
        :returns: True if the sync was successful, False otherwise
        """
        popen_args = ['rsync', '--quiet', '--no-motd',
                      '--timeout=%s' % int(math.ceil(self.node_timeout)),
                      '--contimeout=%s' % int(math.ceil(self.conn_timeout))]
        if whole_file:
            popen_args.append('--whole-file')
        if self.rsync_compress and different_region:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            popen_args.append('--compress')
        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {'code': proc.returncode, 'args': popen_args})
        return proc.returncode == 0

    def _rsync_db(self, broker, device, http, local_id,
                  replicate_method='complete_rsync', replicate_timeout=None,
                  different_region=False):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        :param different_region: if True, the destination node is in a
                                 different region
        """
        rsync_module = rsync_module_interpolation(self.rsync_module, device)
        rsync_path = '%s/tmp/%s' % (device['device'], local_id)
        remote_file = '%s/%s' % (rsync_module, rsync_path)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file, remote_file,
                                different_region=different_region):
            return False
        # perform block-level sync if the db was modified during the first
        # sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file, remote_file,
                                        whole_file=False,
                                        different_region=different_region):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id,
                                      os.path.basename(broker.db_file))
        return response and 200 <= response.status < 300

    def _send_replicate_request(self, http, *repl_args):
        with Timeout(self.node_timeout):
            response = http.replicate(*repl_args)
        if not response or not is_success(response.status):
            if response:
                self.logger.error('ERROR Bad response %s from %s',
                                  response.status, http.host)
            return False
        return True

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.

        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica
        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('%s usyncing chunks to %s, starting at row %s',
                          broker.db_file,
                          '%(ip)s:%(port)s/%(device)s' % http.node,
                          point)
        start = time.time()
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            if not self._send_replicate_request(
                    http, 'merge_items', objects, local_id):
                return False
            # replication relies on db order to send the next merge batch in
            # order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)
        self.logger.debug('%s usyncing chunks to %s, finished at row %s (%gs)',
                          broker.db_file,
                          '%(ip)s:%(port)s/%(device)s' % http.node,
                          point, time.time() - start)
        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and 200 <= response.status < 300:
                broker.merge_syncs([{'remote_id': remote_id,
                                     'sync_point': point}], incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas
        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{'remote_id': rinfo['id'],
                                 'sync_point': rinfo['point']}],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the url
        :param db_file: DB file
        :returns: ReplConnection object
        """
        hsh, other, ext = parse_db_filename(db_file)
        return ReplConnection(node, partition, hsh, self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self, node, broker, partition, info,
                      different_region=False):
        """
        Replicate a database to a node.

        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
                     'created_at', 'put_timestamp', 'delete_timestamp',
                     'metadata'}
        :param different_region: if True, the destination node is in a
                                 different region
        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node, response, info, broker, http,
                                          different_region=different_region)

    def _handle_sync_response(self, node, response, info, broker, http,
                              different_region=False):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker, node, http, info['id'],
                                  different_region=different_region)
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif 200 <= response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if rinfo.get('metadata', ''):
                broker.update_metadata(json.loads(rinfo['metadata']))
            return self._choose_replication_mode(
                node, rinfo, info, local_sync, broker, http,
                different_region)
        return False

    def _choose_replication_mode(self, node, rinfo, info, local_sync,
                                 broker, http, different_region):
        if self._in_sync(rinfo, info, broker, local_sync):
            self.logger.debug('%s in sync with %s, nothing to do',
                              broker.db_file,
                              '%(ip)s:%(port)s/%(device)s' % node)
            return True
        # if the difference in rowids between the two differs by
        # more than 50% and the difference is greater than per_diff,
        # rsync then do a remote merge.
        # NOTE: difference > per_diff stops us from dropping to rsync
        # on smaller containers, who have only a few rows to sync.
        if (rinfo['max_row'] / float(info['max_row']) < 0.5 and
                info['max_row'] - rinfo['max_row'] > self.per_diff):
            self.stats['remote_merge'] += 1
            self.logger.increment('remote_merges')
            return self._rsync_db(broker, node, http, info['id'],
                                  replicate_method='rsync_then_merge',
                                  replicate_timeout=(info['count'] / 2000),
                                  different_region=different_region)
        # else send diffs over to the remote server
        return self._usync_db(max(rinfo['point'], local_sync),
                              broker, http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: broker instance for the database that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def cleanup_post_replicate(self, broker, orig_info, responses):
        """
        Cleanup non primary database from disk if needed.

        :param broker: the broker for the database we're replicating
        :param orig_info: snapshot of the broker replication info dict taken
                          before replication
        :param responses: a list of boolean success values for each
                          replication request to other nodes
        :return success: returns False if deletion of the database was
                         attempted but unsuccessful, otherwise returns True.
        """
        log_template = 'Not deleting db %s (%%s)' % broker.db_file
        max_row_delta = broker.get_max_row() - orig_info['max_row']
        if max_row_delta < 0:
            reason = 'negative max_row_delta: %s' % max_row_delta
            self.logger.error(log_template, reason)
            return True
        if max_row_delta:
            reason = '%s new rows' % max_row_delta
            self.logger.debug(log_template, reason)
            return True
        if not (responses and all(responses)):
            reason = '%s/%s success' % (responses.count(True), len(responses))
            self.logger.debug(log_template, reason)
            return True
        # If the db has been successfully synced to all of its peers, it can
        # be removed. Callers should have already checked that the db is not
        # on a primary node.
        if not self.delete_db(broker):
            self.logger.debug('Failed to delete db %s', broker.db_file)
            return False
        self.logger.debug('Successfully deleted db %s', broker.db_file)
        return True

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        :returns: a tuple (success, responses). ``success`` is a boolean that
            is True if the method completed successfully, False otherwise.
            ``responses`` is a list of booleans each of which indicates the
            success or not of replicating to a peer node if replication has
            been attempted. ``success`` is False if any of ``responses`` is
            False; when ``responses`` is empty, ``success`` may be either
            True or False.
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        responses = []
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(
                info['account'], info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.' % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return False, responses
        if broker.is_reclaimable(now, self.reclaim_age):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return True, responses
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # See Footnote [1] for an explanation of the repl_nodes assignment.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:
            # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to '
                          'reach replica count for partition %s'), partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'),
                                  node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            if not success:
                failure_devs_info.add((node['replication_ip'],
                                       node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
                                  'hook for %s', broker.db_file)
        if not shouldbehere:
            if not self.cleanup_post_replicate(broker, info, responses):
                failure_devs_info.update(
                    [(failure_dev['replication_ip'], failure_dev['device'])
                     for failure_dev in repl_nodes])
        target_devs_info = set([(target_dev['replication_ip'],
                                 target_dev['device'])
                                for target_dev in repl_nodes])
        self.stats['success'] += len(target_devs_info - failure_devs_info)
        self._add_failure_stats(failure_devs_info)
        self.logger.timing_since('timing', start_time)
        if shouldbehere:
            responses.append(True)
        return all(responses), responses

    def delete_db(self, broker):
        object_file = broker.db_file
        hash_dir = os.path.dirname(object_file)
        suf_dir = os.path.dirname(hash_dir)
        with lock_parent_directory(object_file):
            shutil.rmtree(hash_dir, True)
        try:
            os.rmdir(suf_dir)
        except OSError as err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                self.logger.exception(
                    _('ERROR while trying to clean up %s') % suf_dir)
                return False
        self.stats['remove'] += 1
        device_name = self.extract_device(object_file)
        self.logger.increment('removes.' + device_name)
        return True

    def extract_device(self, object_file):
        """
        Extract the device name from an object path. Returns "UNKNOWN" if the
        path could not be extracted successfully for some reason.

        :param object_file: the path to a database file.
        """
        match = self.extract_device_re.match(object_file)
        if match:
            return match.groups()[0]
        return "UNKNOWN"

    def _partition_dir_filter(self, device_id, partitions_to_replicate):

        def filt(partition_dir):
            partition = int(partition_dir)
            if self.handoffs_only:
                primary_node_ids = [
                    d['id'] for d in self.ring.get_part_nodes(partition)]
                if device_id in primary_node_ids:
                    return False
            if partition not in partitions_to_replicate:
                return False
            return True

        return filt

    def report_up_to_date(self, full_info):
        return True

    def roundrobin_datadirs(self, dirs):
        return RateLimitedIterator(
            roundrobin_datadirs(dirs),
            elements_per_second=self.databases_per_second)

    def run_once(self, *args, **kwargs):
        """Run a replication pass once."""
        override_options = parse_override_options(once=True, **kwargs)
        devices_to_replicate = override_options.devices or Everything()
        partitions_to_replicate = override_options.partitions or Everything()
        self._zero_stats()
        dirs = []
        ips = whataremyips(self.bind_ip)
        if not ips:
            self.logger.error(_('ERROR Failed to get my own IPs?'))
            return
        if self.handoffs_only:
            self.logger.warning(
                'Starting replication pass with handoffs_only enabled. '
                'This mode is not intended for normal '
                'operation; use handoffs_only with care.')
        self._local_device_ids = set()
        found_local = False
        for node in self.ring.devs:
            if node and is_local_device(ips, self.port,
                                        node['replication_ip'],
                                        node['replication_port']):
                found_local = True
                try:
                    dev_path = check_drive(self.root, node['device'],
                                           self.mount_check)
                except ValueError as err:
                    self._add_failure_stats(
                        [(failure_dev['replication_ip'],
                          failure_dev['device'])
                         for failure_dev in self.ring.devs if failure_dev])
                    self.logger.warning('Skipping: %s', err)
                    continue
                if node['device'] not in devices_to_replicate:
                    self.logger.debug(
                        'Skipping device %s due to given arguments',
                        node['device'])
                    continue
                unlink_older_than(
                    os.path.join(dev_path, 'tmp'),
                    time.time() - self.reclaim_age)
                datadir = os.path.join(self.root, node['device'],
                                       self.datadir)
                if os.path.isdir(datadir):
                    self._local_device_ids.add(node['id'])
                    part_filt = self._partition_dir_filter(
                        node['id'], partitions_to_replicate)
                    dirs.append((datadir, node['id'], part_filt))
        if not found_local:
            self.logger.error("Can't find itself %s with port %s in ring "
                              "file, not replicating",
                              ", ".join(ips), self.port)
        self.logger.info(_('Beginning replication run'))
        for part, object_file, node_id in self.roundrobin_datadirs(dirs):
            self.cpool.spawn_n(
                self._replicate_object, part, object_file, node_id)
        self.cpool.waitall()
        self.logger.info(_('Replication run OVER'))
        if self.handoffs_only:
            self.logger.warning(
                'Finished replication pass with handoffs_only enabled. '
                'If handoffs_only is no longer required, disable it.')
        self._report_stats()

    def run_forever(self, *args, **kwargs):
        """
        Replicate dbs under the given root in an infinite loop.
        """
        sleep(random.random() * self.interval)
        while True:
            begin = time.time()
            try:
                self.run_once()
            except (Exception, Timeout):
                self.logger.exception(_('ERROR trying to replicate'))
            elapsed = time.time() - begin
            if elapsed < self.interval:
                sleep(self.interval - elapsed)
        order = Order(id=nextid, exchange=d['exchange'], ticker=d['ticker'],
                      price=oprice, volume=d['volume'], type=d['action'])
        ORDERS[nextid] = order
        logger.info("ORDER: %s", order)
        resp = {
            'order_id': nextid,
            'price': oprice,
            'profit': profit,
            'retcode': 0
        }
        nextid += 1

    # Unknown action otherwise
    socket.send(bytes(json.dumps(resp), 'utf-8'))


# Spawn green threads
logging.basicConfig(level=logging.INFO)
pool = GreenPool()
try:
    pool.spawn_n(handle_tick)
    pool.spawn_n(handle_broker)
    pool.waitall()  # Loops forever
finally:
    # There might be some orphan orders left over
    print("ORPHANS:", ORDERS)
class ProfileMiddleware(object):

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='profile')
        self.log_filename_prefix = conf.get('log_filename_prefix',
                                            DEFAULT_PROFILE_PREFIX)
        dirname = os.path.dirname(self.log_filename_prefix)
        # Note: this may fail with permission denied; it is better to create
        # the directory and grant the current user access in advance.
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        self.dump_interval = float(conf.get('dump_interval', 5.0))
        self.dump_timestamp = config_true_value(
            conf.get('dump_timestamp', 'no'))
        self.flush_at_shutdown = config_true_value(
            conf.get('flush_at_shutdown', 'no'))
        self.path = conf.get('path', '__profile__').replace('/', '')
        self.unwind = config_true_value(conf.get('unwind', 'no'))
        self.profile_module = conf.get('profile_module',
                                       'eventlet.green.profile')
        self.profiler = get_profiler(self.profile_module)
        self.profile_log = ProfileLog(self.log_filename_prefix,
                                      self.dump_timestamp)
        self.viewer = HTMLViewer(self.path, self.profile_module,
                                 self.profile_log)
        self.dump_pool = GreenPool(1000)
        self.last_dump_at = None

    def __del__(self):
        if self.flush_at_shutdown:
            self.profile_log.clear(str(os.getpid()))

    def _combine_body_qs(self, request):
        wsgi_input = request.environ['wsgi.input']
        query_dict = request.params
        qs_in_body = wsgi_input.read()
        query_dict.update(parse_qs(qs_in_body, keep_blank_values=True,
                                   strict_parsing=False))
        return query_dict

    def dump_checkpoint(self):
        current_time = time.time()
        if self.last_dump_at is None or \
                self.last_dump_at + self.dump_interval < current_time:
            self.dump_pool.spawn_n(self.profile_log.dump_profile,
                                   self.profiler, os.getpid())
            self.last_dump_at = current_time

    def __call__(self, environ, start_response):
        request = Request(environ)
        path_entry = request.path_info.split('/')
        # hijack favicon requests sent by the browser so they don't invoke
        # the profiling hook and contaminate the data.
        if path_entry[1] == 'favicon.ico':
            start_response('200 OK', [])
            return ''
        elif path_entry[1] == self.path:
            try:
                self.dump_checkpoint()
                query_dict = self._combine_body_qs(request)
                content, headers = self.viewer.render(request.url,
                                                      request.method,
                                                      path_entry,
                                                      query_dict,
                                                      self.renew_profile)
                start_response('200 OK', headers)
                return [bytes_(content)]
            except MethodNotAllowed as mx:
                start_response('405 Method Not Allowed', [])
                return '%s' % mx
            except NotFoundException as nx:
                start_response('404 Not Found', [])
                return '%s' % nx
            except ProfileException as pf:
                start_response('500 Internal Server Error', [])
                return '%s' % pf
            except Exception as ex:
                start_response('500 Internal Server Error', [])
                return _('Error on render profiling results: %s') % ex
        else:
            try:
                _locals = locals()
                code = self.unwind and PROFILE_EXEC_EAGER or \
                    PROFILE_EXEC_LAZY
                self.profiler.runctx(code, globals(), _locals)
                app_iter = _locals['app_iter_']
                self.dump_checkpoint()
                return app_iter
            except:
                self.logger.exception(_('Error profiling code'))
            finally:
                pass

    def renew_profile(self):
        self.profiler = get_profiler(self.profile_module)
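# A hypothetical way to wire up ProfileMiddleware directly; the conf keys
# mirror exactly what __init__ above reads, and `demo_app` is an assumption,
# not part of the original module.
conf = {
    'log_filename_prefix': '/tmp/profile/default.profile',
    'dump_interval': '5.0',
    'dump_timestamp': 'no',
    'path': '__profile__',
    'flush_at_shutdown': 'no',
    'unwind': 'no',
    'profile_module': 'eventlet.green.profile',
}

def demo_app(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return ['profiled response']

# every request through profiled_app is now recorded by the profiler
profiled_app = ProfileMiddleware(demo_app, conf)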
            if len(messages) > 0:
                last_message = messages[-1]
                ttl = int(last_message['body']['ttl'])
                reset_count = int(last_message['body']['work_item_count'])
                if reset_count >= 0:
                    work_item_count = reset_count
                    print 'New control message: ttl=%d, work_item_count=%d' % \
                        (ttl, reset_count)
        except Exception as ex:
            print ex
        eventlet.sleep(1)


pool.spawn_n(get_worker_information)


def post_stats():
    global work_item_count
    s = socket.socket()
    s.connect((sys.argv[2], int(sys.argv[3])))
    while True:
        start_time = time.time()
        graphite_message = 'openstack.worker.result.sum %d %d\n' % (
            work_item_count, int(time.time()))
        s.sendall(graphite_message)
        elapsed_time = time.time() - start_time
    global rate, ttl
    queue = conn.create_queue('openstack-producer-controller')
    while True:
        messages = list(queue.get_messages(restart=True))
        if len(messages) > 0:
            last_message = messages[-1]
            rate = last_message['body']['rate']
            ttl = last_message['body']['ttl']
        else:
            rate, ttl = 10, 60
        eventlet.sleep(1)


pool.spawn_n(get_production_information)


def post_work():
    global rate, ttl, messages_created
    queue = conn.create_queue('openstack-tasks')
    job_types = {0: 'prime', 1: 'fibonacci'}
    while True:
        if rate == 0:
            eventlet.sleep(1)
            continue
        else:
            eventlet.sleep(1.0 / rate)
        job_type = random.randint(0, 1)
class Concurrency(object):
    """
    Convenience class to support concurrency, if Eventlet is available;
    otherwise it just performs at single concurrency.

    :param concurrency: The level of concurrency desired. Default: 10
    """

    def __init__(self, concurrency=10):
        self.concurrency = concurrency
        if self.concurrency and GreenPool:
            self._pool = GreenPool(self.concurrency)
        else:
            self._pool = None
        self._queue = Queue.Queue()
        self._results = {}

    def _spawner(self, ident, func, *args, **kwargs):
        exc_type = exc_value = exc_tb = result = None
        try:
            result = func(*args, **kwargs)
        except (Exception, Timeout):
            exc_type, exc_value, exc_tb = sys.exc_info()
        self._queue.put((ident, (exc_type, exc_value, exc_tb, result)))

    def spawn(self, ident, func, *args, **kwargs):
        """
        Returns immediately to the caller and begins executing the func in
        the background. Use get_results and the ident given to retrieve the
        results of the func. If the func causes an exception, this exception
        will be caught and the sys.exc_info() will be returned via
        get_results.

        :param ident: An identifier to find the results of the func from
                      get_results. This identifier can be anything unique
                      to the Concurrency instance.
        :param func: The function to execute concurrently.
        :param args: The args to give the func.
        :param kwargs: The keyword args to give the func.
        :returns: None
        """
        if self._pool:
            self._pool.spawn_n(self._spawner, ident, func, *args, **kwargs)
            sleep()
        else:
            self._spawner(ident, func, *args, **kwargs)

    def get_results(self):
        """
        Returns a dict of the results currently available. The keys are the
        ident values given with the calls to spawn. The values are tuples of
        (exc_type, exc_value, exc_tb, result) where:

        ==========  ============================================
        exc_type    The type of any exception raised.
        exc_value   The actual exception if any was raised.
        exc_tb      The traceback if any exception was raised.
        result      If no exception was raised, this will be the
                    return value of the called function.
        ==========  ============================================
        """
        try:
            while True:
                ident, value = self._queue.get(block=False)
                self._results[ident] = value
        except Queue.Empty:
            pass
        return self._results

    def join(self):
        """
        Blocks until all currently pending functions have finished.
        """
        if self._pool:
            self._pool.waitall()
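# A hypothetical driver for the error-capturing Concurrency variant above;
# `might_fail` is an assumption. get_results() here returns
# (exc_type, exc_value, exc_tb, result) tuples keyed by ident.
def might_fail(x):
    if x % 2:
        raise ValueError('odd input: %d' % x)
    return x * 10

conc = Concurrency(concurrency=4)
for i in range(4):
    conc.spawn(i, might_fail, i)
conc.join()
for ident, (exc_type, exc_value, exc_tb, result) in \
        sorted(conc.get_results().items()):
    if exc_type:
        print('%s failed: %s' % (ident, exc_value))
    else:
        print('%s -> %s' % (ident, result))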
def run(self, *args, **kwargs): try: self.logger.info('event agent: starting') pool = GreenPool(len(self.workers)) for worker in self.workers: pool.spawn(worker.start) def front(server, backend): while True: msg = server.recv_multipart() if validate_msg(msg): try: event_id = msg[2] data = msg[3] self.queue.put(event_id, data) event = ['', msg[2], msg[3]] backend.send_multipart(event) except Exception: pass finally: ack = msg[0:3] server.send_multipart(ack) def back(backend): while True: msg = backend.recv_multipart() event_id = msg[1] success = msg[2] if not success: self.queue.failed(event_id) self.logger.warn('event %s moved to failed', binascii.hexlify(event_id)) else: self.queue.delete(event_id) self.logger.debug('event %s removed from queue', binascii.hexlify(event_id)) boss_pool = GreenPool(2) boss_pool.spawn_n(front, self.server, self.backend) boss_pool.spawn_n(back, self.backend) while True: results = self.queue.load(self.batch_size) for event in results: event_id, data = event msg = ['', event_id, str(data)] self.backend.send_multipart(msg) self.retries_run_time = ratelimit( self.retries_run_time, self.max_retries_per_second) for w in self.workers: if w.failed: self.workers.remove(w) self.logger.warn('restart worker "%s"', w.name) new_w = EventWorker(self.conf, w.name, self.context) self.workers.append(new_w) pool.spawn(new_w.start) sleep(SLEEP_TIME) except Exception as e: self.logger.error('ERROR in main loop %s', e) raise finally: self.logger.warn('event agent: stopping') self.stop_workers() self.context.destroy(linger=True) self.context = None
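The run() method above combines three GreenPool roles: a worker pool, a two-greenlet "boss" pool shuttling what look like ZeroMQ-style multipart messages, and a respawn check for failed workers. A reduced sketch of just the respawn pattern, where only the `failed` flag and worker `start`/`name` attributes are taken from the snippet and everything else is illustrative:

from eventlet import GreenPool, sleep

def supervise(workers, make_worker, check_interval=1):
    # Spawn every worker, then keep replacing any worker whose `failed`
    # flag has been set, reusing the same pool.
    pool = GreenPool(len(workers))
    for w in workers:
        pool.spawn(w.start)
    while True:
        for w in list(workers):
            if w.failed:
                workers.remove(w)
                replacement = make_worker(w.name)
                workers.append(replacement)
                pool.spawn(replacement.start)
        sleep(check_interval)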
class Replicator(Daemon): """ Implements the logic for directing db replication. """ def __init__(self, conf, logger=None): self.conf = conf self.logger = logger or get_logger(conf, log_route='replicator') self.root = conf.get('devices', '/srv/node') self.mount_check = config_true_value(conf.get('mount_check', 'true')) self.bind_ip = conf.get('bind_ip', '0.0.0.0') self.port = int(conf.get('bind_port', self.default_port)) concurrency = int(conf.get('concurrency', 8)) self.cpool = GreenPool(size=concurrency) swift_dir = conf.get('swift_dir', '/etc/swift') self.ring = ring.Ring(swift_dir, ring_name=self.server_type) self._local_device_ids = set() self.per_diff = int(conf.get('per_diff', 1000)) self.max_diffs = int(conf.get('max_diffs') or 100) self.interval = int( conf.get('interval') or conf.get('run_pause') or 30) self.node_timeout = float(conf.get('node_timeout', 10)) self.conn_timeout = float(conf.get('conn_timeout', 0.5)) self.rsync_compress = config_true_value( conf.get('rsync_compress', 'no')) self.rsync_module = conf.get('rsync_module', '').rstrip('/') if not self.rsync_module: self.rsync_module = '{replication_ip}::%s' % self.server_type self.reclaim_age = float(conf.get('reclaim_age', 86400 * 7)) swift.common.db.DB_PREALLOCATION = \ config_true_value(conf.get('db_preallocation', 'f')) self._zero_stats() self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.recon_replicator = '%s.recon' % self.server_type self.rcache = os.path.join(self.recon_cache_path, self.recon_replicator) self.extract_device_re = re.compile( '%s%s([^%s]+)' % (self.root, os.path.sep, os.path.sep)) def _zero_stats(self): """Zero out the stats.""" self.stats = { 'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0, 'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0, 'remove': 0, 'empty': 0, 'remote_merge': 0, 'start': time.time(), 'diff_capped': 0, 'failure_nodes': {} } def _report_stats(self): """Report the current stats to the logs.""" now = time.time() self.logger.info( _('Attempted to replicate %(count)d dbs in %(time).5f seconds ' '(%(rate).5f/s)'), { 'count': self.stats['attempted'], 'time': now - self.stats['start'], 'rate': self.stats['attempted'] / (now - self.stats['start'] + 0.0000001) }) self.logger.info(_('Removed %(remove)d dbs') % self.stats) self.logger.info( _('%(success)s successes, %(failure)s failures') % self.stats) dump_recon_cache( { 'replication_stats': self.stats, 'replication_time': now - self.stats['start'], 'replication_last': now }, self.rcache, self.logger) self.logger.info(' '.join([ '%s:%s' % item for item in sorted(self.stats.items()) if item[0] in ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl', 'empty', 'diff_capped', 'remote_merge') ])) def _add_failure_stats(self, failure_devs_info): for node, dev in failure_devs_info: self.stats['failure'] += 1 failure_devs = self.stats['failure_nodes'].setdefault(node, {}) failure_devs.setdefault(dev, 0) failure_devs[dev] += 1 def _rsync_file(self, db_file, remote_file, whole_file=True, different_region=False): """ Sync a single file using rsync. Used by _rsync_db to handle syncing. 
        :param db_file: file to be synced
        :param remote_file: remote location to sync the DB file to
        :param whole_file: if True, uses rsync's --whole-file flag
        :param different_region: if True, the destination node is in a
            different region

        :returns: True if the sync was successful, False otherwise
        """
        popen_args = [
            'rsync', '--quiet', '--no-motd',
            '--timeout=%s' % int(math.ceil(self.node_timeout)),
            '--contimeout=%s' % int(math.ceil(self.conn_timeout))
        ]
        if whole_file:
            popen_args.append('--whole-file')
        if self.rsync_compress and different_region:
            # Allow for compression, but only if the remote node is in
            # a different region than the local one.
            popen_args.append('--compress')
        popen_args.extend([db_file, remote_file])
        proc = subprocess.Popen(popen_args)
        proc.communicate()
        if proc.returncode != 0:
            self.logger.error(_('ERROR rsync failed with %(code)s: %(args)s'),
                              {'code': proc.returncode, 'args': popen_args})
        return proc.returncode == 0

    def _rsync_db(self, broker, device, http, local_id,
                  replicate_method='complete_rsync', replicate_timeout=None,
                  different_region=False):
        """
        Sync a whole db using rsync.

        :param broker: DB broker object of DB to be synced
        :param device: device to sync to
        :param http: ReplConnection object
        :param local_id: unique ID of the local database replica
        :param replicate_method: remote operation to perform after rsync
        :param replicate_timeout: timeout to wait in seconds
        :param different_region: if True, the destination node is in a
            different region
        """
        rsync_module = rsync_module_interpolation(self.rsync_module, device)
        rsync_path = '%s/tmp/%s' % (device['device'], local_id)
        remote_file = '%s/%s' % (rsync_module, rsync_path)
        mtime = os.path.getmtime(broker.db_file)
        if not self._rsync_file(broker.db_file, remote_file,
                                different_region=different_region):
            return False
        # perform block-level sync if the db was modified during the first
        # sync
        if os.path.exists(broker.db_file + '-journal') or \
                os.path.getmtime(broker.db_file) > mtime:
            # grab a lock so nobody else can modify it
            with broker.lock():
                if not self._rsync_file(broker.db_file, remote_file,
                                        whole_file=False,
                                        different_region=different_region):
                    return False
        with Timeout(replicate_timeout or self.node_timeout):
            response = http.replicate(replicate_method, local_id)
        return response and 200 <= response.status < 300

    def _usync_db(self, point, broker, http, remote_id, local_id):
        """
        Sync a db by sending all records since the last sync.
        :param point: synchronization high water mark between the replicas
        :param broker: database broker object
        :param http: ReplConnection object for the remote server
        :param remote_id: database id for the remote replica
        :param local_id: database id for the local replica

        :returns: boolean indicating completion and success
        """
        self.stats['diff'] += 1
        self.logger.increment('diffs')
        self.logger.debug('Syncing chunks with %s, starting at %s',
                          http.host, point)
        sync_table = broker.get_syncs()
        objects = broker.get_items_since(point, self.per_diff)
        diffs = 0
        while len(objects) and diffs < self.max_diffs:
            diffs += 1
            with Timeout(self.node_timeout):
                response = http.replicate('merge_items', objects, local_id)
            if not response or response.status >= 300 or \
                    response.status < 200:
                if response:
                    self.logger.error(_('ERROR Bad response %(status)s from '
                                        '%(host)s'),
                                      {'status': response.status,
                                       'host': http.host})
                return False
            # replication relies on db order to send the next merge batch
            # in order with no gaps
            point = objects[-1]['ROWID']
            objects = broker.get_items_since(point, self.per_diff)
        if objects:
            self.logger.debug(
                'Synchronization for %s has fallen more than '
                '%s rows behind; moving on and will try again next pass.',
                broker, self.max_diffs * self.per_diff)
            self.stats['diff_capped'] += 1
            self.logger.increment('diff_caps')
        else:
            with Timeout(self.node_timeout):
                response = http.replicate('merge_syncs', sync_table)
            if response and 200 <= response.status < 300:
                broker.merge_syncs([{'remote_id': remote_id,
                                     'sync_point': point}], incoming=False)
                return True
        return False

    def _in_sync(self, rinfo, info, broker, local_sync):
        """
        Determine whether or not two replicas of a database are considered
        to be in sync.

        :param rinfo: remote database info
        :param info: local database info
        :param broker: database broker object
        :param local_sync: cached last sync point between replicas

        :returns: boolean indicating whether or not the replicas are in sync
        """
        if max(rinfo['point'], local_sync) >= info['max_row']:
            self.stats['no_change'] += 1
            self.logger.increment('no_changes')
            return True
        if rinfo['hash'] == info['hash']:
            self.stats['hashmatch'] += 1
            self.logger.increment('hashmatches')
            broker.merge_syncs([{'remote_id': rinfo['id'],
                                 'sync_point': rinfo['point']}],
                               incoming=False)
            return True

    def _http_connect(self, node, partition, db_file):
        """
        Make an http_connection using ReplConnection

        :param node: node dictionary from the ring
        :param partition: partition to send in the url
        :param db_file: DB file

        :returns: ReplConnection object
        """
        return ReplConnection(node, partition,
                              os.path.basename(db_file).split('.', 1)[0],
                              self.logger)

    def _gather_sync_args(self, info):
        """
        Convert local replication_info to sync args tuple.
        """
        sync_args_order = ('max_row', 'hash', 'id', 'created_at',
                           'put_timestamp', 'delete_timestamp', 'metadata')
        return tuple(info[key] for key in sync_args_order)

    def _repl_to_node(self, node, broker, partition, info,
                      different_region=False):
        """
        Replicate a database to a node.
        :param node: node dictionary from the ring to be replicated to
        :param broker: DB broker for the DB to be replicated
        :param partition: partition on the node to replicate to
        :param info: DB info as a dictionary of {'max_row', 'hash', 'id',
            'created_at', 'put_timestamp', 'delete_timestamp', 'metadata'}
        :param different_region: if True, the destination node is in a
            different region

        :returns: True if successful, False otherwise
        """
        http = self._http_connect(node, partition, broker.db_file)
        sync_args = self._gather_sync_args(info)
        with Timeout(self.node_timeout):
            response = http.replicate('sync', *sync_args)
        if not response:
            return False
        return self._handle_sync_response(node, response, info, broker,
                                          http,
                                          different_region=different_region)

    def _handle_sync_response(self, node, response, info, broker, http,
                              different_region=False):
        if response.status == HTTP_NOT_FOUND:  # completely missing, rsync
            self.stats['rsync'] += 1
            self.logger.increment('rsyncs')
            return self._rsync_db(broker, node, http, info['id'],
                                  different_region=different_region)
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif 200 <= response.status < 300:
            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if self._in_sync(rinfo, info, broker, local_sync):
                return True
            # if the difference in rowids between the two differs by
            # more than 50% and the difference is greater than per_diff,
            # rsync then do a remote merge.
            # NOTE: difference > per_diff stops us from dropping to rsync
            # on smaller containers, which have only a few rows to sync.
            if rinfo['max_row'] / float(info['max_row']) < 0.5 and \
                    info['max_row'] - rinfo['max_row'] > self.per_diff:
                self.stats['remote_merge'] += 1
                self.logger.increment('remote_merges')
                return self._rsync_db(broker, node, http, info['id'],
                                      replicate_method='rsync_then_merge',
                                      replicate_timeout=(info['count'] / 2000),
                                      different_region=different_region)
            # else send diffs over to the remote server
            return self._usync_db(max(rinfo['point'], local_sync),
                                  broker, http, rinfo['id'], info['id'])

    def _post_replicate_hook(self, broker, info, responses):
        """
        :param broker: the container that just replicated
        :param info: pre-replication full info dict
        :param responses: a list of bools indicating success from nodes
        """
        pass

    def _replicate_object(self, partition, object_file, node_id):
        """
        Replicate the db, choosing method based on whether or not it
        already exists on peers.

        :param partition: partition to be replicated to
        :param object_file: DB file name to be replicated
        :param node_id: node id of the node to be replicated to
        """
        start_time = now = time.time()
        self.logger.debug('Replicating db %s', object_file)
        self.stats['attempted'] += 1
        self.logger.increment('attempts')
        shouldbehere = True
        try:
            broker = self.brokerclass(object_file, pending_timeout=30)
            broker.reclaim(now - self.reclaim_age,
                           now - (self.reclaim_age * 2))
            info = broker.get_replication_info()
            bpart = self.ring.get_part(info['account'],
                                       info.get('container'))
            if bpart != int(partition):
                partition = bpart
                # Important to set this false here since the later check only
                # checks if it's on the proper device, not partition.
                shouldbehere = False
                name = '/' + quote(info['account'])
                if 'container' in info:
                    name += '/' + quote(info['container'])
                self.logger.error(
                    'Found %s for %s when it should be on partition %s; will '
                    'replicate out and remove.'
                    % (object_file, name, bpart))
        except (Exception, Timeout) as e:
            if 'no such table' in str(e):
                self.logger.error(_('Quarantining DB %s'), object_file)
                quarantine_db(broker.db_file, broker.db_type)
            else:
                self.logger.exception(_('ERROR reading db %s'), object_file)
            nodes = self.ring.get_part_nodes(int(partition))
            self._add_failure_stats([(failure_dev['replication_ip'],
                                      failure_dev['device'])
                                     for failure_dev in nodes])
            self.logger.increment('failures')
            return
        # The db is considered deleted if the delete_timestamp value is
        # greater than the put_timestamp, and there are no objects.
        delete_timestamp = Timestamp(info.get('delete_timestamp') or 0)
        put_timestamp = Timestamp(info.get('put_timestamp') or 0)
        if (now - self.reclaim_age) > delete_timestamp > put_timestamp and \
                info['count'] in (None, '', 0, '0'):
            if self.report_up_to_date(info):
                self.delete_db(broker)
            self.logger.timing_since('timing', start_time)
            return
        responses = []
        failure_devs_info = set()
        nodes = self.ring.get_part_nodes(int(partition))
        local_dev = None
        for node in nodes:
            if node['id'] == node_id:
                local_dev = node
                break
        if shouldbehere:
            shouldbehere = bool([n for n in nodes if n['id'] == node_id])
        # Replicate to the other primary nodes, starting with the node
        # that follows this one in the ring and wrapping around, so every
        # replica pushes outward in a consistent order.
        if len(nodes) > 1:
            i = 0
            while i < len(nodes) and nodes[i]['id'] != node_id:
                i += 1
            repl_nodes = nodes[i + 1:] + nodes[:i]
        else:
            # Special case if using only a single replica
            repl_nodes = nodes
        more_nodes = self.ring.get_more_nodes(int(partition))
        if not local_dev:
            # Check further if local device is a handoff node
            for node in self.ring.get_more_nodes(int(partition)):
                if node['id'] == node_id:
                    local_dev = node
                    break
        for node in repl_nodes:
            different_region = False
            if local_dev and local_dev['region'] != node['region']:
                # This additional information will help later if we
                # want to handle syncing to a node in different
                # region with some optimizations.
                different_region = True
            success = False
            try:
                success = self._repl_to_node(node, broker, partition, info,
                                             different_region)
            except DriveNotMounted:
                try:
                    repl_nodes.append(next(more_nodes))
                except StopIteration:
                    self.logger.error(
                        _('ERROR There are not enough handoff nodes to '
                          'reach replica count for partition %s'),
                        partition)
                self.logger.error(_('ERROR Remote drive not mounted %s'),
                                  node)
            except (Exception, Timeout):
                self.logger.exception(_('ERROR syncing %(file)s with node'
                                        ' %(node)s'),
                                      {'file': object_file, 'node': node})
            if not success:
                failure_devs_info.add((node['replication_ip'],
                                       node['device']))
            self.logger.increment('successes' if success else 'failures')
            responses.append(success)
        try:
            self._post_replicate_hook(broker, info, responses)
        except (Exception, Timeout):
            self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
                                  'hook for %s', broker.db_file)
        if not shouldbehere and responses and all(responses):
            # If the db shouldn't be on this node and has been successfully
            # synced to all of its peers, it can be removed.
if not self.delete_db(broker): failure_devs_info.update([(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in repl_nodes]) target_devs_info = set([(target_dev['replication_ip'], target_dev['device']) for target_dev in repl_nodes]) self.stats['success'] += len(target_devs_info - failure_devs_info) self._add_failure_stats(failure_devs_info) self.logger.timing_since('timing', start_time) def delete_db(self, broker): object_file = broker.db_file hash_dir = os.path.dirname(object_file) suf_dir = os.path.dirname(hash_dir) with lock_parent_directory(object_file): shutil.rmtree(hash_dir, True) try: os.rmdir(suf_dir) except OSError as err: if err.errno not in (errno.ENOENT, errno.ENOTEMPTY): self.logger.exception( _('ERROR while trying to clean up %s') % suf_dir) return False self.stats['remove'] += 1 device_name = self.extract_device(object_file) self.logger.increment('removes.' + device_name) return True def extract_device(self, object_file): """ Extract the device name from an object path. Returns "UNKNOWN" if the path could not be extracted successfully for some reason. :param object_file: the path to a database file. """ match = self.extract_device_re.match(object_file) if match: return match.groups()[0] return "UNKNOWN" def report_up_to_date(self, full_info): return True def run_once(self, *args, **kwargs): """Run a replication pass once.""" self._zero_stats() dirs = [] ips = whataremyips(self.bind_ip) if not ips: self.logger.error(_('ERROR Failed to get my own IPs?')) return self._local_device_ids = set() found_local = False for node in self.ring.devs: if node and is_local_device(ips, self.port, node['replication_ip'], node['replication_port']): found_local = True if not check_drive(self.root, node['device'], self.mount_check): self._add_failure_stats([ (failure_dev['replication_ip'], failure_dev['device']) for failure_dev in self.ring.devs if failure_dev ]) self.logger.warning( _('Skipping %(device)s as it is not mounted') % node) continue unlink_older_than( os.path.join(self.root, node['device'], 'tmp'), time.time() - self.reclaim_age) datadir = os.path.join(self.root, node['device'], self.datadir) if os.path.isdir(datadir): self._local_device_ids.add(node['id']) dirs.append((datadir, node['id'])) if not found_local: self.logger.error( "Can't find itself %s with port %s in ring " "file, not replicating", ", ".join(ips), self.port) self.logger.info(_('Beginning replication run')) for part, object_file, node_id in roundrobin_datadirs(dirs): self.cpool.spawn_n(self._replicate_object, part, object_file, node_id) self.cpool.waitall() self.logger.info(_('Replication run OVER')) self._report_stats() def run_forever(self, *args, **kwargs): """ Replicate dbs under the given root in an infinite loop. """ sleep(random.random() * self.interval) while True: begin = time.time() try: self.run_once() except (Exception, Timeout): self.logger.exception(_('ERROR trying to replicate')) elapsed = time.time() - begin if elapsed < self.interval: sleep(self.interval - elapsed)
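run_once() above is the canonical GreenPool fan-out: every (partition, db_file, node_id) tuple becomes one green thread, bounded by the configured concurrency, and waitall() joins the pass. The same pattern in isolation; the replicate body here is just a placeholder for _replicate_object():

from eventlet import GreenPool

def replicate_all(datadirs, concurrency=8):
    # One green thread per database, at most `concurrency` running at
    # once; waitall() blocks until the whole pass has finished.
    pool = GreenPool(size=concurrency)

    def replicate(part, db_file, node_id):
        pass  # stand-in for Replicator._replicate_object

    for part, db_file, node_id in datadirs:
        pool.spawn_n(replicate, part, db_file, node_id)
    pool.waitall()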
def get_worker_information():
    # The snippet begins mid-function in the source; the header is
    # reconstructed from the spawn_n() call below.
    global ttl
    queue = conn.get_queue('openstack-worker-controller')
    while True:
        messages = list(queue.get_messages(restart=True))
        if len(messages) > 0:
            last_message = messages[-1]
            ttl = last_message['body']['ttl']
        else:
            ttl = 60
        eventlet.sleep(1)

pool.spawn_n(get_worker_information)

queue_tasks = conn.get_queue('openstack-tasks')
queue_results = conn.get_queue('openstack-responses')
s = socket.socket()
s.connect((sys.argv[2], int(sys.argv[3])))
work_item_count = 0
while True:
    claim = queue_tasks.claim(ttl=60, grace=60)
    for msg in claim.messages:
        msg_body = msg['body']
        # truncated in the source: the claimed message is processed and a
        # result is posted to queue_results
class ProfileMiddleware(object):

    def __init__(self, app, conf):
        self.app = app
        self.logger = get_logger(conf, log_route='profile')
        self.log_filename_prefix = conf.get('log_filename_prefix',
                                            DEFAULT_PROFILE_PREFIX)
        dirname = os.path.dirname(self.log_filename_prefix)
        # Note: creating the directory here may fail with a permission
        # error; it is better to create it, with the right ownership, in
        # advance.
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        self.dump_interval = float(conf.get('dump_interval', 5.0))
        self.dump_timestamp = config_true_value(conf.get(
            'dump_timestamp', 'no'))
        self.flush_at_shutdown = config_true_value(conf.get(
            'flush_at_shutdown', 'no'))
        self.path = conf.get('path', '__profile__').replace('/', '')
        self.unwind = config_true_value(conf.get('unwind', 'no'))
        self.profile_module = conf.get('profile_module',
                                       'eventlet.green.profile')
        self.profiler = get_profiler(self.profile_module)
        self.profile_log = ProfileLog(self.log_filename_prefix,
                                      self.dump_timestamp)
        self.viewer = HTMLViewer(self.path, self.profile_module,
                                 self.profile_log)
        self.dump_pool = GreenPool(1000)
        self.last_dump_at = None

    def __del__(self):
        if self.flush_at_shutdown:
            self.profile_log.clear(str(os.getpid()))

    def _combine_body_qs(self, request):
        wsgi_input = request.environ['wsgi.input']
        query_dict = request.params
        qs_in_body = wsgi_input.read()
        query_dict.update(parse_qs(qs_in_body, keep_blank_values=True,
                                   strict_parsing=False))
        return query_dict

    def dump_checkpoint(self):
        current_time = time.time()
        if self.last_dump_at is None or \
                self.last_dump_at + self.dump_interval < current_time:
            self.dump_pool.spawn_n(self.profile_log.dump_profile,
                                   self.profiler, os.getpid())
            self.last_dump_at = current_time

    def __call__(self, environ, start_response):
        request = Request(environ)
        path_entry = request.path_info.split('/')
        # Hijack the favicon request sent by the browser so that it doesn't
        # invoke the profiling hook and contaminate the data.
        if path_entry[1] == 'favicon.ico':
            start_response('200 OK', [])
            return ''
        elif path_entry[1] == self.path:
            try:
                self.dump_checkpoint()
                query_dict = self._combine_body_qs(request)
                content, headers = self.viewer.render(request.url,
                                                      request.method,
                                                      path_entry,
                                                      query_dict,
                                                      self.renew_profile)
                start_response('200 OK', headers)
                return [bytes_(content)]
            except MethodNotAllowed as mx:
                start_response('405 Method Not Allowed', [])
                return '%s' % mx
            except NotFoundException as nx:
                start_response('404 Not Found', [])
                return '%s' % nx
            except ProfileException as pf:
                start_response('500 Internal Server Error', [])
                return '%s' % pf
            except Exception as ex:
                start_response('500 Internal Server Error', [])
                return _('Error on render profiling results: %s') % ex
        else:
            _locals = locals()
            code = self.unwind and PROFILE_EXEC_EAGER or \
                PROFILE_EXEC_LAZY
            self.profiler.runctx(code, globals(), _locals)
            app_iter = _locals['app_iter_']
            self.dump_checkpoint()
            return app_iter

    def renew_profile(self):
        self.profiler = get_profiler(self.profile_module)
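dump_checkpoint() above shows a useful idiom: hand the expensive profile dump to a GreenPool so the request path never blocks on it, and gate it with a timestamp so it runs at most once per dump_interval. The idiom reduced to a standalone sketch; the class name and dump callable are illustrative, not from the source:

import time
from eventlet import GreenPool

class PeriodicDumper(object):
    def __init__(self, dump_func, interval=5.0):
        self.dump_func = dump_func
        self.interval = interval
        self.pool = GreenPool(1000)
        self.last_dump_at = None

    def checkpoint(self):
        # Fire-and-forget: the dump runs in a green thread while the
        # caller continues; at most one dump per `interval` seconds.
        now = time.time()
        if self.last_dump_at is None or \
                self.last_dump_at + self.interval < now:
            self.pool.spawn_n(self.dump_func)
            self.last_dump_at = now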