def take_action(self, parsed_args):
    self.log.debug('take_action(%s)', parsed_args)

    digits = self.app.client_manager.meta1_digits
    concurrency = parsed_args.concurrency

    conf = {'namespace': self.app.client_manager.namespace}
    if parsed_args.proxy:
        conf.update({'proxyd_url': parsed_args.proxy})
    else:
        ns_conf = self.app.client_manager.sds_conf
        proxy = ns_conf.get('proxy')
        conf.update({'proxyd_url': proxy})

    workers = list()
    with green.ContextPool(concurrency) as pool:
        pile = GreenPile(pool)
        prefix_queue = Queue(16)

        # Prepare some workers
        for _ in range(concurrency):
            worker = WarmupWorker(self.app.client_manager.client_conf,
                                  self.log)
            workers.append(worker)
            pile.spawn(worker.run, prefix_queue)

        # Feed the queue
        trace_increment = 0.01
        trace_next = trace_increment
        sent, total = 0, float(count_prefixes(digits))
        for prefix in generate_prefixes(digits):
            sent += 1
            prefix_queue.put(prefix)
            # Display the progression
            ratio = float(sent) / total
            if ratio >= trace_next:
                self.log.info("... %d%%", int(ratio * 100.0))
                trace_next += trace_increment

        self.log.debug("Send the termination marker")
        prefix_queue.join()

    self.log.info("All the workers are done")
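
# --- Example (not part of the command above) --------------------------------
# A minimal, self-contained sketch of the producer/consumer pattern that
# take_action() relies on, written against plain eventlet (oio.common.green
# mostly re-exports these primitives). The worker body and the hexadecimal
# prefix generator are hypothetical stand-ins for WarmupWorker.run and
# generate_prefixes().
def _warmup_pattern_example(concurrency=4, digits=3):
    from eventlet import GreenPile, GreenPool
    from eventlet.queue import Queue

    def worker(queue):
        # Consumer: handle prefixes until the producer has joined the queue.
        while True:
            prefix = queue.get()
            # ... warm up `prefix` here ...
            queue.task_done()

    pool = GreenPool(concurrency)
    pile = GreenPile(pool)
    prefix_queue = Queue(16)  # bounded, so the producer cannot race ahead
    for _ in range(concurrency):
        pile.spawn(worker, prefix_queue)
    for i in range(16 ** digits):
        prefix_queue.put('%0*X' % (digits, i))
    # Returns once every prefix has been acknowledged with task_done().
    prefix_queue.join()
# The bounded queue provides backpressure: the feeding loop blocks as soon
# as the workers fall 16 prefixes behind, instead of buffering everything.
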
class EcChunkWriter(object):
    """
    Writes an EC chunk
    """
    def __init__(self, chunk, conn, write_timeout=None,
                 chunk_checksum_algo='md5', **_kwargs):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        if chunk_checksum_algo:
            self.checksum = hashlib.new(chunk_checksum_algo)
        else:
            self.checksum = None
        self.write_timeout = write_timeout or io.CHUNK_TIMEOUT
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None,
                connection_timeout=None, write_timeout=None, **kwargs):
        raw_url = chunk.get("real_url", chunk["url"])
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        hdrs = headers_from_object_metadata(sysmeta)
        if reqid:
            hdrs['X-oio-req-id'] = reqid

        hdrs[CHUNK_HEADERS["chunk_pos"]] = chunk["pos"]
        hdrs[CHUNK_HEADERS["chunk_id"]] = chunk_path

        # in the trailer
        # metachunk_size & metachunk_hash
        trailers = (CHUNK_HEADERS["metachunk_size"],
                    CHUNK_HEADERS["metachunk_hash"])
        if kwargs.get('chunk_checksum_algo'):
            trailers = trailers + (CHUNK_HEADERS["chunk_hash"], )
        hdrs["Trailer"] = ', '.join(trailers)
        with green.ConnectionTimeout(
                connection_timeout or io.CONNECTION_TIMEOUT):
            conn = io.http_connect(
                parsed.netloc, 'PUT', parsed.path, hdrs)
            conn.chunk = chunk
        return cls(chunk, conn, write_timeout=write_timeout, **kwargs)

    def start(self, pool):
        """Spawn the send coroutine"""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop"""
        while True:
            # fetch input data from the queue
            data = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            if not self.failed:
                # format the chunk
                to_send = "%x\r\n%s\r\n" % (len(data), data)
                try:
                    with green.ChunkWriteTimeout(self.write_timeout):
                        self.conn.send(to_send)
                        self.bytes_transferred += len(data)
                except (Exception, green.ChunkWriteTimeout) as exc:
                    self.failed = True
                    msg = str(exc)
                    logger.warn("Failed to write to %s (%s)",
                                self.chunk, msg)
                    self.chunk['error'] = 'write: %s' % msg
            self.queue.task_done()

    def wait(self):
        """
        Wait until all data in the queue
        has been processed by the send coroutine
        """
        if self.queue.unfinished_tasks:
            self.queue.join()

    def send(self, data):
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """Send metachunk_size and metachunk_hash as trailers"""
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_hash'],
                            metachunk_hash)
        ]
        if self.checksum:
            parts.append('%s: %s\r\n' % (CHUNK_HEADERS['chunk_hash'],
                                         self.checksum.hexdigest()))
        parts.append('\r\n')
        to_send = "".join(parts)
        self.conn.send(to_send)

    def getresponse(self):
        """Read the HTTP response from the connection"""
        # As the server may buffer data before writing it to non-volatile
        # storage, we don't know if we have to wait while sending data or
        # while reading response, thus we apply the same timeout to both.
        with Timeout(self.write_timeout):
            return self.conn.getresponse()
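
# --- Example (not part of the class above) ----------------------------------
# A sketch of the calling sequence for the writer above: connect, start,
# send, wait, finish, getresponse. The chunk URL, the (empty) sysmeta and
# the fragment payloads are hypothetical placeholders; actually running
# this requires a reachable rawx service behind the URL.
def _ec_chunk_writer_example():
    from eventlet import GreenPool

    chunk = {'url': 'http://127.0.0.1:6201/0123456789ABCDEF', 'pos': '0.1'}
    sysmeta = {}  # placeholder: real object metadata comes from the proxy

    writer = EcChunkWriter.connect(chunk, sysmeta, reqid='req-0001')
    writer.start(GreenPool())             # spawn the _send coroutine
    for fragment in ('frag0', 'frag1'):   # stand-ins for EC fragments
        writer.send(fragment)             # enqueued, written out by _send
    writer.wait()                         # block until the queue is drained
    writer.finish(10, '0' * 32)           # placeholder metachunk size/hash
    return writer.getresponse()
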
class EcChunkWriter(object):
    """
    Writes an EC chunk
    """
    def __init__(self, chunk, conn, write_timeout=None,
                 chunk_checksum_algo='md5', perfdata=None, **kwargs):
        self._chunk = chunk
        self._conn = conn
        self.failed = False
        self.bytes_transferred = 0
        if chunk_checksum_algo:
            self.checksum = hashlib.new(chunk_checksum_algo)
        else:
            self.checksum = None
        self.write_timeout = write_timeout or io.CHUNK_TIMEOUT
        # we use eventlet Queue to pass data to the send coroutine
        self.queue = Queue(io.PUT_QUEUE_DEPTH)
        self.reqid = kwargs.get('reqid')
        self.perfdata = perfdata
        self.logger = kwargs.get('logger', LOGGER)

    @property
    def chunk(self):
        return self._chunk

    @property
    def conn(self):
        return self._conn

    @classmethod
    def connect(cls, chunk, sysmeta, reqid=None,
                connection_timeout=None, write_timeout=None, **kwargs):
        raw_url = chunk.get("real_url", chunk["url"])
        parsed = urlparse(raw_url)
        chunk_path = parsed.path.split('/')[-1]
        hdrs = headers_from_object_metadata(sysmeta)
        if reqid:
            hdrs[REQID_HEADER] = reqid

        hdrs[CHUNK_HEADERS["chunk_pos"]] = chunk["pos"]
        hdrs[CHUNK_HEADERS["chunk_id"]] = chunk_path

        # in the trailer
        # metachunk_size & metachunk_hash
        trailers = (CHUNK_HEADERS["metachunk_size"],
                    CHUNK_HEADERS["metachunk_hash"])
        if kwargs.get('chunk_checksum_algo'):
            trailers = trailers + (CHUNK_HEADERS["chunk_hash"], )
        hdrs["Trailer"] = ', '.join(trailers)
        with ConnectionTimeout(
                connection_timeout or io.CONNECTION_TIMEOUT):
            perfdata = kwargs.get('perfdata', None)
            if perfdata is not None:
                connect_start = monotonic_time()
            conn = io.http_connect(
                parsed.netloc, 'PUT', parsed.path, hdrs,
                scheme=parsed.scheme)
            conn.set_cork(True)
            if perfdata is not None:
                connect_end = monotonic_time()
                perfdata_rawx = perfdata.setdefault('rawx', dict())
                perfdata_rawx['connect.' + chunk['url']] = \
                    connect_end - connect_start
            conn.chunk = chunk
        return cls(chunk, conn, write_timeout=write_timeout,
                   reqid=reqid, **kwargs)

    def start(self, pool):
        """Spawn the send coroutine"""
        pool.spawn(self._send)

    def _send(self):
        """Send coroutine loop"""
        self.conn.upload_start = None
        while not self.failed:
            # fetch input data from the queue
            data = self.queue.get()
            # use HTTP transfer encoding chunked
            # to write data to RAWX
            try:
                with ChunkWriteTimeout(self.write_timeout):
                    if self.perfdata is not None \
                            and self.conn.upload_start is None:
                        self.conn.upload_start = monotonic_time()
                    self.conn.send(b"%x\r\n" % len(data))
                    self.conn.send(data)
                    self.conn.send(b"\r\n")
                    self.bytes_transferred += len(data)
                eventlet_yield()
            except (Exception, ChunkWriteTimeout) as exc:
                self.failed = True
                msg = text_type(exc)
                self.logger.warn("Failed to write to %s (%s, reqid=%s)",
                                 self.chunk, msg, self.reqid)
                self.chunk['error'] = 'write: %s' % msg
            # Indicate that the data is completely sent
            self.queue.task_done()

        # Drain the queue before quitting
        while True:
            try:
                self.queue.get_nowait()
                self.queue.task_done()
            except Empty:
                break

    def wait(self):
        """
        Wait until all data in the queue
        has been processed by the send coroutine
        """
        self.logger.debug("Waiting for %s to receive data",
                          self.chunk['url'])
        # Wait until the data is completely sent to continue
        self.queue.join()

    def send(self, data):
        # do not send empty data because
        # this will end the chunked body
        if not data:
            return
        # put the data to send into the queue
        # it will be processed by the send coroutine
        self.queue.put(data)

    def finish(self, metachunk_size, metachunk_hash):
        """
        Send metachunk_size and metachunk_hash as trailers.

        :returns: the chunk object if the upload has failed, else None
        """
        self.wait()
        if self.failed:
            self.logger.debug("NOT sending end marker and trailers to %s, "
                              "because upload has failed", self.chunk['url'])
            return self.chunk
        self.logger.debug("Sending end marker and trailers to %s",
                          self.chunk['url'])
        parts = [
            '0\r\n',
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_size'],
                            metachunk_size),
            '%s: %s\r\n' % (CHUNK_HEADERS['metachunk_hash'],
                            metachunk_hash)
        ]
        if self.checksum:
            parts.append('%s: %s\r\n' % (CHUNK_HEADERS['chunk_hash'],
                                         self.checksum.hexdigest()))
        parts.append('\r\n')
        to_send = ''.join(parts).encode('utf-8')
        if self.perfdata is not None:
            fin_start = monotonic_time()
        try:
            with ChunkWriteTimeout(self.write_timeout):
                self.conn.send(to_send)
                # Last segment sent, disable TCP_CORK to flush buffers
                self.conn.set_cork(False)
        except (Exception, ChunkWriteTimeout) as exc:
            self.failed = True
            msg = text_type(exc)
            self.logger.warn("Failed to finish %s (%s, reqid=%s)",
                             self.chunk, msg, self.reqid)
            self.chunk['error'] = 'finish: %s' % msg
            return self.chunk
        finally:
            if self.perfdata is not None:
                fin_end = monotonic_time()
                rawx_perfdata = self.perfdata.setdefault('rawx', dict())
                chunk_url = self.conn.chunk['url']
                rawx_perfdata['upload_finish.' + chunk_url] = \
                    fin_end - fin_start
        return None

    def getresponse(self):
        """Read the HTTP response from the connection"""
        try:
            # As the server may buffer data before writing it to non-volatile
            # storage, we don't know if we have to wait while sending data or
            # while reading response, thus we apply the same timeout to both.
            with ChunkWriteTimeout(self.write_timeout):
                resp = self.conn.getresponse()
                return resp
        finally:
            if self.perfdata is not None:
                perfdata_rawx = self.perfdata.setdefault('rawx', dict())
                chunk_url = self.conn.chunk['url']
                upload_end = monotonic_time()
                perfdata_rawx['upload.' + chunk_url] = \
                    upload_end - self.conn.upload_start
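
# --- Example (not part of the class above) ----------------------------------
# What _send() and finish() put on the wire is plain HTTP/1.1 chunked
# transfer encoding, terminated by a zero-size segment carrying trailers.
# A self-contained illustration; the trailer names follow oio's
# x-oio-chunk-meta-* convention, and the sample hash is md5('hello').
def _chunked_body_example():
    def segment(data):
        # one chunked-encoding segment: "<hex size>\r\n<payload>\r\n"
        return b'%x\r\n%s\r\n' % (len(data), data)

    def end_with_trailers(trailers):
        # zero-size segment, "Name: value" trailer lines, then a blank line
        parts = [b'0\r\n']
        parts.extend(b'%s: %s\r\n' % (name, value)
                     for name, value in trailers)
        parts.append(b'\r\n')
        return b''.join(parts)

    return segment(b'hello') + end_with_trailers([
        (b'x-oio-chunk-meta-metachunk-size', b'5'),
        (b'x-oio-chunk-meta-metachunk-hash',
         b'5d41402abc4b2a76b9719d911017c592'),
    ])
# TCP_CORK (conn.set_cork) coalesces these many small send() calls into full
# frames; finish() clears it so the end marker and trailers flush immediately.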