def quorum_or_fail(self, successes, failures):
    """
    Compare the number of uploads against the quorum.

    :param successes: a list of chunk objects whose upload succeeded
    :type successes: `list` or `tuple`
    :param failures: a list of chunk objects whose upload failed
    :type failures: `list` or `tuple`

    :raises `exc.SourceReadError`: if there is an error
        while reading data from the client
    :raises `exc.SourceReadTimeout`: if there is a timeout
        while reading data from the client
    :raises `exc.OioTimeout`: if there is a timeout among the errors
    :raises `exc.OioException`: if quorum has not been reached
        for any other reason
    """
    if len(successes) < self.quorum:
        errors = group_chunk_errors(
            ((chunk["url"], chunk.get("error", "success"))
             for chunk in successes + failures))
        new_exc = exc.OioException(
            "RAWX write failure, quorum not reached (%d/%d): %s" % (
                len(successes), self.quorum, errors))
        for err in [x.get('error') for x in failures]:
            if isinstance(err, exc.SourceReadError):
                raise exc.SourceReadError(new_exc)
            elif isinstance(err, green.SourceReadTimeout):
                # Never raise 'green' timeouts out of our API
                raise exc.SourceReadTimeout(new_exc)
            elif isinstance(err, (exc.OioTimeout, green.OioTimeout)):
                raise exc.OioTimeout(new_exc)
        raise new_exc
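
# Illustrative, standalone sketch of the quorum rule enforced above (not part
# of the original class; the chunk dicts and the `_demo_quorum_rule` name are
# invented for the example). With a quorum of 2 over three chunk uploads, a
# single failure is tolerated, a second one is not.
def _demo_quorum_rule():
    quorum = 2
    successes = [{"url": "http://rawx-1/AAAA"}, {"url": "http://rawx-2/BBBB"}]
    failures = [{"url": "http://rawx-3/CCCC", "error": "connection refused"}]
    # Mirrors the check in quorum_or_fail(): enough successes, no exception.
    assert len(successes) >= quorum
    # Losing one more writer would drop below the quorum and raise OioException.
    assert len(successes) - 1 < quorum
    return successes, failures
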
def _stream(self, source, size, writers):
    bytes_transferred = 0

    # create EC encoding generator
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    # init generator
    ec_stream.send(None)

    def send(data):
        self.checksum.update(data)
        self.global_checksum.update(data)
        # get the encoded fragments
        fragments = ec_stream.send(data)
        if fragments is None:
            # not enough data given
            return

        current_writers = list(writers)
        failed_chunks = list()
        for writer in current_writers:
            fragment = fragments[chunk_index[writer]]
            if not writer.failed:
                if writer.checksum:
                    writer.checksum.update(fragment)
                writer.send(fragment)
            else:
                current_writers.remove(writer)
                failed_chunks.append(writer.chunk)
        self.quorum_or_fail([w.chunk for w in current_writers],
                            failed_chunks)

    try:
        # we use eventlet GreenPool to manage writers
        with green.ContextPool(len(writers)) as pool:
            # convenient index to figure out which writer
            # handles the resulting fragments
            chunk_index = self._build_index(writers)

            # init writers in pool
            for writer in writers:
                writer.start(pool)

            def read(read_size):
                with green.SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as exc:
                        raise SourceReadError(str(exc))
                return data

            # the main write loop
            if size:
                while True:
                    remaining_bytes = size - bytes_transferred
                    if io.WRITE_CHUNK_SIZE < remaining_bytes:
                        read_size = io.WRITE_CHUNK_SIZE
                    else:
                        read_size = remaining_bytes
                    data = read(read_size)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    send(data)
            else:
                while True:
                    data = read(io.WRITE_CHUNK_SIZE)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    send(data)

            # flush out buffered data
            send('')

            # wait for all data to be processed
            for writer in writers:
                writer.wait()

            # trailer headers
            # metachunk size
            # metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            for writer in writers:
                writer.finish(metachunk_size, metachunk_hash)

            return bytes_transferred

    except green.SourceReadTimeout as exc:
        logger.warn('%s', exc)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        logger.warn('Source read error: %s', exc)
        raise
    except Timeout as to:
        logger.error('Timeout writing data: %s', to)
        raise exceptions.OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data')
        raise
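
# Simplified, standalone stand-in for the coroutine protocol _stream() relies
# on (hypothetical; this is not the real ec_encode): the generator is primed
# with send(None), buffers incoming data, and only yields a list of "fragments"
# once a full segment has accumulated; sending an empty buffer flushes the
# remainder.
def _demo_fragment_encoder(fragment_count, segment_size=4):
    buf = b""
    fragments = None
    while True:
        data = yield fragments
        fragments = None
        buf += data
        if len(buf) >= segment_size or (data == b"" and buf):
            # A real erasure code would compute parity here; this demo just
            # replicates the buffered segment once per expected fragment.
            fragments = [buf] * fragment_count
            buf = b""

# Usage, mirroring the calls made in _stream() above:
#   enc = _demo_fragment_encoder(3)
#   enc.send(None)       # prime, like ec_stream.send(None)
#   enc.send(b"ab")      # -> None, not enough data buffered yet
#   enc.send(b"cdef")    # -> [b"abcdef", b"abcdef", b"abcdef"]
#   enc.send(b"")        # flush, like send('') at the end of the main loop
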
def _stream(self, source, size, writers):
    bytes_transferred = 0

    # create EC encoding generator
    ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
    # init generator
    ec_stream.send(None)

    try:
        # we use eventlet GreenPool to manage writers
        with ContextPool(len(writers) * 2) as pool:
            # init writers in pool
            for writer in writers:
                writer.start(pool)

            def read(read_size):
                with SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as exc:
                        raise SourceReadError(str(exc))
                return data

            # the main write loop
            # Maintain a list of writers which continue writing
            # TODO(FVE): use an instance variable
            # to maintain the list of writers
            curr_writers = writers
            if size:
                while True:
                    buffer_size = self.buffer_size()
                    remaining_bytes = size - bytes_transferred
                    if buffer_size < remaining_bytes:
                        read_size = buffer_size
                    else:
                        read_size = remaining_bytes
                    data = read(read_size)
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)
            else:
                while True:
                    data = read(self.buffer_size())
                    bytes_transferred += len(data)
                    if len(data) == 0:
                        break
                    curr_writers = self.encode_and_send(
                        ec_stream, data, curr_writers)

            # flush out buffered data
            self.encode_and_send(ec_stream, '', curr_writers)

            # trailer headers
            # metachunk size
            # metachunk hash
            metachunk_size = bytes_transferred
            metachunk_hash = self.checksum.hexdigest()

            finish_pile = GreenPile(pool)
            for writer in writers:
                finish_pile.spawn(writer.finish,
                                  metachunk_size, metachunk_hash)
            for just_failed in finish_pile:
                # Avoid reporting problems twice
                if just_failed and not any(x['url'] == just_failed['url']
                                           for x in self.failed_chunks):
                    self.failed_chunks.append(just_failed)

            return bytes_transferred

    except SourceReadTimeout as exc:
        self.logger.warn('%s (reqid=%s)', exc, self.reqid)
        raise exceptions.SourceReadTimeout(exc)
    except SourceReadError as exc:
        self.logger.warn('Source read error (reqid=%s): %s',
                         self.reqid, exc)
        raise
    except Timeout as to:
        self.logger.warn('Timeout writing data (reqid=%s): %s',
                         self.reqid, to)
        # Not the same class as the globally imported OioTimeout class
        raise exceptions.OioTimeout(to)
    except Exception:
        self.logger.exception('Exception writing data (reqid=%s)',
                              self.reqid)
        raise
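
# Standalone sketch of the read-size computation used in the main loop above
# (the helper name is invented): when the metachunk size is known, never read
# past the end of the metachunk; otherwise read full buffers until the source
# is exhausted.
def _demo_read_size(size, bytes_transferred, buffer_size):
    if not size:
        return buffer_size
    remaining_bytes = size - bytes_transferred
    return buffer_size if buffer_size < remaining_bytes else remaining_bytes

# Usage:
#   _demo_read_size(10, 8, 65536)    # -> 2, only two bytes left to read
#   _demo_read_size(None, 8, 65536)  # -> 65536, size unknown, read a full buffer
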
def stream(self, source, size=None):
    bytes_transferred = 0
    meta_chunk = self.meta_chunk
    if self.chunk_checksum_algo:
        meta_checksum = hashlib.new(self.chunk_checksum_algo)
    else:
        meta_checksum = None
    pile = GreenPile(len(meta_chunk))
    failed_chunks = []
    current_conns = []

    for chunk in meta_chunk:
        pile.spawn(self._connect_put, chunk)

    for conn, chunk in [d for d in pile]:
        if not conn:
            failed_chunks.append(chunk)
        else:
            current_conns.append(conn)

    self.quorum_or_fail([co.chunk for co in current_conns], failed_chunks)

    bytes_transferred = 0
    try:
        with green.ContextPool(len(meta_chunk)) as pool:
            for conn in current_conns:
                conn.failed = False
                conn.queue = Queue(io.PUT_QUEUE_DEPTH)
                pool.spawn(self._send_data, conn)

            while True:
                if size is not None:
                    remaining_bytes = size - bytes_transferred
                    if io.WRITE_CHUNK_SIZE < remaining_bytes:
                        read_size = io.WRITE_CHUNK_SIZE
                    else:
                        read_size = remaining_bytes
                else:
                    read_size = io.WRITE_CHUNK_SIZE
                with green.SourceReadTimeout(self.read_timeout):
                    try:
                        data = source.read(read_size)
                    except (ValueError, IOError) as e:
                        raise SourceReadError(str(e))
                if len(data) == 0:
                    for conn in current_conns:
                        if not conn.failed:
                            conn.queue.put('0\r\n\r\n')
                    break
                self.checksum.update(data)
                if meta_checksum:
                    meta_checksum.update(data)
                bytes_transferred += len(data)
                # copy current_conns to be able to remove a failed conn
                for conn in current_conns[:]:
                    if not conn.failed:
                        conn.queue.put('%x\r\n%s\r\n' % (len(data), data))
                    else:
                        current_conns.remove(conn)
                        failed_chunks.append(conn.chunk)

                self.quorum_or_fail([co.chunk for co in current_conns],
                                    failed_chunks)

            for conn in current_conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()

    except green.SourceReadTimeout as err:
        logger.warn('Source read timeout (reqid=%s): %s', self.reqid, err)
        raise exc.SourceReadTimeout(err)
    except SourceReadError as err:
        logger.warn('Source read error (reqid=%s): %s', self.reqid, err)
        raise
    except Timeout as to:
        logger.error('Timeout writing data (reqid=%s): %s', self.reqid, to)
        raise exc.OioTimeout(to)
    except Exception:
        logger.exception('Exception writing data (reqid=%s)', self.reqid)
        raise

    success_chunks = []

    for conn in current_conns:
        if conn.failed:
            failed_chunks.append(conn.chunk)
            continue
        pile.spawn(self._get_response, conn)

    for (conn, resp) in pile:
        if resp:
            self._handle_resp(
                conn, resp,
                meta_checksum.hexdigest() if meta_checksum else None,
                success_chunks, failed_chunks)
    self.quorum_or_fail(success_chunks, failed_chunks)

    for chunk in success_chunks:
        chunk["size"] = bytes_transferred

    return bytes_transferred, success_chunks[0]['hash'], success_chunks
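
# Standalone illustration of the HTTP chunked transfer-encoding framing pushed
# onto each connection queue above (the helper is hypothetical, not part of the
# original file): every buffer is prefixed with its length in hexadecimal, and
# the upload is terminated with a zero-length chunk.
def _demo_chunked_frame(data):
    if not data:
        # Matches conn.queue.put('0\r\n\r\n'): signals the end of the body.
        return '0\r\n\r\n'
    return '%x\r\n%s\r\n' % (len(data), data)

# Usage:
#   _demo_chunked_frame('hello')  # -> '5\r\nhello\r\n'
#   _demo_chunked_frame('')       # -> '0\r\n\r\n'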