import json
import logging
import os
import sys
from queue import Queue as ConcurrentQueue
from typing import Dict
from typing import Tuple

from fsspec import AbstractFileSystem
from fsspec.utils import read_block

import vineyard
from vineyard.io.byte import ByteStream
from vineyard.io.stream import StreamCollection
from vineyard.io.utils import report_exception

logger = logging.getLogger(__name__)

# Default chunk size for block reads. The enclosing module defines its own
# CHUNK_SIZE constant; this value is an assumption for self-containedness.
CHUNK_SIZE = 1024 * 1024 * 128


def read_byte_stream(
    client,
    fs: AbstractFileSystem,
    stream: ByteStream,
    path: str,
    chunk_size: int = CHUNK_SIZE,
):
    """Copy the file at ``path`` into the given byte stream, chunk by chunk."""
    logger.info('start reading blob at %s', path)
    with fs.open(path, mode="rb") as f:
        # ``size`` is a method on some fsspec file objects and a plain
        # attribute on others.
        try:
            total_size = f.size()
        except TypeError:
            total_size = f.size

        writer = stream.open_writer(client)
        try:
            begin, end = 0, total_size
            while begin < end:
                # Read the next block from the file and copy it into a
                # freshly allocated chunk of the stream.
                buffer = read_block(f, begin, min(chunk_size, end - begin))
                chunk = writer.next(len(buffer))
                vineyard.memory_copy(chunk, 0, buffer)
                begin += len(buffer)
        except Exception:  # pylint: disable=broad-except
            report_exception()
            writer.fail()
            sys.exit(-1)

        writer.finish()
        return total_size

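
# A minimal usage sketch for ``read_byte_stream``, assuming a vineyard daemon
# reachable via ``vineyard.connect()`` and the local filesystem exposed via
# ``fsspec.filesystem('file')``. The wiring (and the ``'length'`` param)
# mirrors how streams are created below, but this helper itself is
# illustrative and not part of the original adaptor.
def example_read_single_file(socket_path: str, path: str) -> int:
    import fsspec  # local import: only this example needs the top-level module

    client = vineyard.connect(socket_path)
    fs = fsspec.filesystem('file')
    stream = ByteStream.new(
        client, params={'length': os.path.getsize(path)}
    )
    return read_byte_stream(client, fs, stream, path)
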
def read_stream_collections(
    client,
    fs: AbstractFileSystem,
    queue: "ConcurrentQueue[Tuple[ByteStream, str]]",
    base_prefix: str,
    prefix: str,
):
    """Recursively rebuild the stream collection rooted at ``prefix``.

    A directory carrying a ``metadata.json`` becomes a nested
    ``StreamCollection``; a leaf directory becomes a byte stream whose
    (stream, blob path) pair is pushed onto ``queue`` for later reading.
    """
    metadata_path = os.path.join(prefix, 'metadata.json')
    blob_path = os.path.join(prefix, 'blob')
    if fs.exists(metadata_path):
        metadata = read_metadata(fs, metadata_path)
        streams = []
        for path in fs.listdir(prefix):
            if path['type'] == 'directory':
                streams.append(
                    read_stream_collections(
                        client, fs, queue, base_prefix, path['name']
                    )
                )
        stream_collection = StreamCollection.new(client, metadata, streams)
        return stream_collection.id
    else:
        # make a blob
        with fs.open(blob_path, 'rb') as f:
            try:
                total_size = f.size()
            except TypeError:
                total_size = f.size
            # create a stream
            stream = ByteStream.new(
                client,
                params={
                    StreamCollection.KEY_OF_PATH: os.path.relpath(
                        blob_path, base_prefix
                    ),
                    'length': total_size,
                },
            )
            queue.put((stream, blob_path))
            return stream.id

def read_metadata(fs: AbstractFileSystem, path: str) -> Dict:
    """Load the JSON metadata stored at ``path``."""
    logger.info('start reading metadata at %s', path)
    with fs.open(path, mode="rb") as f:
        return json.loads(f.read().decode('utf-8', errors='ignore'))

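
# A minimal sketch of how the three helpers above compose, assuming the
# caller drains ``queue`` with a small pool of worker threads. The thread
# wiring and the ``parallelism`` parameter are illustrative; the original
# adaptor may schedule this work differently.
def example_read_bytes_collection(
    client, fs: AbstractFileSystem, prefix: str, parallelism: int = 4
):
    import threading
    from queue import Empty

    streams: "ConcurrentQueue[Tuple[ByteStream, str]]" = ConcurrentQueue()
    # Rebuild the collection tree first; every leaf is enqueued as a
    # (stream, blob path) pair.
    root_id = read_stream_collections(client, fs, streams, prefix, prefix)

    def drain():
        # All pairs are enqueued before workers start, so an empty queue
        # means this worker is done.
        while True:
            try:
                stream, blob_path = streams.get_nowait()
            except Empty:
                return
            read_byte_stream(client, fs, stream, blob_path)

    workers = [threading.Thread(target=drain) for _ in range(parallelism)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    return root_id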