def test_compress_level(compression_method):
    from cloudfiles import CloudFiles, exceptions

    filepath = "/tmp/cloudfiles/compress_level"
    url = "file://" + filepath

    content = b'some_string' * 1000
    compress_levels = range(1, 9, 2)
    for compress_level in compress_levels:
        cf = CloudFiles(url, num_threads=5)
        cf.put('info', content, compress=compression_method, compression_level=compress_level)

        retrieved = cf.get('info')
        assert content == retrieved

        conn = cf._get_connection()
        _, encoding, server_md5, hash_type = conn.get_file("info")
        assert encoding == compression_method
        assert hash_type in ('md5', None)

        assert cf.get('nonexistentfile') is None

        rmtree(filepath)

def test_exceptions_raised(green):
    from cloudfiles import CloudFiles, exceptions
    from cloudfiles.lib import mkdir

    path = compute_url("file", "exceptions_raised")
    cf = CloudFiles(path, green=green)

    pth = mkdir(path.replace("file://", ""))
    with open(f"{pth}/wontdecompress.gz", "wb") as f:
        f.write(b"not a valid gzip stream")

    try:
        x = cf.get("wontdecompress")
        assert False
    except exceptions.DecompressionError:
        pass

    try:
        x = cf.get(["wontdecompress"], raise_errors=True)
        assert False
    except exceptions.DecompressionError:
        pass

    try:
        x = cf.get(["wontdecompress"], return_dict=True)
        assert False
    except exceptions.DecompressionError:
        pass

    cf.delete("wontdecompress")

def test_delete(s3, green, protocol):
    from cloudfiles import CloudFiles, exceptions

    if protocol == 'file':
        url = "file:///tmp/cloudfiles/delete"
    else:
        url = "{}://cloudfiles/delete".format(protocol)

    cf = CloudFiles(url, green=green, num_threads=1)
    content = b'some_string'

    cf.put('delete-test', content, compress=None, cache_control='no-cache')
    cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
    assert cf.get('delete-test') == content

    cf.delete('delete-test')
    assert cf.get('delete-test') is None

    assert cf.get('delete-test-compressed') == content
    cf.delete('delete-test-compressed')
    assert cf.get('delete-test-compressed') is None

    # Reset for batch delete
    cf.put('delete-test', content, compress=None, cache_control='no-cache')
    cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
    assert cf.get('delete-test') == content
    assert cf.get('delete-test-compressed') == content

    cf.delete(['delete-test', 'delete-nonexistent', 'delete-test-compressed'])
    assert cf.get('delete-test') is None
    assert cf.get('delete-test-compressed') is None

def load_images(p: str, extension: str = "tif") -> dict:
    """Assume the directory contains only the images to be stored."""
    files = CloudFiles(p)

    names = [f for f in sorted(files.list()) if extension in f]

    # Fetch all images in one batched request and keep them in listing order.
    # (The original fetched once with raw=True, discarded the result, and then
    # re-downloaded each file individually via files[k].)
    contents = files.get(names, return_dict=True)
    files_bytes = [contents[k] for k in names]

    imgs = [_load_image(f) for f in files_bytes]
    return {"seg": np.asarray(imgs).transpose(2, 1, 0)}

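# A minimal usage sketch for load_images (the path below is hypothetical;
# assumes numpy is imported as np and that _load_image decodes one image's
# bytes into a 2D array):
#
#   vol = load_images("file:///tmp/cloudfiles/sections", extension="tif")
#   vol["seg"].shape  # axes come back transposed to (x, y, z)
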
def MultiResUnshardedMeshMergeTask(
    cloudpath: str,
    prefix: str,
    cache_control: bool = False,
    draco_compression_level: int = 1,
    mesh_dir: Optional[str] = None,
    num_lod: int = 1,
    progress: bool = False,
):
    cv = CloudVolume(cloudpath)

    if mesh_dir is None and 'mesh' in cv.info:
        mesh_dir = cv.info['mesh']

    files_per_label = get_mesh_filenames_subset(cloudpath, mesh_dir, prefix)

    cf = CloudFiles(cv.meta.join(cloudpath, mesh_dir))
    for label, filenames in tqdm(files_per_label.items(), disable=(not progress)):
        files = cf.get(filenames)
        # we should handle draco as well
        files = [Mesh.from_precomputed(f["content"]) for f in files]
        (manifest, mesh) = process_mesh(
            cv, label, files, num_lod, draco_compression_level
        )
        cf.put(f"{label}.index", manifest.to_binary(), cache_control="no-cache")
        cf.put(f"{label}", mesh, cache_control="no-cache")

def get_manifest(self, segid, progress=None):
    """Retrieve the manifest for one or more segments."""
    segid, multiple_return = toiter(segid, is_iter=True)
    progress = progress if progress is not None else self.config.progress

    cloudpath = self.meta.join(self.meta.cloudpath, self.path)
    cf = CloudFiles(cloudpath, progress=progress)
    results = cf.get((f"{sid}.index" for sid in segid), total=len(segid))

    if not multiple_return:
        if not results:
            return None
        binary = results[0]["content"]
        if binary is None:
            return None
        return MultiLevelPrecomputedMeshManifest.from_binary(
            binary, segment_id=first(segid), shard_offset=0
        )

    regexp = re.compile(r'(\d+)\.index$')
    manifests = []
    for res in results:
        key = res["path"]
        sid = int(re.match(regexp, key).groups()[0])
        binary = res["content"]
        if binary is None:
            manifests.append(None)
            continue  # nothing to parse for a missing manifest
        manifest = MultiLevelPrecomputedMeshManifest.from_binary(
            binary, segment_id=sid, shard_offset=0
        )
        manifests.append(manifest)

    return manifests

def fetch_z_levels(self, bounds):
    cf = CloudFiles(self.levels_path)

    levelfilenames = [
        cf.join('levels', f"{self.mip}", f"{z}")
        for z in range(bounds.minpt.z, bounds.maxpt.z)
    ]

    levels = cf.get(levelfilenames)

    errors = [
        level['path']
        for level in levels if level['content'] is None
    ]

    if len(errors):
        raise Exception(
            ", ".join(errors)
            + " were not defined. Did you run a LuminanceLevelsTask for these slices?"
        )

    levels = [(
        int(os.path.basename(item['path'])),
        json.loads(item['content'].decode('utf-8'))
    ) for item in levels]

    levels.sort(key=lambda x: x[0])
    levels = [x[1] for x in levels]
    return [np.array(x['levels'], dtype=np.uint64) for x in levels]

def test_access_non_cannonical_minimal_path(s3, protocol):
    from cloudfiles import CloudFiles, exceptions

    if protocol == 'file':
        url = "file:///tmp/"
    else:
        url = "{}://cloudfiles/".format(protocol)

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'
    cf.put('info', content, compress=None)
    # time.sleep(0.5)  # sometimes it takes a moment for google to update the list

    assert cf.get('info') == content
    assert cf.get('nonexistentfile') is None

    cf.delete('info')

def get_meshes_on_bypass(self, segids, allow_missing=False):
    """
    Attempt to fetch a mesh directly from storage without going
    through the chunk graph server. This capability should only
    be used in special circumstances.
    """
    segids = toiter(segids)
    dynamic_cloudpath = self.meta.join(self.meta.meta.cloudpath, self.dynamic_path())
    filenames = [self.compute_filename(segid) for segid in segids]

    cf = CloudFiles(
        dynamic_cloudpath,
        progress=self.config.progress,
        green=self.config.green,
        secrets=self.config.secrets,
    )
    raw_binaries = cf.get(filenames)

    # extract the label ID from the mesh manifest.
    # e.g. 387463568301300850:0:24576-25088_17920-18432_2048-3072
    label_regexp = re.compile(r'(\d+):\d:[\d_-]+$')

    output = {}
    remaining = []
    for res in raw_binaries:
        if res['error']:
            raise res['error']

        (label,) = re.search(label_regexp, res['path']).groups()
        label = int(label)

        if res['content'] is None:
            remaining.append(label)
        else:
            output[label] = res['content']

    layers = defaultdict(list)
    for segid in remaining:
        layer_id = self.meta.meta.decode_layer_id(segid)
        layers[layer_id].append(segid)

    for layer_id, labels in layers.items():
        subdirectory = self.meta.join(self.meta.mesh_path, 'initial', str(layer_id))
        initial_output = self.readers[layer_id].get_data(
            labels, path=subdirectory, progress=self.config.progress
        )
        for label, raw_binary in initial_output.items():
            if raw_binary is None:
                if allow_missing:
                    continue
                else:
                    raise IndexError('No mesh found for segment {}'.format(label))
            else:
                output[label] = raw_binary

    return {
        label: Mesh.from_draco(raw_binary, segid=label)
        for label, raw_binary in output.items()
    }

def get(self, cloudpaths, progress=None):
    progress = self.config.progress if progress is None else progress

    cf = CloudFiles('file://' + self.path, progress=progress)
    results = cf.get(list(cloudpaths))
    return {res['path']: res['content'] for res in results}

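# For reference: a multi-path CloudFiles.get returns one dict per file,
# which is why the helpers in this corpus index res['path'] and
# res['content']. Roughly (the exact field set may vary by cloudfiles
# version):
#
#   [{ 'path': 'info', 'content': b'...', 'error': None, ... }, ...]
#
# A single-path get instead returns the file's bytes directly, or None
# if the file is missing.
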
def download(self, paths, compress=None, progress=None):
    """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled.

    Returns: { filename: content, ... }
    """
    if len(paths) == 0:
        return {}

    progress = nvl(progress, self.config.progress)
    compress = nvl(compress, self.compress, self.config.compress)

    locs = self.compute_data_locations(paths)
    locs['remote'] = [str(x) for x in locs['remote']]

    fragments = {}
    if self.enabled:
        fragments = self.get(locs['local'], progress=progress)

        # fixes e.g. mesh\info -> mesh/info on Windows
        if self.meta.path.protocol != 'file' and os.path.sep == '\\':
            fragments = {
                "/".join(key.split('\\')): val
                for key, val in fragments.items()
            }

    cf = CloudFiles(
        self.meta.cloudpath,
        progress=progress,
        secrets=self.config.secrets,
        parallel=self.config.parallel,
    )
    remote_fragments = cf.get(locs['remote'], raw=True)

    for frag in remote_fragments:
        if frag['error'] is not None:
            raise frag['error']

    if self.enabled:
        cf_cache = CloudFiles(
            'file://' + self.path,
            progress=('to Cache' if progress else None)
        )
        cf_cache.puts(
            compression.transcode(
                (frag for frag in remote_fragments if frag['content'] is not None),
                encoding=compress,
                progress=progress,
                in_place=False
            ),
            compress=compress,
            raw=True
        )

    remote_fragments_dict = {}
    while remote_fragments:
        res = remote_fragments.pop()
        remote_fragments_dict[res['path']] = compression.decompress(
            res['content'], res['compress']
        )

    fragments.update(remote_fragments_dict)
    return fragments

def _fetch(self) -> None:
    from json import loads
    from cloudfiles import CloudFiles

    raw_meta = {}
    cf = CloudFiles(self.path)
    fnames = ["params.json", "metadata.json"]
    # Fetch both candidates in one batched request and use the first that
    # parses. (The original discarded the batch result and re-fetched each
    # file individually via cf[f].)
    files = cf.get(fnames, return_dict=True)
    for f in fnames:
        try:
            raw_meta = loads(files[f])
            break
        except Exception:
            pass

    if not raw_meta:
        raise ValueError("Could not load meta, cannot proceed.")
    self._parse(raw_meta)

def test_http_read_brotli_image():
    from cloudfiles import CloudFiles, exceptions

    cf = CloudFiles('https://open-neurodata.s3.amazonaws.com/kharris15/apical/em')
    imgbytes = cf.get("2_2_50/4096-4608_4096-4608_112-128")

    assert len(imgbytes) == 4194304

    expected = b'v\\BAT[]\\TVcsxshj{\x84vjo\x7f}oqyz\x89\x92\x91\x98\x81\x99\xb2\xb2\xb1\xa9\x9d\xa3\xb4\xb8'
    assert imgbytes[:len(expected)] == expected

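# Note: the https:// protocol in cloudfiles is read-only, so only
# get/exists-style operations apply to a plain HTTPS endpoint like the one
# above. A hedged sketch against the same object:
#
#   cf.exists("2_2_50/4096-4608_4096-4608_112-128")  # -> True if reachable
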
def get_skeletons(self, folder):
    skeleton_filenames = [str(skeleton_id) for skeleton_id in self.skeleton_ids]
    cf = CloudFiles(folder)
    skeleton_files = cf.get(skeleton_filenames)

    skeletons = {}
    for skeleton_file in skeleton_files:
        skeleton_id_str = skeleton_file["path"]
        skeleton = Skeleton.from_precomputed(skeleton_file["content"])
        skeletons[skeleton_id_str] = skeleton

    return skeletons

def test_compression(s3, protocol, method, green):
    from cloudfiles import CloudFiles, exceptions

    url = compute_url(protocol, "compress")

    cf = CloudFiles(url, num_threads=5, green=green)
    content = b'some_string'

    cf.put('info', content, compress=method)
    retrieved = cf.get('info')
    assert content == retrieved
    assert cf.get('nonexistentfile') is None

    try:
        cf.put('info', content, compress='nonexistent')
        assert False
    except ValueError:
        pass

    cf.delete(iter(cf))

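# `method` above is typically parametrized over the codecs cloudfiles
# accepts; None, 'gzip', and 'br' are documented, and newer releases accept
# others as well (e.g. 'zstd'). An unrecognized codec name raises
# ValueError, which is what the try/except verifies.
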
def download(self, bbox, mip, parallel=1, renumber=False):
    if parallel != 1:
        raise ValueError("Only parallel=1 is supported for n5.")
    elif renumber != False:
        raise ValueError("Only renumber=False is supported for n5.")

    bounds = Bbox.clamp(bbox, self.meta.bounds(mip))

    if bounds.subvoxel():
        raise exceptions.EmptyRequestException(
            f'Requested less than one pixel of volume. {bounds}')

    cf = CloudFiles(self.meta.cloudpath, progress=self.config.progress)

    realized_bbox = bbox.expand_to_chunk_size(self.meta.chunk_size(mip))
    grid_bbox = realized_bbox // self.meta.chunk_size(mip)

    urls = [
        cf.join(f"s{mip}", str(x), str(y), str(z))
        for x, y, z in xyzrange(grid_bbox.minpt, grid_bbox.maxpt)
    ]

    all_chunks = cf.get(urls, parallel=parallel, return_dict=True)
    shape = list(bbox.size3()) + [self.meta.num_channels]
    renderbuffer = np.zeros(shape=shape, dtype=self.meta.dtype, order='F')

    sep = '/'
    if cf._path.protocol == "file":
        sep = os.path.sep
        if sep == '\\':
            sep = '\\\\'  # compensate for regexp escaping

    regexp = re.compile(
        rf"s(?P<mip>\d+){sep}(?P<x>\d+){sep}(?P<y>\d+){sep}(?P<z>\d+)")

    for fname, binary in all_chunks.items():
        m = re.search(regexp, fname).groupdict()
        assert mip == int(m["mip"])
        gridpoint = Vec(*[int(i) for i in [m["x"], m["y"], m["z"]]])
        chunk_bbox = Bbox(gridpoint, gridpoint + 1) * self.meta.chunk_size(mip)
        chunk_bbox = Bbox.clamp(chunk_bbox, self.meta.bounds(mip))
        default_shape = list(chunk_bbox.size3()) + [self.meta.num_channels]
        chunk, chunk_shape = self.parse_chunk(binary, mip, fname, default_shape)
        chunk_bbox = Bbox(chunk_bbox.minpt, chunk_bbox.minpt + Vec(*chunk_shape[:3]))
        chunk_bbox = Bbox.clamp(chunk_bbox, self.meta.bounds(mip))
        shade(renderbuffer, bbox, chunk, chunk_bbox)

    # crop the rendered image to the data bounds if requested;
    # this can only happen once the image actually exists
    if self.autocrop:
        renderbuffer, bounds = autocropfn(self.meta, renderbuffer, bounds, mip)
        return VolumeCutout.from_volume(self.meta, mip, renderbuffer, bounds)

    return VolumeCutout.from_volume(self.meta, mip, renderbuffer, bbox)

class AggregateSkeletonFragmentsOperator(OperatorBase):
    """Merge skeleton fragments for Neuroglancer visualization."""
    def __init__(self,
                 fragments_path: str,
                 output_path: str,
                 name: str = 'aggregate-skeleton-fragments'):
        """
        Parameters
        ------------
        fragments_path: path to store fragment files
        output_path: save the merged skeleton file here.
        """
        super().__init__(name=name)
        self.fragments_storage = CloudFiles(fragments_path)
        self.output_storage = CloudFiles(output_path)

    def __call__(self, prefix: str):
        logging.info(f'aggregate skeletons with prefix of {prefix}')

        id2filenames = defaultdict(list)
        for filename in self.fragments_storage.list(prefix=prefix):
            filename = os.path.basename(filename)
            # `re.match` anchors at the beginning of the string;
            # `re.search` scans the whole string.
            matches = re.search(r'(\d+):', filename)
            if not matches:
                continue

            # skeleton ID
            skl_id = int(matches.group(1))
            id2filenames[skl_id].append(filename)

        for skl_id, filenames in id2filenames.items():
            logging.info(f'skeleton id: {skl_id}')
            frags = self.fragments_storage.get(filenames)
            frags = [
                PrecomputedSkeleton.from_precomputed(x['content'])
                for x in frags
            ]

            skel = PrecomputedSkeleton.simple_merge(frags).consolidate()
            skel = kimimaro.postprocess(
                skel,
                dust_threshold=1000,
                tick_threshold=3500
            )
            self.output_storage.put(
                path=str(skl_id),
                content=skel.to_precomputed(),
            )
            # the last few hundred files will not be uploaded without sleeping!
            sleep(0.01)

def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions

    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)

    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    del cf['info2']
    assert cf.exists('info2') == False

    num_infos = max(num_threads, 1)
    results = cf.get(['info' for i in range(num_infos)])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', {'omg': 'wow'}, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == {'omg': 'wow'}

    cf.delete('info')

    if protocol == 'file':
        rmtree(url)

def execute(self):
    corgie_logger.info(
        f"Generate new skeleton vertices task for id {self.skeleton_id_str}"
    )
    skeleton = get_skeleton(self.src_path, self.skeleton_id_str)
    if self.vertex_sort:
        vertex_sort = skeleton.vertices[:, 2].argsort()
    else:
        vertex_sort = np.arange(0, len(skeleton.vertices))

    number_vertices = len(skeleton.vertices)
    index_points = list(range(0, number_vertices, self.task_vertex_size))
    cf = CloudFiles(f"{self.dst_path}")
    array_filenames = []
    for i in range(len(index_points)):
        start_index = index_points[i]
        if i + 1 == len(index_points):
            end_index = number_vertices
        else:
            end_index = index_points[i + 1]
        array_filenames.append(
            f"intermediary_arrays/{self.skeleton_id_str}:{start_index}-{end_index}"
        )

    array_files = cf.get(array_filenames)
    # Dict to make sure arrays are concatenated in correct order
    array_dict = {}
    for array_file in array_files:
        array_dict[array_file["path"]] = pickle.loads(array_file["content"])

    array_arrays = []
    for array_filename in array_filenames:
        array_arrays.append(array_dict[array_filename])

    array_arrays = np.concatenate(array_arrays)

    # Restore the correct order of the vertices
    restore_sort = vertex_sort.argsort()
    new_vertices = array_arrays[restore_sort]

    new_skeleton = Skeleton(
        vertices=new_vertices,
        edges=skeleton.edges,
        radii=skeleton.radius,
        vertex_types=skeleton.vertex_types,
        space=skeleton.space,
        transform=skeleton.transform,
    )
    cf.put(
        path=self.skeleton_id_str,
        content=new_skeleton.to_precomputed(),
        compress="gzip",
    )

def download(self, paths, compress=None, progress=None):
    """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled.

    Returns: { filename: content, ... }
    """
    if len(paths) == 0:
        return {}

    progress = nvl(progress, self.config.progress)
    compress = nvl(compress, self.compress, self.config.compress)

    locs = self.compute_data_locations(paths)
    locs['remote'] = [str(x) for x in locs['remote']]

    fragments = {}
    if self.enabled:
        fragments = self.get(locs['local'], progress=progress)

    cf = CloudFiles(
        self.meta.cloudpath,
        progress=progress,
        secrets=self.config.secrets
    )
    remote_fragments = cf.get(locs['remote'], raw=True)

    for frag in remote_fragments:
        if frag['error'] is not None:
            raise frag['error']

    if self.enabled:
        cf_cache = CloudFiles(
            'file://' + self.path,
            progress=('to Cache' if progress else None)
        )
        cf_cache.puts(
            compression.transcode(
                (frag for frag in remote_fragments if frag['content'] is not None),
                encoding=compress,
                progress=progress,
                in_place=False
            ),
            compress=compress,
            raw=True
        )

    remote_fragments = {
        res['path']: compression.decompress(res['content'], res['compress'])
        for res in remote_fragments
    }

    fragments.update(remote_fragments)
    return fragments

def get_skeletons_by_segid(self, filenames):
    cf = CloudFiles(self.cloudpath, progress=True)
    skels = cf.get(filenames)

    skeletons = defaultdict(list)
    for skel in skels:
        try:
            # cf.get results carry the file's key under 'path'
            segid = filename_to_segid(skel['path'])
        except ValueError:
            # Typically this is due to preexisting fully
            # formed skeletons e.g. skeletons_mip_3/1588494
            continue

        skeletons[segid].append((
            Bbox.from_filename(skel['path']),
            pickle.loads(skel['content'])
        ))

    return skeletons

def cache(task, cloudpath):
    layer_path, filename = os.path.split(cloudpath)

    classname = task.__class__.__name__
    lcldir = mkdir(os.path.join('/tmp/', classname))
    lclpath = os.path.join(lcldir, filename)

    if os.path.exists(lclpath):
        with open(lclpath, 'rb') as f:
            filestr = f.read()
    else:
        cf = CloudFiles(layer_path)
        filestr = cf.get(filename)

        with open(lclpath, 'wb') as f:
            f.write(filestr)

    return filestr

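# Usage sketch (the task object and bucket path are hypothetical): the
# first call downloads the remote file and writes /tmp/<TaskClassName>/info;
# subsequent calls read the local copy instead.
#
#   info_bytes = cache(some_task, "gs://bucket/layer/info")
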
def download(self, paths, compress=None, progress=None):
    """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled.

    Returns: { filename: content, ... }
    """
    if len(paths) == 0:
        return {}

    progress = progress if progress is not None else self.config.progress

    locs = self.compute_data_locations(paths)
    locs['remote'] = [str(x) for x in locs['remote']]

    fragments = {}
    if self.enabled:
        fragments = self.get(locs['local'], progress=progress)

    cf = CloudFiles(self.meta.cloudpath, progress=progress)
    remote_fragments = cf.get(locs['remote'])

    for frag in remote_fragments:
        if frag['error'] is not None:
            raise frag['error']

    remote_fragments = {
        res['path']: res['content']
        for res in remote_fragments
    }

    if self.enabled:
        self.put(
            [
                (filename, content)
                for filename, content in remote_fragments.items()
                if content is not None
            ],
            compress=compress,
            progress=progress
        )

    fragments.update(remote_fragments)
    return fragments

class MergeSkeletonTask(scheduling.Task):
    def __init__(self, dst_path, mip, dust_threshold, tick_threshold, prefix=""):
        super().__init__(self)
        self.dst_path = dst_path
        self.cf = CloudFiles(self.dst_path)
        self.mip = mip
        self.dust_threshold = dust_threshold
        self.tick_threshold = tick_threshold
        self.prefix = prefix

    def execute(self):
        corgie_logger.info(f"Merging skeletons at {self.dst_path}")
        fragment_filenames = self.cf.list(prefix=self.prefix, flat=True)
        skeleton_files = self.cf.get(fragment_filenames)
        skeletons = defaultdict(list)
        for skeleton_file in skeleton_files:
            try:
                colon_index = skeleton_file["path"].index(":")
            except ValueError:
                # File is full skeleton, not fragment
                continue
            seg_id = skeleton_file["path"][0:colon_index]
            skeleton_fragment = pickle.loads(skeleton_file["content"])
            if not skeleton_fragment.empty():
                skeletons[seg_id].append(skeleton_fragment)

        for seg_id, skeleton_fragments in skeletons.items():
            skeleton = PrecomputedSkeleton.simple_merge(skeleton_fragments).consolidate()
            skeleton = kimimaro.postprocess(
                skeleton, self.dust_threshold, self.tick_threshold
            )
            skeleton.id = int(seg_id)
            self.cf.put(
                path=seg_id,
                content=skeleton.to_precomputed(),
                compress="gzip"
            )
            corgie_logger.info(f"Finished skeleton {seg_id}")

def fetch_provenance(self):
    """
    Refresh the current provenance file from primary storage (e.g. the cloud)
    without reference to the cache. The cache will not be updated.

    Raises cloudvolume.exceptions.provenanceUnavailableError when the
    provenance file is unable to be retrieved.

    See also: refresh_provenance

    Returns: dict
    """
    cf = CloudFiles(self.cloudpath)
    provfile = cf.get('provenance')
    if provfile:
        provfile = provfile.decode('utf-8')

        # The json5 decoder is *very* slow, so try the stricter
        # but much faster json decoder first and fall back to
        # json5 only if it fails.
        try:
            provfile = json.loads(provfile)
        except json.decoder.JSONDecodeError:
            try:
                provfile = json5.loads(provfile)
            except ValueError:
                raise ValueError(red(
                    """The provenance file could not be JSON decoded.
                    Please reformat the provenance file before continuing.
                    Contents: {}""".format(provfile)))
    else:
        provfile = {
            "sources": [],
            "owners": [],
            "processing": [],
            "description": "",
        }

    return self._cast_provenance(provfile)

def test_get_generator(num_threads, green):
    from cloudfiles import CloudFiles, exceptions

    path = '/tmp/cloudfiles/gen'
    rmtree(path)
    url = 'file://' + path

    cf = CloudFiles(url, num_threads=num_threads, green=green)
    gen = ((str(i), b'hello world') for i in range(100))
    cf.puts(gen)

    files = cf.get((str(i) for i in range(100)), total=100)

    assert all([f['error'] is None for f in files])
    assert len(files) == 100

    assert all([f['content'] == b'hello world' for f in files])

    fnames = [str(i) for i in range(100)]
    assert sorted(list(cf.list())) == sorted(fnames)

    cf.delete((str(i) for i in range(100)))
    assert list(cf.list()) == []

def get_skeleton(src_path, skeleton_id_str):
    cf = CloudFiles(src_path)
    return Skeleton.from_precomputed(cf.get(skeleton_id_str))

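# Usage sketch (bucket and id below are hypothetical). A single-path
# cf.get returns the file's bytes directly rather than a list of result
# dicts, which is why the value can feed Skeleton.from_precomputed:
#
#   skel = get_skeleton("gs://bucket/skeletons", "1588494")
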
def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        fn = partial(_cp, srcpath, destpath, compression, False, block_size)

        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)

def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')
    use_stdout = (destination == '-')

    if use_stdout:
        progress = False  # can't have the progress bar interfering

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    # For more information see:
    # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
    # Try to follow cp rules. If the dest directory exists,
    # copy the base source directory into the dest directory.
    # If the directory does not exist, then we copy into
    # the dest directory directly.
    # Both x* and x** should not copy the base directory.
    if recursive and nsrc[-1] != "*":
        if CloudFiles(ndest).isdir():
            if nsrc[-1] == '/':
                nsrc = nsrc[:-1]
            ndest = cloudpathjoin(ndest, os.path.basename(nsrc))

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        if use_stdout:
            fn = partial(_cp_stdout, srcpath)
        else:
            fn = partial(_cp, srcpath, destpath, compression, False, block_size)

        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        if use_stdout:
            _cp_stdout(srcpath, xferpaths)
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)

def _cp_stdout(src, paths):
    paths = toiter(paths)
    cf = CloudFiles(src, green=True, progress=False)
    for res in cf.get(paths):
        content = res["content"].decode("utf8")
        sys.stdout.write(content)