def test_windows_path_extraction():
    """Check ``paths.extract`` when called with ``windows=True``.

    Three inputs are exercised: a ``file://`` drive-letter path, the same
    path with a trailing backslash (which is expected to be preserved in
    the extracted path), and a ``precomputed://https://`` URL.
    """
    from cloudfiles import paths

    # Drive-letter path written as a raw string.
    extracted = paths.extract(r'file://C:\wow\this\is\a\cool\path', windows=True)
    assert extracted.format == 'precomputed'
    assert extracted.protocol == 'file'
    assert extracted.bucket is None
    assert extracted.path == 'C:\\wow\\this\\is\\a\\cool\\path'
    assert extracted.host is None

    # Same path with a trailing backslash.
    extracted = paths.extract('file://C:\\wow\\this\\is\\a\\cool\\path\\', windows=True)
    assert extracted.format == 'precomputed'
    assert extracted.protocol == 'file'
    assert extracted.bucket is None
    assert extracted.path == 'C:\\wow\\this\\is\\a\\cool\\path\\'
    assert extracted.host is None

    # An https URL: the scheme and server become the host, the rest the path.
    extracted = paths.extract(
        'precomputed://https://storage.googleapis.com/neuroglancer-public-data/kasthuri2011/ground_truth',
        windows=True,
    )
    assert extracted.format == 'precomputed'
    assert extracted.protocol == 'https'
    # Identity check against the None singleton, matching the checks above.
    assert extracted.bucket is None
    assert extracted.path == 'neuroglancer-public-data/kasthuri2011/ground_truth'
    assert extracted.host == 'https://storage.googleapis.com'
def okgoogle(url):
    """Assert that ``url`` extracts to the expected Google Storage location.

    The expected result is protocol ``gs``, bucket ``bucket``, path
    ``dataset/layer`` (with or without a trailing slash), no host, and
    format ``precomputed``. Most checks report ``url`` on failure.
    """
    parsed = paths.extract(url)
    accepted_paths = ('dataset/layer', 'dataset/layer/')

    assert parsed.protocol == 'gs', url
    assert parsed.bucket == 'bucket', url
    assert parsed.path in accepted_paths, url
    assert parsed.host is None
    assert parsed.format == 'precomputed', url
def test_s3_custom_endpoint_path():
    """Check extraction of an s3 URL that embeds an https endpoint.

    The endpoint is expected to land in ``host``, the first path
    component in ``bucket``, and the remainder in ``path``.
    """
    from cloudfiles import paths

    parsed = paths.extract(
        "precomputed://s3://https://s3-hpcrc.rc.princeton.edu/hello/world"
    )
    observed = (
        parsed.format,
        parsed.protocol,
        parsed.bucket,
        parsed.path,
        parsed.host,
    )
    assert observed == (
        'precomputed',
        's3',
        'hello',
        'world',
        'https://s3-hpcrc.rc.princeton.edu',
    )
def test_to_https_protocol():
    """Check ``to_https_protocol`` on string paths and an ``ExtractedPath``.

    The ``gs``, ``s3`` and ``matrix`` inputs are expected to become https
    URLs; the ``file`` and ``mem`` inputs are expected back unchanged.
    """
    from cloudfiles.paths import extract, to_https_protocol, ExtractedPath

    # (input cloudpath, expected converted string)
    string_cases = (
        ("gs://my_bucket/to/heaven",
         "https://storage.googleapis.com/my_bucket/to/heaven"),
        ("s3://my_bucket/to/heaven",
         "https://s3.amazonaws.com/my_bucket/to/heaven"),
        ("matrix://my_bucket/to/heaven",
         "https://s3-hpcrc.rc.princeton.edu/my_bucket/to/heaven"),
        ("file://my_bucket/to/heaven",
         "file://my_bucket/to/heaven"),
        ("mem://my_bucket/to/heaven",
         "mem://my_bucket/to/heaven"),
    )
    for cloudpath, expected in string_cases:
        assert to_https_protocol(cloudpath) == expected

    # An already-extracted path goes in, and the result is compared against
    # the extraction of the equivalent https URL.
    structured = ExtractedPath('precomputed', 'gs', 'my_bucket', 'to/heaven', None)
    converted = to_https_protocol(structured)
    assert converted == extract("https://storage.googleapis.com/my_bucket/to/heaven")
def ls(shortpath, flat, expr, cloudpath):
    """Recursively lists the contents of a directory."""
    cloudpath = normalize_path(cloudpath)

    # Only the second and third values returned by get_mfp are used here.
    _, path_flat, prefix = get_mfp(cloudpath, True)

    if len(extract(cloudpath).path) > 0:
        # With no prefix reported and a non-flat listing, the last path
        # component is treated as the prefix to filter on.
        if prefix == "" and path_flat == False:
            prefix = os.path.basename(cloudpath)
        cloudpath = os.path.dirname(cloudpath)

    flat = flat or path_flat
    cf = CloudFiles(cloudpath, green=True)

    if not expr:
        listings = [cf.list(prefix=prefix, flat=flat)]
    else:
        # TODO: make this a reality using a parser
        # match "[abc]{2}" or "[123]" meaning generate a 2 character cartesian
        # product of a,b, and c or a 1 character cartesian product of 1,2,3
        # e.g. aa, ab, ac, ba, bb, bc, ca, cb, cc
        #      1, 2, 3
        matches = re.findall(r'\[([a-zA-Z0-9]+)\]', prefix)
        if len(matches):
            listings = [
                cf.list(prefix=expanded, flat=flat)
                for expanded in exprgen(prefix, matches)
            ]
        else:
            # No bracket groups found: list without any prefix filter.
            listings = [cf.list(flat=flat)]

    # Print in batches of 1000 names, one name per line.
    for batch in sip(itertools.chain.from_iterable(listings), 1000):
        if not shortpath:
            batch = [cloudpathjoin(cloudpath, name) for name in batch]
        print("\n".join(batch))
def shoulderror(url):
    """Assert that extracting ``url`` raises ``UnsupportedProtocolError``.

    If extraction succeeds instead, the assertion fails and reports ``url``.
    """
    try:
        paths.extract(url)
    except exceptions.UnsupportedProtocolError:
        pass
    else:
        assert False, url
def test_path_extraction():
    """Exercise ``paths.extract`` across supported and unsupported inputs.

    Covers gs/s3/file/graphene/precomputed inputs, expansion of relative
    and ``~`` file paths against the current and home directories, and
    inputs that must raise ``UnsupportedProtocolError``.
    """
    from cloudfiles import paths, exceptions, lib
    ExtractedPath = paths.ExtractedPath

    def shoulderror(url):
        # Extraction must raise; succeeding is the failure case.
        try:
            paths.extract(url)
        except exceptions.UnsupportedProtocolError:
            pass
        else:
            assert False, url

    def okgoogle(url):
        path = paths.extract(url)
        assert path.protocol == 'gs', url
        assert path.bucket == 'bucket', url
        assert path.path in ('dataset/layer', 'dataset/layer/'), url
        assert path.host is None
        assert path.format == 'precomputed', url

    okgoogle('gs://bucket/dataset/layer')
    shoulderror('s4://dataset/layer')
    shoulderror('dataset/layer')

    # These inputs must extract without raising.
    assert (
        paths.extract('graphene://http://localhost:8080/segmentation/1.0/testvol')
        == ExtractedPath(
            'graphene', 'http', None,
            'segmentation/1.0/testvol', 'http://localhost:8080'
        )
    )

    assert (
        paths.extract('precomputed://gs://fafb-ffn1-1234567')
        == ExtractedPath('precomputed', 'gs', 'fafb-ffn1-1234567', '', None)
    )

    assert (
        paths.extract('precomputed://gs://fafb-ffn1-1234567/segmentation')
        == ExtractedPath(
            'precomputed', 'gs', 'fafb-ffn1-1234567', 'segmentation', None
        )
    )

    # Absolute home and working directories; the file:// expectations below
    # are built relative to these.
    homepath = lib.toabs('~')
    curpath = lib.toabs('.')

    assert (
        paths.extract('s3://seunglab-test/intermediate/path/dataset/layer')
        == ExtractedPath(
            'precomputed', 's3', 'seunglab-test',
            'intermediate/path/dataset/layer', None
        )
    )

    assert (
        paths.extract('file:///tmp/dataset/layer')
        == ExtractedPath('precomputed', 'file', None, "/tmp/dataset/layer", None)
    )

    # A file:// path without a leading slash is expected to resolve against
    # the current directory.
    assert (
        paths.extract('file://seunglab-test/intermediate/path/dataset/layer')
        == ExtractedPath(
            'precomputed', 'file', None,
            os.path.join(curpath, 'seunglab-test/intermediate/path/dataset/layer'),
            None
        )
    )

    assert (
        paths.extract('gs://seunglab-test/intermediate/path/dataset/layer')
        == ExtractedPath(
            'precomputed', 'gs', 'seunglab-test',
            'intermediate/path/dataset/layer', None
        )
    )

    # A leading ~ is expected to resolve against the home directory.
    assert (
        paths.extract('file://~/seunglab-test/intermediate/path/dataset/layer')
        == ExtractedPath(
            'precomputed', 'file', None,
            os.path.join(homepath, 'seunglab-test/intermediate/path/dataset/layer'),
            None
        )
    )

    assert (
        paths.extract('file:///User/me/.cloudvolume/cache/gs/bucket/dataset/layer')
        == ExtractedPath(
            'precomputed', 'file', None,
            '/User/me/.cloudvolume/cache/gs/bucket/dataset/layer', None
        )
    )

    shoulderror('ou3bouqjsa fkj aojsf oaojf ojsaf')

    okgoogle('gs://bucket/dataset/layer/')
    # shoulderror('gs://bucket/dataset/layer/info')

    # For s3 the trailing slash is expected to be kept in the path.
    path = paths.extract('s3://bucketxxxxxx/datasetzzzzz91h8__3/layer1br9bobasjf/')
    assert path.format == 'precomputed'
    assert path.protocol == 's3'
    assert path.bucket == 'bucketxxxxxx'
    assert path.path == 'datasetzzzzz91h8__3/layer1br9bobasjf/'
    assert path.host is None

    # For file paths the trailing slash is expected to be dropped.
    path = paths.extract('file:///bucket/dataset/layer/')
    assert path.format == 'precomputed'
    assert path.protocol == 'file'
    assert path.bucket is None
    assert path.path == '/bucket/dataset/layer'
    assert path.host is None

    shoulderror('lucifer://bucket/dataset/layer/')
    shoulderror('gs://///')

    path = paths.extract('file:///tmp/removeme/layer/')
    assert path.format == 'precomputed'
    assert path.protocol == 'file'
    assert path.bucket is None
    assert path.path == '/tmp/removeme/layer'
    assert path.host is None

    assert (
        paths.extract('gs://username/a/username2/b/c/d')
        == ExtractedPath('precomputed', 'gs', 'username', 'a/username2/b/c/d', None)
    )
def ispathdir(cloudpath):
    """Return whether ``cloudpath`` refers to a directory.

    For any protocol other than ``file``, the answer is whether the given
    string ends in ``/``. For the ``file`` protocol, it is whether the
    string ends in ``os.path.sep`` or the extracted path is an existing
    directory according to ``os.path.isdir``.
    """
    expath = extract(normalize_path(cloudpath))
    last_char = cloudpath[-1]

    if expath.protocol != "file":
        return last_char == "/"

    return last_char == os.path.sep or os.path.isdir(expath.path)