Exemplo n.º 1
0
def main():
    """Command line entry point: run printChunkValues for one chunk id.

    The chunk id is read from the last command line argument and the
    domain from the "domain" config setting.

    Exits with status 1 when no argument (or -h/--help) is given or when
    the chunk id is not valid, and with status -1 when no domain is
    configured.
    """
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        sys.exit(1)

    chunk_id = sys.argv[-1]
    if not isValidChunkId(chunk_id):
        print("Invalid chunk id")
        sys.exit(1)

    # Check the configuration before the loop and session are created so
    # the early exit below does not leave them behind unused.
    domain = config.get("domain")
    if not domain:
        printUsage()
        sys.exit(-1)
    print("got domain:", domain)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    # "meta_cache" is assigned exactly once (the former empty-dict
    # placeholder was immediately overwritten by the LruCache).
    app = {
        "session": get_session(loop=loop),
        "bucket_name": config.get("bucket_name"),
        "node_count": 1,
        "node_number": 0,
        "deleted_ids": set(),
        "pending_s3_read": {},
        "meta_cache": LruCache(mem_target=1024 * 1024, chunk_cache=False),
        "chunk_cache": LruCache(mem_target=64 * 1024 * 1024,
                                chunk_cache=True),
    }

    try:
        loop.run_until_complete(printChunkValues(app, domain, chunk_id))
    finally:
        # close the loop even when the coroutine raises
        loop.close()
Exemplo n.º 2
0
def main():
    """Command line entry point: scan a root for datasets using a prefix.

    Expected arguments: <rootid> <prefix_old> <prefix_new> [-update].
    The arguments are stored in the app dict and run_scan is executed on
    an asyncio loop; the counters that run_scan leaves in the app dict
    are printed afterwards.

    Exits with status 1 when too few arguments are given, when the root
    id is not a valid Schema v2 uuid, or when both prefixes are equal.
    """
    if len(sys.argv) < 4:
        printUsage()
        # Stop here even if printUsage() returns; otherwise the argument
        # lookups below would fail with an IndexError.
        sys.exit(1)

    rootid, prefix_old, prefix_new = sys.argv[1:4]
    do_update = len(sys.argv) > 4 and sys.argv[4] == "-update"

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)

    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    if prefix_old == prefix_new:
        print("prefix_old and prefix_new are the same")
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "prefix_old": prefix_old,
        "prefix_new": prefix_new,
        "do_update": do_update,
        "dataset_count": 0,
        "matched_dset_uri": 0,
        "indirect_dataset_keys": [],
        "loop": loop,
        "session": get_session(),
        "filter_map": {},
    }

    # need the metadata cache since we will be calling into some SN methods
    metadata_mem_cache_size = int(config.get("metadata_mem_cache_size"))
    app["meta_cache"] = LruCache(mem_target=metadata_mem_cache_size,
                                 name="MetaCache")

    try:
        loop.run_until_complete(
            run_scan(app, rootid=rootid, update=do_update))
    finally:
        # close the loop even when the scan raises
        loop.close()

    print("datasets scanned:", app["dataset_count"])
    print(
        "datasets with matching uri ('H5D_CONTIGUOUS_REF', 'H5D_CHUNKED_REF' layouts):",
        app["matched_dset_uri"])

    print("done!")
Exemplo n.º 3
0
def main():
    """Command line entry point: run a bucket scan and print the totals.

    A first argument of -h/--help triggers printUsage(); a first argument
    of --update turns on the update flag handed to run_scan.  The counters
    found under app["bucket_scan"] after the scan are printed one per
    line.
    """
    first_arg = sys.argv[1] if len(sys.argv) > 1 else None

    if first_arg in ("-h", "--help"):
        printUsage()

    do_update = first_arg == "--update"

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(),
    }
    loop.run_until_complete(run_scan(app, update=do_update))
    loop.close()

    # (printed label, key in the scan results) in output order
    report_rows = (
        ("root_count:", "root_count"),
        ("info_count:", "info_count"),
        ("group_count", "group_count"),
        ("dataset_count:", "dataset_count"),
        ("datatype_count", "datatype_count"),
        ("chunk_count:", "chunk_count"),
        ('allocated_bytes:', "allocated_bytes"),
        ("metadata_bytes:", "metadata_bytes"),
        ("updated_count:", "updated_count"),
    )
    scan_totals = app["bucket_scan"]
    for label, key in report_rows:
        print(label, scan_totals[key])

    print("done!")
Exemplo n.º 4
0
def main():
    """Command line entry point: run run_delete for the given root id.

    The root id is the first command line argument.  printUsage() is
    called when no argument or -h/--help is given.  Exits with status 1
    when the id is not a valid uuid or is not a Schema v2 id.
    """
    argc = len(sys.argv)
    help_requested = argc > 1 and sys.argv[1] in ("-h", "--help")
    if argc == 1 or help_requested:
        printUsage()

    rootid = sys.argv[1]

    # each validator is paired with the message shown when it rejects the id
    id_checks = (
        (isValidUuid, "Invalid root id!"),
        (isSchema2Id, "This tool can only be used with Schema v2 ids"),
    )
    for accepts, complaint in id_checks:
        if not accepts(rootid):
            print(complaint)
            sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(loop=loop),
    }
    loop.run_until_complete(run_delete(app, rootid))
    loop.close()

    print("done!")
Exemplo n.º 5
0
def main():
    """Command line entry point: scan one root and print a summary.

    Arguments: <rootid> [-update].  printUsage() is called when no
    argument or -h/--help is given.  Exits with status 1 when the id is
    not a valid uuid or is not a Schema v2 id.  After run_scan completes,
    the values stored under app["scanRoot_results"] are printed.
    """
    argc = len(sys.argv)
    if argc == 1 or (argc > 1 and sys.argv[1] in ("-h", "--help")):
        printUsage()

    rootid = sys.argv[1]
    do_update = argc > 2 and sys.argv[2] == "-update"

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)

    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(loop=loop),
    }
    loop.run_until_complete(run_scan(app, rootid=rootid, update=do_update))
    loop.close()

    summary = app["scanRoot_results"]
    dataset_map = summary["datasets"]
    modified_at = datetime.fromtimestamp(summary["lastModified"])
    byte_total = summary["metadata_bytes"] + summary["allocated_bytes"]

    print(f"lastModified: {modified_at}")
    print(f"size: {byte_total}")
    print(f"num chunks: {summary['num_chunks']}")
    print(f"num_groups: {summary['num_groups']}")
    print(f"num_datatypes: {summary['num_datatypes']}")
    print(f"num_datasets: {len(dataset_map)}")

    # one line per dataset: last modified, chunk count, allocated bytes
    per_dataset_keys = ('lastModified', 'num_chunks', 'allocated_bytes')
    for dsetid in dataset_map:
        info = dataset_map[dsetid]
        columns = ", ".join(f"{info[key]}" for key in per_dataset_keys)
        print(f"   {dsetid}: {columns}")

    started_at = datetime.fromtimestamp(summary["scan_start"])
    print(f"scan_start: {started_at}")
    finished_at = datetime.fromtimestamp(summary["scan_complete"])
    print(f"scan_complete: {finished_at}")

    print("done!")
Exemplo n.º 6
0
def main():
    """Command line entry point: run listObjects with the given filters.

    Recognized options:
        --prefix <value>
        --deliminator <value>
        --suffix <value>
        --showstats

    The parsed options are echoed and then passed to listObjects as
    keyword arguments.  Exits with status 1 (after printUsage()) for
    -h/--help, for an unknown argument, or when an option that needs a
    value is the last argument.
    """
    if len(sys.argv) > 1 and sys.argv[1] in ("-h", "--help"):
        printUsage()
        sys.exit(1)

    # options that consume the following argument as their value
    valued = {"--prefix": '', "--deliminator": '', "--suffix": ''}
    showstats = False

    args = sys.argv[1:]
    pos = 0
    while pos < len(args):
        arg = args[pos]
        if arg == "--showstats":
            showstats = True
            pos += 1
        elif arg in valued and pos + 1 < len(args):
            valued[arg] = args[pos + 1]
            pos += 2
        else:
            # Unknown option or missing value.  Exit explicitly: the
            # position is not advanced here, so returning from
            # printUsage() would otherwise loop forever.
            printUsage()
            sys.exit(1)

    prefix = valued["--prefix"]
    deliminator = valued["--deliminator"]
    suffix = valued["--suffix"]

    print("prefix:", prefix)
    print("deliminator:", deliminator)
    print("suffix:", suffix)
    print("showstats:", showstats)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(),
    }
    try:
        loop.run_until_complete(
            listObjects(app,
                        prefix=prefix,
                        deliminator=deliminator,
                        suffix=suffix,
                        showstats=showstats))
    finally:
        # close the loop even when the listing raises
        loop.close()

    print("done!")
Exemplo n.º 7
0
    def testStorUtil(self):
        """Run self.stor_util_test against the configured unit test bucket.

        The test is a no-op (a hint is printed and the method returns)
        when the "hsds_unit_test_bucket" config value is empty.
        """
        print(f"cors_domain: [{config.get('cors_domain')}]")

        test_bucket = config.get("hsds_unit_test_bucket")
        if test_bucket:
            # we need to setup a asyncio loop to query s3
            event_loop = asyncio.get_event_loop()
            app = {
                "session": get_session(loop=event_loop),
                "bucket_name": test_bucket,
                "loop": event_loop,
            }
            event_loop.run_until_complete(self.stor_util_test(app))
            event_loop.close()
        else:
            print(
                "No bucket configured, create bucket and export HSDS_UNIT_TEST_BUCKET=<bucket_name> to enable test"
            )
Exemplo n.º 8
0
def main():
    """Command line entry point: run printS3Obj for one object id.

    The object id is read from the last command line argument.  Exits
    with status 1 when no argument (or -h/--help) is given or when the id
    is not a valid uuid.
    """
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        sys.exit(1)

    obj_id = sys.argv[-1]
    if not isValidUuid(obj_id):
        print("Invalid obj id")
        # do not go on to query storage with an id already known to be bad
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "session": get_session(loop=loop),
        "bucket_name": config.get("bucket_name"),
    }

    try:
        loop.run_until_complete(printS3Obj(app, obj_id))
    finally:
        # close the loop even when the coroutine raises
        loop.close()
Exemplo n.º 9
0
def main():
    """Command line entry point: run deleteAll on the configured bucket.

    With -h/--help as first argument, printUsage() is called and the
    process exits with status 1.
    """
    wants_help = len(sys.argv) > 1 and sys.argv[1] in ("-h", "--help")
    if wants_help:
        printUsage()
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    event_loop = asyncio.get_event_loop()
    s3_session = get_session(loop=event_loop)
    app = {
        'bucket_name': config.get("bucket_name"),
        "session": s3_session,
        "loop": event_loop,
    }

    event_loop.run_until_complete(deleteAll(app))
    event_loop.close()

    print("done!")
Exemplo n.º 10
0
def main():
    """Command line entry point: run createDomains for one or more users.

    Recognized arguments:
        --user=<name>          a single username, or
        --user=[n1,n2,...]     a bracketed, comma separated list
        --private              use the all-False default permissions
        --domain=<name>        passed to createDomains as domain_name

    Exits with status 1 when no argument (or -h/--help) is given, when an
    unexpected argument is seen, or when no user is supplied.

    Raises:
        ValueError: if a username is empty, not lowercase, does not start
            with a letter, contains characters other than letters, digits
            or underscore, or is shorter than three characters.
    """
    default_public_perm = {
        'create': False,
        'read': True,
        'update': False,
        'delete': False,
        'readACL': False,
        'updateACL': False
    }
    default_private_perm = {
        'create': False,
        'read': False,
        'update': False,
        'delete': False,
        'readACL': False,
        'updateACL': False
    }

    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        sys.exit(1)

    default_perm = default_public_perm  # will switch if private is specified
    userarg = None
    domain = None
    for arg in sys.argv[1:]:
        # NOTE(review): the --user / --private branches were garbled in
        # the source text and have been reconstructed to mirror the
        # --domain= handling below; confirm against the upstream file.
        if arg.startswith('--user='):
            userarg = arg[len('--user='):]
        elif arg == '--private':
            default_perm = default_private_perm
        elif arg.startswith('--domain='):
            domain = arg[len('--domain='):]
        else:
            print("Unexpected argument:", arg)
            printUsage()
            sys.exit(1)

    if not userarg:
        print("No user supplied")
        printUsage()
        sys.exit(1)

    # "[a,b,c]" denotes a list of users, anything else a single user
    if userarg[0] == '[' and userarg[-1] == ']':
        usernames = userarg[1:-1].split(',')
    else:
        usernames = [userarg]

    for username in usernames:
        if not username:
            # e.g. "[]" or "[a,,b]": report it like any other bad name
            # instead of failing on username[0] with an IndexError
            raise ValueError("username must have at least three characters")
        if username != username.lower():
            raise ValueError("username must be lowercase")
        if not username[0].isalpha():
            raise ValueError(
                "first character of username must be character a-z")
        for c in username:
            if c != '_' and not c.isalnum():
                raise ValueError(
                    "username must consist of the characters a-z, numeric or underscore"
                )
        if len(username) < 3:
            raise ValueError("username must have at least three characters")

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()
    app = {
        "loop": loop,
        "bucket_name": config.get("bucket_name"),
    }

    try:
        loop.run_until_complete(
            createDomains(app, usernames, default_perm, domain_name=domain))
        loop.run_until_complete(shutdown(app))
    finally:
        # close the loop even when one of the coroutines raises
        loop.close()

    print("done!")
Exemplo n.º 11
0
# Main
#

if __name__ == '__main__':
    # Script entry: run bucketCheck starting at a base folder.  The last
    # command line argument overrides the "/home" default unless it looks
    # like an option (starts with "-"); -h/--help prints the usage line
    # and exits with status 0.
    base_folder = "/home"
    if sys.argv[1:]:
        last_arg = sys.argv[-1]
        if last_arg == "-h" or last_arg == "--help":
            print("Usage: python bucket_check.py <base_domain>")
            sys.exit(0)
        if last_arg[:1] != "-":
            base_folder = last_arg

    print("base_folder:", base_folder)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()
    app = {
        "bucket_name": config.get("bucket_name"),
        "s3objs": {},
        "domains": {},  # domain to root map
        "roots": {},  # root obj to domain map
        "deleted_ids": set(),
        "bytes_in_bucket": 0,
        "loop": loop,
    }
    loop.run_until_complete(bucketCheck(app, base_folder))
    loop.close()

    print("done!")
Exemplo n.º 12
0
    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))


#
# Main
#

if __name__ == '__main__':
    # Script entry: build the app state, run bucketCheck on an asyncio
    # loop, then release the client and close the loop.
    loop = asyncio.get_event_loop()
    app = {
        "bucket_name": config.get("bucket_name"),
        "anonymous_ttl": config.get("anonymous_ttl"),
        "s3objs": {},
        "domains": {},  # domain to root map
        "roots": {},  # root obj to domain map
        "deleted_ids": set(),
        "bytes_in_bucket": 0,
        "loop": loop,
    }
    session = get_session(loop=loop)
    app["session"] = session

    loop.run_until_complete(bucketCheck(app))
    releaseClient(app)
    loop.close()

    print("done!")
Exemplo n.º 13
0
def main():
    """Command line entry point: scan one root and print detailed stats.

    Arguments: <rootid> [-update].  printUsage() is called when no
    argument or -h/--help is given.  Exits with status 1 when the id is
    not a valid uuid or is not a Schema v2 id.  After run_scan completes,
    the totals and the per-dataset rows stored under
    app["scanRoot_results"] are printed.
    """
    argc = len(sys.argv)
    if argc == 1 or (argc > 1 and sys.argv[1] in ("-h", "--help")):
        printUsage()

    rootid = sys.argv[1]
    do_update = argc > 2 and sys.argv[2] == "-update"

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)

    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    # need the metadata cache since we will be calling into some SN methods
    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(),
        "filter_map": {},
        'meta_cache': LruCache(
            mem_target=int(config.get("metadata_mem_cache_size")),
            name="MetaCache"),
    }

    loop.run_until_complete(run_scan(app, rootid=rootid, update=do_update))
    loop.close()

    summary = app["scanRoot_results"]
    dataset_map = summary["datasets"]
    modified_at = datetime.fromtimestamp(summary["lastModified"])
    print(f"lastModified: {modified_at}")
    if "md5_sum" in summary:
        print(f"md5_sum: {summary['md5_sum']}")

    # (printed label, key in the scan results) in output order
    total_rows = (
        ("metadata bytes", 'metadata_bytes'),
        ("allocated bytes", 'allocated_bytes'),
        ("logical bytes", 'logical_bytes'),
        ("num chunks", 'num_chunks'),
        ("linked chunks", 'num_linked_chunks'),
        ("linked bytes", 'linked_bytes'),
        ("num_groups", 'num_groups'),
        ("num_datatypes", 'num_datatypes'),
    )
    for label, key in total_rows:
        print(f"{label}: {summary[key]}")
    print(f"num_datasets: {len(dataset_map)}")

    if dataset_map:
        print(
            "    dataset_id\tlast_modified\tnum_chunks\tallocated_bytes\tlogical_bytes\tlinked_bytes\tnum_link_chunks"
        )

    # per-dataset values, in the same order as the header columns
    dataset_keys = ('lastModified', 'num_chunks', 'allocated_bytes',
                    'logical_bytes', 'linked_bytes', 'num_linked_chunks')
    for dsetid in dataset_map:
        info = dataset_map[dsetid]
        columns = ", ".join(f"{info[key]}" for key in dataset_keys)
        print(f"   {dsetid}: {columns}")

    started_at = datetime.fromtimestamp(summary["scan_start"])
    print(f"scan_start:    {started_at}")
    finished_at = datetime.fromtimestamp(summary["scan_complete"])
    print(f"scan_complete: {finished_at}")

    print("done!")