Example #1
async def verifyDomain(domain):
    """ create domain if it doesn't already exist
    """
    params = {"host": domain}
    headers = getRequestHeaders()
    client = globals["client"]
    req = getEndpoint() + '/'
    root_id = None
    log.info("GET " + req)
    timeout = config.get("timeout")
    async with client.get(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 200:
            domain_json = await rsp.json()
            root_id = domain_json["root"]
        else:
            log.info("got status: {}".format(rsp.status))
    if rsp.status == 404:
        # create the domain
        setupDomain(domain)
        async with client.get(req, headers=headers, timeout=timeout, params=params) as rsp:
            if rsp.status == 200:
                domain_json = await rsp.json()
                root_id = domain_json["root"]
            else:
                log.error("got status: {} for GET req: {}".format(rsp.status, req))
                raise HttpProcessingError(code=rsp.status, message="Service error")
    globals["root"] = root_id
Example #2
def main(): 
    domain = getTestDomainName("mkgroups_perf")
    print("domain: {}".format(domain) )
       
    log.info("initializing")
    signal.signal(signal.SIGTERM, sig_handler)  # add handlers for early exit
    signal.signal(signal.SIGINT, sig_handler)

    loop = asyncio.get_event_loop()
    globals["loop"] = loop
    #domain = helper.getTestDomainName()
    
    # create a client Session here so that all client requests 
    #   will share the same connection pool
    max_tcp_connections = int(config.get("max_tcp_connections"))
    client = ClientSession(loop=loop, connector=TCPConnector(limit=max_tcp_connections))
    globals["client"] = client
    globals["group_count"] = 0
    globals["grp_request_count"] = 0
    globals["lnk_request_count"] = 0
    globals["grp_failed_posts"] = 0
    globals["lnk_failed_posts"] = 0
    globals["group_target"] = config.get("group_target")
    max_concurrent_tasks = config.get("max_concurrent_tasks")

    loop.run_until_complete(getEndpoints())

    if len(globals["sn_endpoints"]) == 0:
        log.error("no SN endpoints found!")
        client.close()
        loop.close()
        sys.exit()
    for endpoint in globals["sn_endpoints"]:
        log.info("got endpoint: {}".format(endpoint))

    loop.run_until_complete(verifyDomain(domain))
    globals["domain"] = domain # save the domain 
    globals["start_time"] = time.time()

    # start making groups!
    while globals["grp_request_count"] < globals["group_target"]:
        tasks = []
        count = max_concurrent_tasks
        if globals["group_target"] - globals["grp_request_count"] < count:
            count = globals["group_target"] - globals["grp_request_count"]
        log.info("adding {} tasks".format(count))
        for i in range(count):
            tasks.append(asyncio.ensure_future(createGroup()))   
        # got a batch, move them out!
        loop.run_until_complete(asyncio.gather(*tasks))
        tasks = []
    
    globals["stop_time"] = time.time()
    client.close()
    loop.close()

    print_results()     
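The batching loop above drives a shared session on an explicit event loop. On current aiohttp (3.x) the same pattern is usually written with `asyncio.run` and `async with ClientSession(...)`, which closes the session automatically before the loop shuts down. A rough sketch, assuming the same `createGroup`, `config`, and `globals` objects as above:

import asyncio
from aiohttp import ClientSession, TCPConnector

async def run_batches():
    # the context manager closes the session before asyncio.run tears down the loop
    async with ClientSession(connector=TCPConnector(limit=100)) as client:
        globals["client"] = client
        while globals["grp_request_count"] < globals["group_target"]:
            remaining = globals["group_target"] - globals["grp_request_count"]
            count = min(config.get("max_concurrent_tasks"), remaining)
            await asyncio.gather(*(createGroup() for _ in range(count)))

# asyncio.run(run_batches())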
Example #3
async def bucketCheck(app):
    """ Verify that contents of bucket are self-consistent
    """

    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    # do initial listKeys
    await listKeys(app)

    # clear used flags
    clearUsedFlags(app)

    # mark objs
    await markObjs(app)

    unlinked_count = 0
    s3objs = app["s3objs"]
    for objid in s3objs:
        if isValidUuid(objid) and not isValidChunkId(objid):
            try:
                s3obj = await getS3Obj(app, objid)
                if s3obj.used is False:
                    unlinked_count += 1
            except HTTPInternalServerError as hpe:
                log.warn("got error retreiving {}: {}".format(objid, hpe.code))

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    roots = app["roots"]
    for root in roots:
        print("root:", root)

    top_level_domains = []
    for domain in domains:
        if domain[0] != '/':
            log.error("unexpected domain: {}".format(domain))
            continue
        if domain[1:].find('/') == -1:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))
Example #4
async def createGroup():
    """ create a new group and link it to the parent group with 
    link name of group name
    """
    client = globals["client"]
    domain = globals["domain"]
    params = {"host": domain}
    base_req = getEndpoint()
    headers = getRequestHeaders()
  
    # create a new group
    req = base_req + "/groups"
    log.info("POST:" + req)
    globals["grp_request_count"] += 1
    group_name = globals["grp_request_count"]
    timeout = config.get("timeout")
    async with client.post(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status != 201:
            log.error("POST {} failed with status: {}, rsp: {}".format(req, rsp.status, str(rsp)))
            globals["grp_failed_posts"] += 1
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        else:
            globals["group_count"] += 1
            log.info("group_count: {}".format(globals["group_count"]))
        group_json = await rsp.json()
        group_id = group_json["id"]

    # link group to parent
    root_id = globals["root"] 
    group_name = "group_{}".format(group_name)  
    req = base_req + "/groups/" + root_id + "/links/" + group_name
    data = {"id": group_id }
    log.info("PUT " + req)
    globals["lnk_request_count"] += 1
    async with client.put(req, data=json.dumps(data), headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 409:
            # another task has created this link already
            log.warn("got 409 in request: " + req)
        elif rsp.status != 201:
            globals["lnk_failed_posts"] += 1
            log.error("got http error: {} for request: {}, rsp: {}".format(rsp.status, req, rsp))
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        # otherwise a 201: the link was created
    
    return group_id                
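The two calls target an HSDS-style REST API: an anonymous POST creates the group, then a named PUT links it under the root. The URL shapes, with illustrative endpoint and ids (values made up):

# illustrative values only
base_req = "http://127.0.0.1:5101"
root_id = "g-00000000-1111-2222-3333-444444444444"
group_name = "group_7"

post_url = base_req + "/groups"                                     # POST -> 201, {"id": ...}
put_url = base_req + "/groups/" + root_id + "/links/" + group_name  # PUT  -> 201, or 409 if taken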
Example #5
async def checkDataset(app, dset_key):
    log.info(f"checkDataset for key: {dset_key}")
    dset_json = await getStorJSONObj(app, dset_key)
    dset_id = dset_json["id"]
    prefix_old = app["prefix_old"]
    prefix_new = app["prefix_new"]
    do_update = app["do_update"]
    indirect_dataset_keys = app["indirect_dataset_keys"]
    app["dataset_count"] += 1
    log.info(f"checkDataset for id: {dset_id}")
    if "layout" not in dset_json:
        log.info("no layout found")
        return
    layout_json = dset_json["layout"]
    if "class" not in layout_json:
        log.warn(f"no class found in layout for id: {dset_id}")
        return
    layout_class = layout_json["class"]
    log.info(f"got layout_class: {layout_class}")
    if layout_class in ('H5D_CONTIGUOUS_REF', 'H5D_CHUNKED_REF'):
        if "file_uri" not in layout_json:
            log.warn(
                f"Expected to find key 'file_uri' in layout_json for id: {dset_id}"
            )
            return
        file_uri = layout_json["file_uri"]
        if file_uri.startswith(prefix_old):
            new_file_uri = prefix_new + file_uri[len(prefix_old):]
            log.info(f"replacing uri: {file_uri} with {new_file_uri}")
            app["matched_dset_uri"] += 1
            if do_update:
                # update the dataset json
                layout_json["file_uri"] = new_file_uri
                dset_json["layout"] = layout_json
                # write back to storage
                try:
                    await putStorJSONObj(app, dset_key, dset_json)
                    log.info(f"dataset {dset_id} updated")
                except Exception as e:
                    log.error(f"get exception writing dataset json: {e}")
    elif layout_class == 'H5D_CHUNKED_REF_INDIRECT':
        # add to list to be scanned later
        indirect_dataset_keys.append(dset_key[:-len(".dataset.json")])
    else:
        log.info(f"skipping check for layout_class: {layout_class}")
Example #6
async def getKeysCallback(app, s3keys):
    log.info(f"getKeysCallback, {len(s3keys)} items")

    if not isinstance(s3keys, list):
        log.error("expected list result for objDeleteCallback")
        raise ValueError("unexpected callback format")

    if "root_prefix" not in app or not app["root_prefix"]:
        log.error("Unexpected getKeysCallback")
        raise ValueError("Invalid getKeysCallback")

    prefix = app["root_prefix"]
    prefix_len = len(prefix)
    for s3key in s3keys:
        if not s3key.startswith(prefix):
            log.error(f"Unexpected key {s3key} for prefix: {prefix}")
            raise ValueError("invalid s3key for getKeysCallback")
        if not s3key.endswith(".dataset.json"):
            log.info(f"got unexpected key {s3key}, ignoring")
            continue
        dset_key = s3key  # equivalent to prefix + s3key[len(prefix):], prefix verified above
        log.info(f"getKeysCallback - dataset key: {dset_key}")
        await checkDataset(app, dset_key)

    log.info("getKeysCallback complete")
Example #7
async def getS3RootKeysCallback(app, s3keys):
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")
    results = app["bucket_scan"]

    for s3key in s3keys:
        log.info(f"got key: {s3key}")
        if not s3key.startswith("db/") or s3key[-1] != '/':
            log.error(f"unexpected key for getS3RootKeysCallback: {s3key}")
            continue
        root_id = getObjId(s3key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = s3key + ".info.json"

        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        info_obj = None
        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            pass  # info.json not created yet
        except HTTPInternalServerError as ie:
            log.warn(f"error getting s3obj: {ie}")
            continue

        if info_obj:
            log.info(f"got obj: {info_obj}")
            results["info_count"] += 1
            results["group_count"] += info_obj["num_groups"]
            results["dataset_count"] += len(info_obj["datasets"])
            results["datatype_count"] += info_obj["num_datatypes"]
            results["chunk_count"] += info_obj["num_chunks"]
            results["allocated_bytes"] += info_obj["allocated_bytes"]
            results["metadata_bytes"] += info_obj["metadata_bytes"]
Example #8
async def run_scan(app, rootid, update=False):

    root_key = getS3Key(rootid)

    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    root_prefix = root_key[:-(len(".group.json"))]
    app["root_prefix"] = root_prefix

    try:
        await getStorKeys(app,
                          prefix=root_prefix,
                          suffix=".dataset.json",
                          include_stats=False,
                          callback=getKeysCallback)
    except ClientError as ce:
        log.error(f"removeKeys - getS3Keys faiiled: {ce}")
    except HTTPNotFound:
        log.warn(
            f"getStorKeys - HTTPNotFound error for getStorKeys with prefix: {root_prefix}"
        )
    except HTTPInternalServerError:
        log.error(
            f"getStorKeys - HTTPInternalServerError for getStorKeys with prefix: {root_prefix}"
        )
    except Exception as e:
        log.error(
            f"getStorKeys - Unexpected Exception for getStorKeys with prefix: {root_prefix}: {e}"
        )

    # update all chunks for datasets with H5D_CHUNKED_REF_INDIRECT layout
    indirect_dataset_keys = app["indirect_dataset_keys"]
    for prefix in indirect_dataset_keys:
        log.info(f"got inidirect prefix: {prefix}")
        # TBD...

    await releaseStorageClient(app)
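The key arithmetic assumes a storage layout of `db/<root-id>/.group.json` per root; stripping the `.group.json` suffix leaves the per-root prefix under which the `.dataset.json` keys live. For example, with a made-up id:

root_key = "db/g-12345678-aaaa-bbbb-cccc-000000000000/.group.json"
root_prefix = root_key[:-len(".group.json")]
# root_prefix == "db/g-12345678-aaaa-bbbb-cccc-000000000000/"
# the scan then matches keys under this prefix that end with ".dataset.json"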