Example #1
async def DELETE_Object(request):
    log.request(request)

    app = request.app
    delete_set = app["delete_set"]

    objid = request.match_info.get('id')
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}")
        raise HTTPBadRequest()

    if isSchema2Id(objid):
        # get rootid for this id
        collection = getCollectionForId(objid)
        if collection == "datasets":
            delete_set.add(objid)
        elif collection == "groups":
            # only need to do anything if this is the root group
            if isRootObjId(objid):
                log.info(f"adding root group: {objid} to delete_set")
                delete_set.add(objid)
            else:
                log.info(f"ignoring delete non-root group: {objid}")
        elif collection == "datatypes":
            log.info(f"ignoring delete for datatype object: {objid}")
        else:
            log.error(f"Unexpected collection type: {collection}")
         
    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
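
Every example in this listing branches on getCollectionForId. For reference, here is a minimal sketch of such a helper, assuming (as with hsds schema-2 ids) that each object id carries a one-character type prefix; treat the exact prefixes as an assumption:

def getCollectionForId(obj_id):
    """ Return "groups", "datasets", or "datatypes" for the given object id.
    Assumes ids are prefixed with a type code: "g-", "d-", or "t-". """
    if not isinstance(obj_id, str):
        raise ValueError("invalid object id")
    if obj_id.startswith("g-"):
        return "groups"
    if obj_id.startswith("d-"):
        return "datasets"
    if obj_id.startswith("t-"):
        return "datatypes"
    raise ValueError(f"unexpected object id: {obj_id}")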
Example #2
    async def fetch(self, obj_id):
        log.debug(f"DomainCrawler - fetch for obj_id: {obj_id}")
        obj_json = await getObjectJson(self._app,
                                       obj_id,
                                       include_links=True,
                                       include_attrs=self._include_attrs)
        log.debug(f"DomainCrawler - for {obj_id} got json: {obj_json}")

        # including links, so don't need link count
        if "link_count" in obj_json:
            del obj_json["link_count"]
        self._obj_dict[obj_id] = obj_json
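        # including attrs, so don't need attribute count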
        if self._include_attrs:
            del obj_json["attributeCount"]

        # if this is a group, iterate through all the hard links and
        # add to the lookup ids set
        if getCollectionForId(obj_id) == "groups":
            links = obj_json["links"]
            log.debug(f"DomainCrawler links: {links}")
            for title in links:
                log.debug(f"DomainCrawler - got link: {title}")
                link_obj = links[title]
                if link_obj["class"] != 'H5L_TYPE_HARD':
                    continue
                link_id = link_obj["id"]
                if link_id not in self._obj_dict:
                    # haven't seen this object yet, get obj json
                    log.debug(f"DomainCrawler - adding link_id: {link_id}")
                    self._obj_dict[link_id] = {}  # placeholder for obj id
                    self._q.put_nowait(link_id)
        log.debug(f"DomainCrawler - fetch conplete obj_id: {obj_id}")
Example #3
async def getObjectIdByPath(app, obj_id, h5path, refresh=False):
    """ Find the object at the provided h5path location.
    If not found, raise a 404 error.
    """
    log.info("getObjectIdByPath obj_id: {} h5path: {} refresh: {}".format(
        obj_id, h5path, refresh))
    if h5path.startswith("./"):
        h5path = h5path[2:]  # treat as relative path
    links = h5path.split('/')
    for link in links:
        if not link:
            continue  # skip empty link
        log.debug("getObjectIdByPath for objid: {} got link: {}".format(
            obj_id, link))
        if getCollectionForId(obj_id) != "groups":
            # not a group, so won't have links
            msg = "h5path: {} not found".format(h5path)
            log.warn(msg)
            raise HTTPNotFound()
        req = getDataNodeUrl(app, obj_id)
        req += "/groups/" + obj_id + "/links/" + link
        log.debug("get LINK: " + req)
        link_json = await http_get(app, req)
        log.debug("got link_json: " + str(link_json))
        if link_json["class"] != 'H5L_TYPE_HARD':
            # don't follow soft/external links
            msg = "h5path: {} not found".format(h5path)
            log.warn(msg)
            raise HTTPInternalServerError()
        obj_id = link_json["id"]
    # if we get here, we've traversed the entire path and found the object
    return obj_id
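
A hedged usage sketch (the group id and h5path are hypothetical; app is the aiohttp application object assumed by the surrounding service code):

async def resolve_example(app, grp_id):
    # resolve a relative h5path against grp_id; raises HTTPNotFound if
    # any path component is missing or crosses a non-hard link
    dset_id = await getObjectIdByPath(app, grp_id, "./data/temperature")
    return getCollectionForId(dset_id)  # e.g. "datasets"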
Example #4
async def validateAction(app, domain, obj_id, username, action):
    """ check that the given object belongs in the domain and that the 
        requested action (create, read, update, delete, readACL, udpateACL) 
        is permitted for the requesting user.  
    """
    meta_cache = app['meta_cache']
    log.info(
        f"validateAction(domain={domain}, obj_id={obj_id}, username={username}, action={action})"
    )
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        msg = f"Expected root key for domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    obj_json = None
    if obj_id in meta_cache:
        obj_json = meta_cache[obj_id]
    else:
        # fetch from DN
        collection = getCollectionForId(obj_id)
        req = getDataNodeUrl(app, obj_id)
        req += '/' + collection + '/' + obj_id
        bucket = getBucketForDomain(domain)
        params = {}
        if bucket:
            params["bucket"] = bucket
        obj_json = await http_get(app, req, params=params)
        meta_cache[obj_id] = obj_json

    log.debug("obj_json[root]: {} domain_json[root]: {}".format(
        obj_json["root"], domain_json["root"]))
    if obj_json["root"] != domain_json["root"]:
        log.info("unexpected root, reloading domain")
        domain_json = await getDomainJson(app, domain, reload=True)
        if "root" not in domain_json or obj_json["root"] != domain_json["root"]:
            msg = "Object id is not a member of the given domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if action not in ("create", "read", "update", "delete", "readACL",
                      "updateACL"):
        log.error(f"unexpected action: {action}")
        raise HTTPInternalServerError()

    reload = False
    try:
        aclCheck(domain_json, action,
                 username)  # throws exception if not allowed
    except HTTPForbidden:
        log.info(
            f"got HTTPForbidden error on validate action for domain: {domain}, reloading..."
        )
        # just in case the ACL was recently updated, refetch the domain
        reload = True
    if reload:
        domain_json = await getDomainJson(app, domain, reload=True)
        aclCheck(domain_json, action, username)
Example #5
File: datanode.py  Project: t20100/hsds
async def bucketGC(app):
    """ remove objects from db for any deleted root groups or datasets
    """
    log.info("bucketGC start")
    async_sleep_time = int(config.get("async_sleep_time"))
    log.info("async_sleep_time: {}".format(async_sleep_time))

    # update/initialize root object before starting GC

    while True:
        if app["node_state"] != "READY":
            log.info("bucketGC - waiting for Node state to be READY")
            await asyncio.sleep(async_sleep_time)
            continue  # wait for READY state

        gc_ids = app["gc_ids"]
        while len(gc_ids) > 0:
            obj_id = gc_ids.pop()
            log.info(f"got gc id: {obj_id}")
            if not isValidUuid(obj_id):
                log.error(f"bucketGC - got unexpected gc id: {obj_id}")
                continue
            if not isSchema2Id(obj_id):
                log.warn(f"bucketGC - ignoring v1 id: {obj_id}")
                continue
            if getCollectionForId(obj_id) == "groups":
                if not isRootObjId(obj_id):
                    log.error(f"bucketGC - unexpected non-root id: {obj_id}")
                    continue
                log.info(f"bucketGC - delete root objs: {obj_id}")
                await removeKeys(app, obj_id)
            elif getCollectionForId(obj_id) == "datasets":
                log.info(f"bucketGC - delete dataset: {obj_id}")
                await removeKeys(app, obj_id)
            else:
                log.error(f"bucketGC - unexpected obj_id class: {obj_id}")

        log.info(f"bucketGC - sleep: {async_sleep_time}")
        await asyncio.sleep(async_sleep_time)

    # shouldn't ever get here
    log.error("bucketGC terminating unexpectedly")
Example #6
async def get_collections(app, root_id):
    """ Return the object ids for given root.
    """

    groups = {}
    datasets = {}
    datatypes = {}
    lookup_ids = set()
    lookup_ids.add(root_id)

    while lookup_ids:
        grp_id = lookup_ids.pop()
        req = getDataNodeUrl(app, grp_id)
        req += '/groups/' + grp_id + "/links"
        log.debug("collection get LINKS: " + req)
        try:
            links_json = await http_get(app,
                                        req)  # throws 404 if doesn't exist
        except HTTPNotFound:
            log.warn(f"get_collection, group {grp_id} not found")
            continue

        log.debug(f"got links json from dn for group_id: {grp_id}")
        links = links_json["links"]
        log.debug(f"get_collection: got links: {links}")
        for link in links:
            if link["class"] != 'H5L_TYPE_HARD':
                continue
            link_id = link["id"]
            obj_type = getCollectionForId(link_id)
            if obj_type == "groups":
                if link_id in groups:
                    continue  # been here before
                groups[link_id] = {}
                lookup_ids.add(link_id)
            elif obj_type == "datasets":
                if link_id in datasets:
                    continue
                datasets[link_id] = {}
            elif obj_type == "datatypes":
                if link_id in datatypes:
                    continue
                datatypes[link_id] = {}
            else:
                log.error(
                    f"get_collection: unexpected link object type: {obj_type}")
                raise HTTPInternalServerError()

    result = {}
    result["groups"] = groups
    result["datasets"] = datasets
    result["datatypes"] = datatypes
    return result
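
A hedged usage sketch (root_id is assumed to be the id of a root group):

async def log_collection_counts(app, root_id):
    collections = await get_collections(app, root_id)
    for category in ("groups", "datasets", "datatypes"):
        log.info(f"{category}: {len(collections[category])} objects")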
Example #7
async def getPathForObjectId(app, parent_id, idpath_map, tgt_id=None):
    """ Search the object starting with the given parent_id.
    idpath should be a dict with at minimum the key: parent_id: <parent_path>.
    If tgt_id is not None, returns first path that matches the tgt_id or None if not found.
    If Tgt_id is no, returns the idpath_map.
    """

    if not parent_id:
        log.error("No parent_id passed to getPathForObjectId")
        raise HTTPInternalServerError()

    if parent_id not in idpath_map:
        msg = "Obj {} expected to be found in idpath_map".format(parent_id)
        log.error(msg)
        raise HTTPInternalServerError()

    parent_path = idpath_map[parent_id]
    if parent_id == tgt_id:
        return parent_path

    req = getDataNodeUrl(app, parent_id)
    req += "/groups/" + parent_id + "/links"

    log.debug("getPathForObjectId LINKS: " + req)
    links_json = await http_get(app, req)
    log.debug(
        "getPathForObjectId got links json from dn for parent_id: {}".format(
            parent_id))
    links = links_json["links"]

    h5path = None
    for link in links:
        if link["class"] != "H5L_TYPE_HARD":
            continue  # ignore everything except hard links
        link_id = link["id"]
        if link_id in idpath_map:
            continue  # this node has already been visited
        title = link["title"]
        if tgt_id is not None and link_id == tgt_id:
            # found it!
            h5path = op.join(parent_path, title)
            break
        idpath_map[link_id] = op.join(parent_path, title)
        if getCollectionForId(link_id) != "groups":
            continue
        h5path = await getPathForObjectId(app, link_id, idpath_map,
                                          tgt_id)  # recursive call
        if tgt_id is not None and h5path:
            break

    return h5path
Example #8
async def getObjectJson(app,
                        obj_id,
                        bucket=None,
                        refresh=False,
                        include_links=False,
                        include_attrs=False):
    """ Return top-level json (i.e. excluding attributes or links by default) for a given obj_id.
    If refresh is False, any data present in the meta_cache will be returned.  If not
    the DN will be queries, and any resultant data added to the meta_cache.  
    Note: meta_cache values may be stale, but use of immutable data (e.g. type of a dataset)
    is always valid
    """
    meta_cache = app['meta_cache']
    obj_json = None
    if include_links or include_attrs:
        # links and attributes are subject to change, so always refresh
        refresh = True
    log.info(f"getObjectJson {obj_id}")
    if obj_id in meta_cache and not refresh:
        log.debug(f"found {obj_id} in meta_cache")
        obj_json = meta_cache[obj_id]
    else:
        req = getDataNodeUrl(app, obj_id)
        collection = getCollectionForId(obj_id)
        params = {}
        if include_links:
            params["include_links"] = 1
        if include_attrs:
            params["include_attrs"] = 1
        if bucket:
            params["bucket"] = bucket
        req += '/' + collection + '/' + obj_id
        obj_json = await http_get(app, req,
                                  params=params)  # throws 404 if doesn't exist
        meta_cache[obj_id] = obj_json
    if obj_json is None:
        msg = f"Object: {obj_id} not found"
        log.warn(msg)
        raise HTTPNotFound()
    return obj_json
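
A hedged usage sketch; note that passing include_links or include_attrs forces a refresh, so cached entries are only served for bare top-level requests:

async def fetch_example(app, dset_id, bucket=None):
    # first call populates meta_cache; a repeat bare call is served from cache
    obj_json = await getObjectJson(app, dset_id, bucket=bucket)
    # asking for links/attrs always goes back to the DN
    full_json = await getObjectJson(app, dset_id, bucket=bucket,
                                    include_links=True, include_attrs=True)
    return obj_json["root"], full_json.get("links")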
Example #9
async def getObjectJson(app, obj_id, refresh=False):
    """ Return top-level json (i.e. excluding attributes or links) for a given obj_id.
    If refresh is False, any data present in the meta_cache will be returned.  If not
    the DN will be queries, and any resultant data added to the meta_cache.  
    Note: meta_cache values may be stale, but use of immutable data (e.g. type of a dataset)
    is always valid
    """
    meta_cache = app['meta_cache']
    obj_json = None
    log.info("getObjectJson {}".format(obj_id))
    if obj_id in meta_cache and not refresh:
        log.debug("found {} in meta_cache".format(obj_id))
        obj_json = meta_cache[obj_id]
    else:
        req = getDataNodeUrl(app, obj_id)
        collection = getCollectionForId(obj_id)
        req += '/' + collection + '/' + obj_id
        obj_json = await http_get(app, req)  # throws 404 if doesn't exist
        meta_cache[obj_id] = obj_json
    if obj_json is None:
        msg = "Object: {} not found".format(obj_id)
        log.warn(msg)
        raise HTTPNotFound()
    return obj_json
Example #10
async def GET_Links(request):
    """HTTP method to return JSON for link collection"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links"
    query_sep = '?'
    if limit is not None:
        req += query_sep + "Limit=" + str(limit)
        query_sep = '&'
    if marker is not None:
        req += query_sep + "Marker=" + marker

    log.debug("get LINKS: " + req)
    links_json = await http_get(app, req)
    log.debug("got links json from dn for group_id: {}".format(group_id))
    links = links_json["links"]

    # mix in collection key, target and hrefs
    for link in links:
        if link["class"] == "H5L_TYPE_HARD":
            collection_name = getCollectionForId(link["id"])
            link["collection"] = collection_name
            target_uri = '/' + collection_name + '/' + link["id"]
            link["target"] = getHref(request, target_uri)
        link_uri = '/groups/' + group_id + '/links/' + link['title']
        link["href"] = getHref(request, link_uri)

    resp_json = {}
    resp_json["links"] = links
    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({
        'rel': 'self',
        'href': getHref(request, group_uri + '/links')
    })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
Example #11
async def GET_Link(request):
    """HTTP method to return JSON for a group link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    log.debug("get LINK: " + req)
    link_json = await http_get(app, req)
    log.debug("got link_json: " + str(link_json))
    resp_link = {}
    resp_link["title"] = link_title
    resp_link["class"] = link_json["class"]
    if link_json["class"] == "H5L_TYPE_HARD":
        resp_link["id"] = link_json["id"]
        resp_link["collection"] = getCollectionForId(link_json["id"])
    elif link_json["class"] == "H5L_TYPE_SOFT":
        resp_link["h5path"] = link_json["h5path"]
    elif link_json["class"] == "H5L_TYPE_EXTERNAL":
        resp_link["h5path"] = link_json["h5path"]
        resp_link["h5domain"] = link_json["h5domain"]
    else:
        log.warn("Unexpected link class: {}".format(link_json["class"]))
    resp_json = {}
    resp_json["link"] = resp_link
    resp_json["created"] = link_json["created"]
    # links don't get modified, so use created timestamp as lastModified
    resp_json["lastModified"] = link_json["created"]

    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({
        'rel': 'self',
        'href': getHref(request, group_uri + '/links/' + link_title)
    })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    if link_json["class"] == "H5L_TYPE_HARD":
        target = '/' + resp_link["collection"] + '/' + resp_link["id"]
        hrefs.append({'rel': 'target', 'href': getHref(request, target)})

    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
Example #12
File: async_lib.py  Project: t20100/hsds
def scanRootCallback(app, s3keys):
    log.debug(f"scanRootCallback, {len(s3keys)} items")
    if isinstance(s3keys, list):
        log.error("got list result for s3keys callback")
        raise ValueError("unexpected callback format")

    results = app["scanRoot_results"]
    if results:
        log.debug(f"previous scanRoot_results:".format(results))
    for s3key in s3keys.keys():
        if not isS3ObjKey(s3key):
            log.info(f"not s3obj key, ignoring: {s3key}")
            continue
        objid = getObjId(s3key)
        etag = None
        obj_size = None
        lastModified = None
        item = s3keys[s3key]
        if "ETag" in item:
            etag = item["ETag"]
        if "Size" in item:
            obj_size = item["Size"]
        if "LastModified" in item:
            lastModified = item["LastModified"]
        log.debug(f"{objid}: {etag} {obj_size} {lastModified}")

        if lastModified and lastModified > results["lastModified"]:
            log.debug(f"changing lastModified from: {results['lastModified']} to {lastModified}")
            results["lastModified"] = lastModified
        is_chunk = False
        if isValidChunkId(objid):
            is_chunk = True
            results["num_chunks"] += 1
            results["allocated_bytes"] += obj_size
        else:
            results["metadata_bytes"] += obj_size


        if is_chunk or getCollectionForId(objid) == "datasets":
            if is_chunk:
                dsetid = getDatasetId(objid)
            else:
                dsetid = objid
            datasets = results["datasets"]
            if dsetid not in datasets:
                dataset_info = {}
                dataset_info["lastModified"] = 0
                dataset_info["num_chunks"] = 0
                dataset_info["allocated_bytes"] = 0
                datasets[dsetid] = dataset_info
            dataset_info = datasets[dsetid]
            if lastModified and lastModified > dataset_info["lastModified"]:
                dataset_info["lastModified"] = lastModified
            if is_chunk:
                dataset_info["num_chunks"] += 1
                dataset_info["allocated_bytes"] += obj_size
        elif getCollectionForId(objid) == "groups":
            results["num_groups"] += 1
        elif getCollectionForId(objid) == "datatypes":
            results["num_datatypes"] += 1
        else:
            log.error(f"Unexpected collection type for id: {objid}")