Exemplo n.º 1
0
def getShuffle(app, dset_id, dset_json):
    shuffle_size = 0
    if isShuffle(dset_json):
        type_json = dset_json["type"]
        item_size = getItemSize(type_json)
        if item_size == 'H5T_VARIABLE':
            log.warn(
                f"shuffle filter can't be used on variable datatype for datasset: {dset_id}"
            )
        else:
            shuffle_size = item_size
            log.debug(f"got shuffle_size: {shuffle_size}")
    else:
        log.debug("isShuffle is false")

    if shuffle_size > 1:
        shuffle_map = app['shuffle_map']
        if dset_id not in shuffle_map:
            # save the shuffle size so the lazy chunk writer can access it
            shuffle_map[dset_id] = shuffle_size
            log.debug(f"update shuffle_map {dset_id}: {shuffle_size}")
    return shuffle_size
Exemplo n.º 2
0
async def PUT_AttributeValue(request):
    """HTTP method to update an attributes data"""
    log.request(request)
    log.info("PUT_AttributeValue")
    app = request.app
    collection = getRequestCollectionName(request) # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute Value id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT AttributeValue with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPInternalServerError()

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "update")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(obj_id)) 
    log.debug(f"got dn_json: {dn_json}")

    attr_shape = dn_json["shape"]
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be updated"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    np_shape = getShapeDims(attr_shape)
    type_json = dn_json["type"]
    np_dtype = createDataType(type_json)  # np datatype

    request_type = "json"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type not in ("application/json", "application/octet-stream"):
            msg = f"Unknown content_type: {content_type}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if content_type == "application/octet-stream":
            log.debug("PUT AttributeValue - request_type is binary")
            request_type = "binary"
        else:
            log.debug("PUT AttribueValue - request type is json")

    binary_data = None
    if request_type == "binary":
        item_size = getItemSize(type_json)

        if item_size == 'H5T_VARIABLE':
            msg = "Only JSON is supported for variable length data types"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # read binary data
        binary_data = await request.read()
        if len(binary_data) != request.content_length:
            msg = f"Read {len(binary_data)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

    arr = None  # np array to hold request data

    if binary_data:
        npoints = getNumElements(np_shape)
        if npoints*item_size != len(binary_data):
            msg = "Expected: " + str(npoints*item_size) + " bytes, but got: " + str(len(binary_data))
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        arr = np.fromstring(binary_data, dtype=np_dtype)
        arr = arr.reshape(np_shape)  # conform to selection shape
        # convert to JSON for transmission to DN
        data = arr.tolist()
        value = bytesArrayToList(data)
    else:
        body = await request.json()   

        if "value" not in body:
            msg = "PUT attribute value with no value in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]

        # validate that the value agrees with type/shape
        try:
            arr = jsonToArray(np_shape, np_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    log.info(f"Got: {arr.size} array elements")
     
    # ready to add attribute now
    attr_json = {}
    attr_json["type"] = type_json
    attr_json["shape"] = attr_shape
    attr_json["value"] = value

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name  
    log.info(f"PUT Attribute Value: {req}")

    dn_json["value"] = value
    params = {}
    params = {"replace": 1}  # let the DN know we can overwrite the attribute
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute Value resp: {put_rsp}")
    
    hrefs = []  # TBD
    req_rsp = { "hrefs": hrefs }
    # attribute creation successful     
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
Exemplo n.º 3
0
async def POST_Dataset(request):
    """HTTP method to create a new dataset object"""
    log.request(request)
    app = request.app

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "POST Datasets with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()

    # get domain, check authorization
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain_json = await getDomainJson(app, domain, reload=True)
    root_id = domain_json["root"]

    aclCheck(domain_json, "create",
             username)  # throws exception if not allowed

    if "root" not in domain_json:
        msg = "Expected root key for domain: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    #
    # validate type input
    #
    if "type" not in body:
        msg = "POST Dataset has no type key in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    datatype = body["type"]
    if isinstance(datatype, str) and datatype.startswith("t-"):
        # Committed type - fetch type json from DN
        ctype_id = datatype
        log.debug("got ctypeid: {}".format(ctype_id))
        ctype_json = await getObjectJson(app, ctype_id)
        log.debug("ctype: {}".format(ctype_json))
        if ctype_json["root"] != root_id:
            msg = "Referenced committed datatype must belong in same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        datatype = ctype_json["type"]
        # add the ctype_id to type type
        datatype["id"] = ctype_id
    elif isinstance(datatype, str):
        try:
            # convert predefined type string (e.g. "H5T_STD_I32LE") to
            # corresponding json representation
            datatype = getBaseTypeJson(datatype)
            log.debug("got datatype: {}".format(datatype))
        except TypeError:
            msg = "POST Dataset with invalid predefined type"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    validateTypeItem(datatype)
    item_size = getItemSize(datatype)

    #
    # Validate shape input
    #
    dims = None
    shape_json = {}

    if "shape" not in body:
        shape_json["class"] = "H5S_SCALAR"
    else:
        shape = body["shape"]
        if isinstance(shape, int):
            shape_json["class"] = "H5S_SIMPLE"
            dims = [
                shape,
            ]
            shape_json["dims"] = dims
        elif isinstance(shape, str):
            # only valid string value is H5S_NULL
            if shape != "H5S_NULL":
                msg = "POST Datset with invalid shape value"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            shape_json["class"] = "H5S_NULL"
        elif isinstance(shape, list):
            if len(shape) == 0:
                shape_json["class"] = "H5S_SCALAR"
            else:
                shape_json["class"] = "H5S_SIMPLE"
                shape_json["dims"] = shape
                dims = shape
        else:
            msg = "Bad Request: shape is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if dims is not None:
        for i in range(len(dims)):
            extent = dims[i]
            if not isinstance(extent, int):
                msg = "Invalid shape type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if extent < 0:
                msg = "shape dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

    maxdims = None
    if "maxdims" in body:
        if dims is None:
            msg = "Maxdims cannot be supplied if space is NULL"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        maxdims = body["maxdims"]
        if isinstance(maxdims, int):
            dim1 = maxdims
            maxdims = [dim1]
        elif isinstance(maxdims, list):
            pass  # can use as is
        else:
            msg = "Bad Request: maxdims is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if len(dims) != len(maxdims):
            msg = "Maxdims rank doesn't match Shape"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if maxdims is not None:
        for extent in maxdims:
            if not isinstance(extent, int):
                msg = "Invalid maxdims type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if extent < 0:
                msg = "maxdims dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
        if len(maxdims) != len(dims):
            msg = "Bad Request: maxdims array length must equal shape array length"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        shape_json["maxdims"] = []
        for i in range(len(dims)):
            maxextent = maxdims[i]
            if not isinstance(maxextent, int):
                msg = "Bad Request: maxdims must be integer type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            elif maxextent == 0:
                # unlimited dimension
                shape_json["maxdims"].append(0)
            elif maxextent < dims[i]:
                msg = "Bad Request: maxdims extent can't be smaller than shape extent"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            else:
                shape_json["maxdims"].append(maxextent)

    layout = None
    min_chunk_size = int(config.get("min_chunk_size"))
    max_chunk_size = int(config.get("max_chunk_size"))
    if 'creationProperties' in body:
        creationProperties = body["creationProperties"]
        if 'layout' in creationProperties:
            layout = creationProperties["layout"]

            validateChunkLayout(shape_json, item_size, layout)

    if layout is None and shape_json["class"] != "H5S_NULL":
        # default to chunked layout
        layout = {"class": "H5D_CHUNKED"}

    if layout and layout["class"] == 'H5D_CONTIGUOUS_REF':
        chunk_dims = getContiguousLayout(shape_json,
                                         item_size,
                                         chunk_min=min_chunk_size,
                                         chunk_max=max_chunk_size)
        layout["dims"] = chunk_dims
        log.debug(f"autoContiguous layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED' and "dims" not in layout:
        # do autochunking
        chunk_dims = guessChunk(shape_json, item_size)
        layout["dims"] = chunk_dims
        log.debug(f"initial autochunk layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED':
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)

        log.debug("chunk_size: {}, min: {}, max: {}".format(
            chunk_size, min_chunk_size, max_chunk_size))
        # adjust the chunk shape if chunk size is too small or too big
        adjusted_chunk_dims = None
        if chunk_size < min_chunk_size:
            log.debug(
                "chunk size: {} less than min size: {}, expanding".format(
                    chunk_size, min_chunk_size))
            adjusted_chunk_dims = expandChunk(chunk_dims,
                                              item_size,
                                              shape_json,
                                              chunk_min=min_chunk_size,
                                              layout_class=layout["class"])
        elif chunk_size > max_chunk_size:
            log.debug(
                "chunk size: {} greater than max size: {}, expanding".format(
                    chunk_size, max_chunk_size, layout_class=layout["class"]))
            adjusted_chunk_dims = shrinkChunk(chunk_dims,
                                              item_size,
                                              chunk_max=max_chunk_size)
        if adjusted_chunk_dims:
            log.debug(
                f"requested chunk_dimensions: {chunk_dims} modified dimensions: {adjusted_chunk_dims}"
            )
            layout["dims"] = adjusted_chunk_dims

    if layout and layout["class"] in ('H5D_CHUNKED_REF',
                                      'H5D_CHUNKED_REF_INDIRECT'):
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)

        log.debug("chunk_size: {}, min: {}, max: {}".format(
            chunk_size, min_chunk_size, max_chunk_size))
        # adjust the chunk shape if chunk size is too small or too big
        if chunk_size < min_chunk_size:
            log.warn(
                "chunk size: {} less than min size: {} for H5D_CHUNKED_REF dataset"
                .format(chunk_size, min_chunk_size))
        elif chunk_size > max_chunk_size:
            log.warn(
                "chunk size: {} greater than max size: {}, for H5D_CHUNKED_REF dataset"
                .format(chunk_size,
                        max_chunk_size,
                        layout_class=layout["class"]))

    link_id = None
    link_title = None
    if "link" in body:
        link_body = body["link"]
        if "id" in link_body:
            link_id = link_body["id"]
        if "name" in link_body:
            link_title = link_body["name"]
        if link_id and link_title:
            log.info("link id: {}".format(link_id))
            # verify that the referenced id exists and is in this domain
            # and that the requestor has permissions to create a link
            await validateAction(app, domain, link_id, username, "create")

    dset_id = createObjId("datasets", rootid=root_id)
    log.info("new  dataset id: {}".format(dset_id))

    dataset_json = {
        "id": dset_id,
        "root": root_id,
        "type": datatype,
        "shape": shape_json
    }

    if "creationProperties" in body:
        # TBD - validate all creationProperties
        creationProperties = body["creationProperties"]
        if "fillValue" in creationProperties:
            # validate fill value compatible with type
            dt = createDataType(datatype)
            fill_value = creationProperties["fillValue"]
            if isinstance(fill_value, list):
                fill_value = tuple(fill_value)
            try:
                np.asarray(fill_value, dtype=dt)
            except (TypeError, ValueError):
                msg = "Fill value {} not compatible with dataset type: {}".format(
                    fill_value, datatype)
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        dataset_json["creationProperties"] = creationProperties

    if layout is not None:
        dataset_json["layout"] = layout

    log.debug("create dataset: " + json.dumps(dataset_json))
    req = getDataNodeUrl(app, dset_id) + "/datasets"

    post_json = await http_post(app, req, data=dataset_json)

    # create link if requested
    if link_id and link_title:
        link_json = {}
        link_json["id"] = dset_id
        link_json["class"] = "H5L_TYPE_HARD"
        link_req = getDataNodeUrl(app, link_id)
        link_req += "/groups/" + link_id + "/links/" + link_title
        log.info("PUT link - : " + link_req)
        put_rsp = await http_put(app, link_req, data=link_json)
        log.debug("PUT Link resp: {}".format(put_rsp))

    # dataset creation successful
    resp = await jsonResponse(request, post_json, status=201)
    log.response(request, resp=resp)

    return resp
Exemplo n.º 4
0
async def GET_AttributeValue(request):
    """HTTP method to return an attribute value"""
    log.request(request)
    app = request.app 
    log.info("GET_AttributeValue")
    collection = getRequestCollectionName(request) # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)
    
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain value: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "read")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(dn_json)) 

    attr_shape = dn_json["shape"]
    log.debug(f"attribute shape: {attr_shape}")
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be read"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    accept_type = getAcceptType(request)
    response_type = accept_type    # will adjust later if binary not possible
    type_json = dn_json["type"]
    shape_json = dn_json["shape"]
    item_size = getItemSize(type_json)
    
    if item_size == 'H5T_VARIABLE' and accept_type != "json":
        msg = "Client requested binary, but only JSON is supported for variable length data types"
        log.info(msg)
        response_type = "json"

    if response_type == "binary":
        arr_dtype = createDataType(type_json)  # np datatype
        np_shape = getShapeDims(shape_json)
        try:
            arr = jsonToArray(np_shape, arr_dtype, dn_json["value"])
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        output_data = arr.tobytes()
        log.debug(f"GET AttributeValue - returning {len(output_data)} bytes binary data")
        # write response
        try: 
            resp = StreamResponse()
            resp.content_type = "application/octet-stream"
            resp.content_length = len(output_data)
            # allow CORS
            resp.headers['Access-Control-Allow-Origin'] = '*'
            resp.headers['Access-Control-Allow-Methods'] = "GET, POST, DELETE, PUT, OPTIONS"
            resp.headers['Access-Control-Allow-Headers'] = "Content-Type, api_key, Authorization"
            await resp.prepare(request)
            await resp.write(output_data)     
        except Exception as e:
            log.error(f"Got exception: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    else:
        resp_json = {}
        if "value" in dn_json:
            resp_json["value"] = dn_json["value"] 

        hrefs = []
        obj_uri = '/' + collection + '/' + obj_id
        attr_uri = obj_uri + '/attributes/' + attr_name
        hrefs.append({'rel': 'self', 'href': getHref(request, attr_uri)})
        hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
        hrefs.append({'rel': 'owner', 'href': getHref(request, obj_uri)})
        resp_json["hrefs"] = hrefs
        resp = await jsonResponse(request, resp_json)
        log.response(request, resp=resp)
    return resp