async def getChunk(app, chunk_id, dset_json, s3path=None, s3offset=0, s3size=0, chunk_init=False):
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(f"unable to initialize chunk {chunk_id} for reference layouts")
        raise HTTPInternalServerError()
    log.debug(f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} percent, dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}")

    chunk_arr = None
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)
    bucket = None
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')  # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index + 1):]
        log.debug(f"Using bucket: {bucket} and s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)
        log.debug(f"getChunk s3key: {s3key}")

    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isS3Obj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]
            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(f"s3 read request for {chunk_id} was requested at: {read_start_time}")
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(f"s3 read for chunk {chunk_id} timed-out, initiating a new read")
            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug(f"Reading chunk {s3key} from S3")
                deflate_level = getDeflate(app, dset_id, dset_json)
                chunk_bytes = await getS3Bytes(app, s3key, deflate_level=deflate_level, s3offset=s3offset, s3size=s3size, bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id]
                else:
                    log.warn(f"expected to find {chunk_id} in pending_s3_read map")
                # np.fromstring is deprecated for binary data - use frombuffer
                # (copy so the array is writable for later chunk updates)
                chunk_arr = np.frombuffer(chunk_bytes, dtype=dt).copy()
                chunk_arr = chunk_arr.reshape(dims)
            log.debug(f"chunk size: {chunk_arr.size}")
        elif chunk_init:
            log.debug(f"Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")

    if chunk_arr is not None:
        # check that there's room in the cache before adding it
        if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
            # no room in the cache, wait till space is freed by the s3sync task
            wait_start = time.time()
            while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                log.warn(f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, sleeping till items are flushed")
                if time.time() - wait_start > MAX_WAIT_TIME:
                    log.error(f"unable to save updated chunk {chunk_id} to cache, returning 503 error")
                    raise HTTPServiceUnavailable()
                await asyncio.sleep(1)
        chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
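# --- Illustrative sketch (not part of the service code): getChunk above splits
# an "s3://bucket/key..." reference into a bucket and an object key before the
# read.  A minimal standalone version of that parsing logic, assuming only the
# "s3://" scheme is supported:
def parse_s3path(s3path: str):
    """Return (bucket, key) for an s3:// URI, or raise ValueError."""
    if not s3path.startswith("s3://"):
        raise ValueError(f"unexpected s3path: {s3path}")
    path = s3path[5:]
    index = path.find('/')  # first '/' separates bucket from key
    if index < 1:
        raise ValueError(f"s3path is invalid: {s3path}")
    return path[:index], path[index + 1:]

# e.g. parse_s3path("s3://mybucket/data/chunk.bin") -> ("mybucket", "data/chunk.bin")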
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug(f"dset_json: {dset_json}")
    type_json = dset_json["type"]

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    # get chunk selection from query params
    if "select" in params:
        log.debug(f"select: {params['select']}")
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    chunk_arr = await getChunk(app, chunk_id, dset_json, s3path=s3path, s3offset=s3offset, s3size=s3size)
    if chunk_arr is None:
        # return a 404
        msg = f"Chunk {chunk_id} does not exist"
        log.info(msg)
        raise HTTPNotFound()

    resp = None
    if "query" in params:
        # do query selection
        query = params["query"]
        log.info(f"query: {query}")
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])

        values = []
        indices = []
        field_names = []
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug(f"x: {x}")
        eval_str = getEvalStr(query, "x", field_names)
        log.debug(f"eval_str: {eval_str}")
        where_result = np.where(eval(eval_str))
        log.debug(f"where_result: {where_result}")
        where_result_index = where_result[0]
        log.debug(f"where_result index: {where_result_index}")
        log.debug(f"boolean selection: {x[where_result_index]}")
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug(f"index: {index}")
            value = x[index].tolist()
            log.debug(f"value: {value}")
            json_val = bytesArrayToList(value)
            log.debug(f"json_value: {json_val}")
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result returning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")
        put_points = True
    else:
        log.info("POST Chunk get points")

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" not in params:
        msg = "Missing dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3. If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=put_points)

    if put_points:
        # writing point data
        # create a numpy array with the following type:
        #   (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
        # np.fromstring is deprecated for binary data - use frombuffer
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype)
        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        for i in range(num_points):
            elem = point_arr[i]
            if rank == 1:
                coord = int(elem[0])
            else:
                coord = tuple(elem[0])  # index to update
            val = elem[1]  # value
            chunk_arr[coord] = val  # update the point

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = now
        log.info(f"set {chunk_id} to dirty")
    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)  # read points as unsigned longs
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")
        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points,), dtype=dset_dtype)
        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
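# --- Illustrative sketch: the put-points wire format above is a packed array
# of (coord, value) records.  Building the same compound dtype by hand for a
# hypothetical rank-2 int32 dataset and round-tripping a buffer through it:
import numpy as np

_rank = 2
_comp_dtype = np.dtype([("coord", np.dtype(f"({_rank},)uint64")), ("value", np.dtype("int32"))])
_points = np.zeros((2,), dtype=_comp_dtype)
_points[0] = ((0, 1), 42)
_points[1] = ((3, 7), -5)
_buf = _points.tobytes()                           # what the client sends
_decoded = np.frombuffer(_buf, dtype=_comp_dtype)  # what the server reads
# _decoded[1]["coord"] -> array([3, 7], dtype=uint64); _decoded[1]["value"] -> -5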
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")
        put_points = True
    else:
        log.info("POST Chunk get points")

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"POST_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3. If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path, s3offset=s3offset, s3size=s3size, chunk_init=put_points)
    if chunk_arr is None:
        if put_points:
            log.error("no array returned for put_points")
            raise HTTPInternalServerError()
        else:
            # get points on a non-existent S3 object?
            log.warn("S3 object not found for get points")
            raise HTTPNotFound()

    log.debug(f"chunk_arr.shape: {chunk_arr.shape}")

    if put_points:
        # writing point data
        # create a numpy array with the following type:
        #   (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
        # np.fromstring is deprecated for binary data - use frombuffer
        # (copy so the coord fields can be adjusted in place below)
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype).copy()
        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        for i in range(num_points):
            elem = point_arr[i]
            log.debug(f"non-relative coordinate: {elem}")
            if rank == 1:
                coord = int(elem[0])
                coord = coord % chunk_layout[0]  # adjust to chunk relative
            else:
                coord = elem[0]  # index to update
                for dim in range(rank):
                    # adjust to chunk relative
                    coord[dim] = int(coord[dim]) % chunk_layout[dim]
                coord = tuple(coord)  # need to convert to a tuple
            log.debug(f"relative coordinate: {coord}")
            val = elem[1]  # value
            try:
                chunk_arr[coord] = val  # update the point
            except IndexError:
                msg = "Out of bounds point index for POST Chunk"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)
        log.info(f"set {chunk_id} to dirty")
    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)  # read points as unsigned longs
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")
        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points,), dtype=dset_dtype)
        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" not in params:
        msg = "Missing dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug(f"dset_json: {dset_json}")
    dims = getChunkLayout(dset_json)

    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    # check that the content_length is what we expect
    if itemsize != 'H5T_VARIABLE':
        log.debug(f"expect content_length: {num_elements*itemsize}")
    log.debug(f"actual content_length: {request.content_length}")
    if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
        msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming data
    input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()
    input_arr = bytesToArray(input_bytes, dt, mshape)

    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=True)

    # update chunk array
    chunk_arr[selection] = input_arr
    chunk_cache = app["chunk_cache"]
    chunk_cache.setDirty(chunk_id)
    log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

    # async write to S3
    dirty_ids = app["dirty_ids"]
    now = int(time.time())
    dirty_ids[chunk_id] = now

    # chunk update successful
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
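# --- Illustrative sketch: the content-length check above multiplies the
# selection's element count by the fixed item size.  The same arithmetic for a
# hypothetical [0:10:2, 0:4] selection on a 4-byte type:
_selection = (slice(0, 10, 2), slice(0, 4, 1))
_mshape = [len(range(s.start, s.stop, s.step)) for s in _selection]  # -> [5, 4]
_num_elements = 1
for _extent in _mshape:
    _num_elements *= _extent
_itemsize = 4
_expected_content_length = _num_elements * _itemsize  # -> 80 bytes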
async def POST_Dataset(request):
    """HTTP method to create a new dataset object"""
    log.request(request)
    app = request.app

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "POST Datasets with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    body = await request.json()

    # get domain, check authorization
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid host value: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain_json = await getDomainJson(app, domain, reload=True)
    if "root" not in domain_json:
        msg = f"Expected root key for domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    root_id = domain_json["root"]

    aclCheck(domain_json, "create", username)  # throws exception if not allowed

    #
    # validate type input
    #
    if "type" not in body:
        msg = "POST Dataset has no type key in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    datatype = body["type"]
    if isinstance(datatype, str) and datatype.startswith("t-"):
        # Committed type - fetch type json from DN
        ctype_id = datatype
        log.debug(f"got ctypeid: {ctype_id}")
        ctype_json = await getObjectJson(app, ctype_id)
        log.debug(f"ctype: {ctype_json}")
        if ctype_json["root"] != root_id:
            msg = "Referenced committed datatype must belong in same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        datatype = ctype_json["type"]
        # add the ctype_id to the type
        datatype["id"] = ctype_id
    elif isinstance(datatype, str):
        try:
            # convert predefined type string (e.g. "H5T_STD_I32LE") to
            # corresponding json representation
            datatype = getBaseTypeJson(datatype)
            log.debug(f"got datatype: {datatype}")
        except TypeError:
            msg = "POST Dataset with invalid predefined type"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    validateTypeItem(datatype)
    item_size = getItemSize(datatype)

    #
    # validate shape input
    #
    dims = None
    shape_json = {}
    if "shape" not in body:
        shape_json["class"] = "H5S_SCALAR"
    else:
        shape = body["shape"]
        if isinstance(shape, int):
            shape_json["class"] = "H5S_SIMPLE"
            dims = [shape, ]
            shape_json["dims"] = dims
        elif isinstance(shape, str):
            # only valid string value is H5S_NULL
            if shape != "H5S_NULL":
                msg = "POST Dataset with invalid shape value"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            shape_json["class"] = "H5S_NULL"
        elif isinstance(shape, list):
            if len(shape) == 0:
                shape_json["class"] = "H5S_SCALAR"
            else:
                shape_json["class"] = "H5S_SIMPLE"
                shape_json["dims"] = shape
                dims = shape
        else:
            msg = "Bad Request: shape is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if dims is not None:
        for i in range(len(dims)):
            extent = dims[i]
            if not isinstance(extent, int):
                msg = "Invalid shape type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if extent < 0:
                msg = "shape dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

    maxdims = None
    if "maxdims" in body:
        if dims is None:
            msg = "Maxdims cannot be supplied if space is NULL"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        maxdims = body["maxdims"]
        if isinstance(maxdims, int):
            maxdims = [maxdims, ]
        elif isinstance(maxdims, list):
            pass  # can use as is
        else:
            msg = "Bad Request: maxdims is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if len(maxdims) != len(dims):
            msg = "Bad Request: maxdims array length must equal shape array length"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if maxdims is not None:
        shape_json["maxdims"] = []
        for i in range(len(dims)):
            maxextent = maxdims[i]
            if not isinstance(maxextent, int):
                msg = "Bad Request: maxdims must be integer type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if maxextent < 0:
                msg = "maxdims dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if maxextent == 0:
                # unlimited dimension
                shape_json["maxdims"].append(0)
            elif maxextent < dims[i]:
                msg = "Bad Request: maxdims extent can't be smaller than shape extent"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            else:
                shape_json["maxdims"].append(maxextent)

    layout = None
    min_chunk_size = int(config.get("min_chunk_size"))
    max_chunk_size = int(config.get("max_chunk_size"))
    if 'creationProperties' in body:
        creationProperties = body["creationProperties"]
        if 'layout' in creationProperties:
            layout = creationProperties["layout"]
            validateChunkLayout(shape_json, item_size, layout)

    if layout is None and shape_json["class"] != "H5S_NULL":
        # default to chunked layout
        layout = {"class": "H5D_CHUNKED"}

    if layout and layout["class"] == 'H5D_CONTIGUOUS_REF':
        chunk_dims = getContiguousLayout(shape_json, item_size, chunk_min=min_chunk_size, chunk_max=max_chunk_size)
        layout["dims"] = chunk_dims
        log.debug(f"autoContiguous layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED' and "dims" not in layout:
        # do autochunking
        chunk_dims = guessChunk(shape_json, item_size)
        layout["dims"] = chunk_dims
        log.debug(f"initial autochunk layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED':
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)
        log.debug(f"chunk_size: {chunk_size}, min: {min_chunk_size}, max: {max_chunk_size}")
        # adjust the chunk shape if chunk size is too small or too big
        adjusted_chunk_dims = None
        if chunk_size < min_chunk_size:
            log.debug(f"chunk size: {chunk_size} less than min size: {min_chunk_size}, expanding")
            adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape_json, chunk_min=min_chunk_size, layout_class=layout["class"])
        elif chunk_size > max_chunk_size:
            log.debug(f"chunk size: {chunk_size} greater than max size: {max_chunk_size}, shrinking")
            adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, chunk_max=max_chunk_size)
        if adjusted_chunk_dims:
            log.debug(f"requested chunk_dimensions: {chunk_dims} modified dimensions: {adjusted_chunk_dims}")
            layout["dims"] = adjusted_chunk_dims

    if layout and layout["class"] in ('H5D_CHUNKED_REF', 'H5D_CHUNKED_REF_INDIRECT'):
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)
        log.debug(f"chunk_size: {chunk_size}, min: {min_chunk_size}, max: {max_chunk_size}")
        # warn if the chunk size is too small or too big
        # (dims can't be adjusted for reference layouts)
        if chunk_size < min_chunk_size:
            log.warn(f"chunk size: {chunk_size} less than min size: {min_chunk_size} for H5D_CHUNKED_REF dataset")
        elif chunk_size > max_chunk_size:
            log.warn(f"chunk size: {chunk_size} greater than max size: {max_chunk_size} for H5D_CHUNKED_REF dataset")

    link_id = None
    link_title = None
    if "link" in body:
        link_body = body["link"]
        if "id" in link_body:
            link_id = link_body["id"]
        if "name" in link_body:
            link_title = link_body["name"]
        if link_id and link_title:
            log.info(f"link id: {link_id}")
            # verify that the referenced id exists and is in this domain
            # and that the requestor has permissions to create a link
            await validateAction(app, domain, link_id, username, "create")

    dset_id = createObjId("datasets", rootid=root_id)
    log.info(f"new dataset id: {dset_id}")

    dataset_json = {"id": dset_id, "root": root_id, "type": datatype, "shape": shape_json}

    if "creationProperties" in body:
        # TBD - validate all creationProperties
        creationProperties = body["creationProperties"]
        if "fillValue" in creationProperties:
            # validate fill value compatible with type
            dt = createDataType(datatype)
            fill_value = creationProperties["fillValue"]
            if isinstance(fill_value, list):
                fill_value = tuple(fill_value)
            try:
                np.asarray(fill_value, dtype=dt)
            except (TypeError, ValueError):
                msg = f"Fill value {fill_value} not compatible with dataset type: {datatype}"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
        dataset_json["creationProperties"] = creationProperties

    if layout is not None:
        dataset_json["layout"] = layout

    log.debug("create dataset: " + json.dumps(dataset_json))
    req = getDataNodeUrl(app, dset_id) + "/datasets"
    post_json = await http_post(app, req, data=dataset_json)

    # create link if requested
    if link_id and link_title:
        link_json = {}
        link_json["id"] = dset_id
        link_json["class"] = "H5L_TYPE_HARD"
        link_req = getDataNodeUrl(app, link_id)
        link_req += "/groups/" + link_id + "/links/" + link_title
        log.info("PUT link - : " + link_req)
        put_rsp = await http_put(app, link_req, data=link_json)
        log.debug(f"PUT Link resp: {put_rsp}")

    # dataset creation successful
    resp = await jsonResponse(request, post_json, status=201)
    log.response(request, resp=resp)
    return resp
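# --- Illustrative sketch: the chunk-size clamping above compares
# product(chunk_dims) * item_size against the configured min/max bounds
# (guessChunk/expandChunk/shrinkChunk are project helpers; only the size
# arithmetic is shown here, with made-up bounds):
def _chunk_num_bytes(chunk_dims, item_size):
    n = item_size
    for extent in chunk_dims:
        n *= extent
    return n

_min, _max = 1024 * 1024, 4 * 1024 * 1024  # e.g. 1 MiB and 4 MiB bounds
_size = _chunk_num_bytes([256, 256], 8)    # -> 524288 bytes
_needs_expand = _size < _min               # True: chunk would be expanded
_needs_shrink = _size > _max               # False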
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
    else:
        expected_content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)
    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    resp = {}
    query_update = None
    limit = 0
    chunk_init = True
    input_arr = None
    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        if "Limit" in params:
            limit = int(params["Limit"])
        chunk_init = False
    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")
        if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()
        input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=chunk_init, bucket=bucket)
    is_dirty = False

    if query:
        values = []
        indices = []
        if chunk_arr is not None:
            # do query selection
            field_names = list(dt.fields.keys())
            replace_mask = [None, ] * len(field_names)
            for i in range(len(field_names)):
                field_name = field_names[i]
                if field_name in query_update:
                    replace_mask[i] = query_update[field_name]
            log.debug(f"replace_mask: {replace_mask}")

            x = chunk_arr[selection]
            log.debug(f"put_query - x: {x}")
            eval_str = getEvalStr(query, "x", field_names)
            log.debug(f"put_query - eval_str: {eval_str}")
            where_result = np.where(eval(eval_str))
            log.debug(f"put_query - where_result: {where_result}")
            where_result_index = where_result[0]
            log.debug(f"put_query - where_result index: {where_result_index}")
            log.debug(f"put_query - boolean selection: {x[where_result_index]}")
            s = selection[0]
            count = 0
            for index in where_result_index:
                log.debug(f"put_query - index: {index}")
                value = x[index]
                log.debug(f"put_query - original value: {value}")
                for i in range(len(field_names)):
                    if replace_mask[i] is not None:
                        value[i] = replace_mask[i]
                log.debug(f"put_query - modified value: {value}")
                x[index] = value
                json_val = bytesArrayToList(value)
                log.debug(f"put_query - json_value: {json_val}")
                json_index = index.tolist() * s.step + s.start  # adjust for selection
                indices.append(json_index)
                values.append(json_val)
                count += 1
                is_dirty = True
                if limit > 0 and count >= limit:
                    log.info("put_query - got limit items")
                    break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result returning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # update chunk array
        chunk_arr[selection] = input_arr
        is_dirty = True
        resp = json_response({}, status=201)

    if is_dirty:
        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)
        log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    log.response(request, resp=resp)
    return resp
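# --- Illustrative sketch: the query-update path above overwrites only the
# fields named in query_update for each matching compound row.  The same
# masked field update on a plain structured array, with made-up field names:
import numpy as np

_dt = np.dtype([("open", "i4"), ("close", "i4")])
_x = np.array([(10, 12), (20, 18), (30, 33)], dtype=_dt)
_query_update = {"close": 0}  # only "close" gets replaced
_field_names = list(_dt.fields.keys())
_replace_mask = [_query_update.get(name) for name in _field_names]
for _i in np.where(_x["close"] > _x["open"])[0]:
    _value = _x[_i]  # structured scalar, a view into _x
    for _j, _repl in enumerate(_replace_mask):
        if _repl is not None:
            _value[_j] = _repl
# _x -> rows 0 and 2 now have close == 0, other fields untouched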
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    query_update = None
    limit = 0
    bucket = None
    input_arr = None

    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
        if "Limit" in params:
            limit = int(params["Limit"])

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")

    if query:
        expected_content_type = "text/plain; charset=utf-8"
        chunk_init = False  # don't initialize new chunks on query update
    else:
        expected_content_type = "application/octet-stream"
        chunk_init = True

    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    # TBD - does this work with linked datasets?

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    type_json = dset_json["type"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, chunk_init=chunk_init)
    is_dirty = False
    if chunk_arr is None:
        if chunk_init:
            log.error("failed to create numpy array")
            raise HTTPInternalServerError()
        else:
            log.warn(f"chunk {chunk_id} not found")
            raise HTTPNotFound()

    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        if rank != 1:
            log.error("expected one-dimensional array for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        # TBD - send back binary response to SN node
        try:
            resp = chunkQuery(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr, slices=selection, query=query, query_update=query_update, limit=limit, return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
        if query_update and resp is not None:
            is_dirty = True
    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")
        if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()
        input_arr = bytesToArray(input_bytes, dt, mshape)
        chunkWriteSelection(chunk_arr=chunk_arr, slices=selection, data=input_arr)
        is_dirty = True
        # chunk update successful
        resp = {}

    if is_dirty:
        save_chunk(app, chunk_id, bucket=bucket)

    resp = json_response(resp, status=201)
    log.response(request, resp=resp)
    return resp
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    if "count" not in params:
        log.warn("expected count param")
        raise HTTPBadRequest()
    num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points - num_points: {num_points}")
        put_points = True
    else:
        log.info(f"POST Chunk get points - num_points: {num_points}")

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"POST_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    rank = len(dims)
    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    if rank == 1:
        coord_type_str = "uint64"
    else:
        coord_type_str = f"({rank},)uint64"

    if put_points:
        # create a numpy array with the following type:
        #   (coord1, coord2, ...) | dset_dtype
        point_dt = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
        point_shape = (num_points,)
        chunk_init = True
    else:
        point_dt = np.dtype('uint64')
        point_shape = (num_points, rank)
        chunk_init = False

    point_arr = bytesToArray(input_bytes, point_dt, point_shape)
    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path, s3offset=s3offset, s3size=s3size, chunk_init=chunk_init)
    if chunk_arr is None:
        log.warn(f"chunk {chunk_id} not found")
        raise HTTPNotFound()

    if put_points:
        # writing point data
        try:
            chunkWritePoints(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkWritePoints: {ve}")
            raise HTTPBadRequest()
        # write empty response
        resp = json_response({})
        save_chunk(app, chunk_id, bucket=bucket)  # lazily write chunk to storage
    else:
        # read points
        try:
            output_arr = chunkReadPoints(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkReadPoints: {ve}")
            raise HTTPBadRequest()
        output_data = arrayToBytes(output_arr)
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
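# --- Illustrative sketch: the get-points path above receives a flat uint64
# buffer of coordinates, reshapes it to (num_points, rank), and translates
# each point to chunk-relative indices.  chunkReadPoints is a project helper;
# for a regular chunk layout, modulo arithmetic against the layout gives the
# same translation:
import numpy as np

_rank = 2
_chunk_layout = (100, 100)
_buf = np.array([205, 17, 250, 99], dtype="uint64").tobytes()  # two points
_points = np.frombuffer(_buf, dtype="uint64").reshape((-1, _rank))
_relative = _points % np.array(_chunk_layout, dtype="uint64")
# point (205, 17) in the chunk at coord (200, 0) maps to (5, 17) within it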
async def PUT_AttributeValue(request):
    """HTTP method to update an attribute's data"""
    log.request(request)
    log.info("PUT_AttributeValue")
    app = request.app
    collection = getRequestCollectionName(request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute Value id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT AttributeValue with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPInternalServerError()

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "update")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug(f"got attribute json from dn for obj_id: {obj_id}")
    log.debug(f"got dn_json: {dn_json}")

    attr_shape = dn_json["shape"]
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be updated"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    np_shape = getShapeDims(attr_shape)
    type_json = dn_json["type"]
    np_dtype = createDataType(type_json)  # np datatype

    request_type = "json"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type not in ("application/json", "application/octet-stream"):
            msg = f"Unknown content_type: {content_type}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if content_type == "application/octet-stream":
            log.debug("PUT AttributeValue - request_type is binary")
            request_type = "binary"
        else:
            log.debug("PUT AttributeValue - request type is json")

    binary_data = None
    if request_type == "binary":
        item_size = getItemSize(type_json)
        if item_size == 'H5T_VARIABLE':
            msg = "Only JSON is supported for variable length data types"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # read binary data
        binary_data = await request.read()
        if len(binary_data) != request.content_length:
            msg = f"Read {len(binary_data)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

    arr = None  # np array to hold request data
    if binary_data:
        npoints = getNumElements(np_shape)
        if npoints * item_size != len(binary_data):
            msg = f"Expected: {npoints * item_size} bytes, but got: {len(binary_data)}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # np.fromstring is deprecated for binary data - use frombuffer
        arr = np.frombuffer(binary_data, dtype=np_dtype)
        arr = arr.reshape(np_shape)  # conform to selection shape
        # convert to JSON for transmission to DN
        data = arr.tolist()
        value = bytesArrayToList(data)
    else:
        body = await request.json()
        if "value" not in body:
            msg = "PUT attribute value with no value in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]
        # validate that the value agrees with type/shape
        try:
            arr = jsonToArray(np_shape, np_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    log.info(f"Got: {arr.size} array elements")

    # ready to add attribute now
    attr_json = {}
    attr_json["type"] = type_json
    attr_json["shape"] = attr_shape
    attr_json["value"] = value

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.info(f"PUT Attribute Value: {req}")

    dn_json["value"] = value
    params = {"replace": 1}  # let the DN know we can overwrite the attribute
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute Value resp: {put_rsp}")

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    # attribute value update successful
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
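# --- Illustrative sketch: the binary branch above turns the request body into
# a numpy array shaped like the attribute, then into JSON-safe nested lists
# for the DN.  The same round trip with a made-up 2x3 int32 attribute:
import numpy as np

_np_shape = (2, 3)
_np_dtype = np.dtype("int32")
_binary_data = np.arange(6, dtype=_np_dtype).tobytes()  # pretend request body
assert len(_binary_data) == int(np.prod(_np_shape)) * _np_dtype.itemsize
_arr = np.frombuffer(_binary_data, dtype=_np_dtype).reshape(_np_shape)
_value = _arr.tolist()  # JSON-serializable: [[0, 1, 2], [3, 4, 5]]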
async def GET_AttributeValue(request):
    """HTTP method to return an attribute value"""
    log.request(request)
    app = request.app
    log.info("GET_AttributeValue")
    collection = getRequestCollectionName(request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain value: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "read")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug(f"got attribute json from dn: {dn_json}")

    attr_shape = dn_json["shape"]
    log.debug(f"attribute shape: {attr_shape}")
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be read"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    accept_type = getAcceptType(request)
    response_type = accept_type  # will adjust later if binary not possible

    type_json = dn_json["type"]
    shape_json = dn_json["shape"]
    item_size = getItemSize(type_json)
    if item_size == 'H5T_VARIABLE' and accept_type != "json":
        msg = "Client requested binary, but only JSON is supported for variable length data types"
        log.info(msg)
        response_type = "json"

    if response_type == "binary":
        arr_dtype = createDataType(type_json)  # np datatype
        np_shape = getShapeDims(shape_json)
        try:
            arr = jsonToArray(np_shape, arr_dtype, dn_json["value"])
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        output_data = arr.tobytes()
        log.debug(f"GET AttributeValue - returning {len(output_data)} bytes binary data")

        # write response
        try:
            resp = StreamResponse()
            resp.content_type = "application/octet-stream"
            resp.content_length = len(output_data)
            # allow CORS
            resp.headers['Access-Control-Allow-Origin'] = '*'
            resp.headers['Access-Control-Allow-Methods'] = "GET, POST, DELETE, PUT, OPTIONS"
            resp.headers['Access-Control-Allow-Headers'] = "Content-Type, api_key, Authorization"
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Got exception: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    else:
        resp_json = {}
        if "value" in dn_json:
            resp_json["value"] = dn_json["value"]
        hrefs = []
        obj_uri = '/' + collection + '/' + obj_id
        attr_uri = obj_uri + '/attributes/' + attr_name
        hrefs.append({'rel': 'self', 'href': getHref(request, attr_uri)})
        hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
        hrefs.append({'rel': 'owner', 'href': getHref(request, obj_uri)})
        resp_json["hrefs"] = hrefs
        resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
async def PUT_Attribute(request):
    """HTTP method to create a new attribute"""
    log.request(request)
    app = request.app
    collection = getRequestCollectionName(request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT Attribute with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    body = await request.json()

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")
    root_id = domain_json["root"]

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "create")

    if "type" not in body:
        msg = "PUT attribute with no type in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    datatype = body["type"]
    if isinstance(datatype, str) and datatype.startswith("t-"):
        # Committed type - fetch type json from DN
        ctype_id = datatype
        log.debug(f"got ctypeid: {ctype_id}")
        ctype_json = await getObjectJson(app, ctype_id, bucket=bucket)
        log.debug(f"ctype {ctype_id}: {ctype_json}")
        if ctype_json["root"] != root_id:
            msg = "Referenced committed datatype must belong in same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        datatype = ctype_json["type"]
        # add the ctype_id to the type
        datatype["id"] = ctype_id
    elif isinstance(datatype, str):
        try:
            # convert predefined type string (e.g. "H5T_STD_I32LE") to
            # corresponding json representation
            datatype = getBaseTypeJson(datatype)
            log.debug(f"got datatype: {datatype}")
        except TypeError:
            msg = "PUT attribute with invalid predefined type"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    validateTypeItem(datatype)

    dims = None
    shape_json = {}
    if "shape" in body:
        shape_body = body["shape"]
        shape_class = None
        if isinstance(shape_body, dict) and "class" in shape_body:
            shape_class = shape_body["class"]
        elif isinstance(shape_body, str):
            shape_class = shape_body
        if shape_class:
            if shape_class == "H5S_NULL":
                shape_json["class"] = "H5S_NULL"
                if isinstance(shape_body, dict) and "dims" in shape_body:
                    msg = "can't include dims with null shape"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
                if isinstance(shape_body, dict) and "value" in body:
                    msg = "can't have H5S_NULL shape with value"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
            elif shape_class == "H5S_SCALAR":
                shape_json["class"] = "H5S_SCALAR"
                dims = getShapeDims(shape_body)
                if len(dims) != 1 or dims[0] != 1:
                    msg = "dimensions aren't valid for scalar attribute"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
            elif shape_class == "H5S_SIMPLE":
                shape_json["class"] = "H5S_SIMPLE"
                dims = getShapeDims(shape_body)
                shape_json["dims"] = dims
            else:
                msg = f"Unknown shape class: {shape_class}"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
        else:
            # no class, interpret shape value as dimensions and
            # use H5S_SIMPLE as class
            if isinstance(shape_body, list) and len(shape_body) == 0:
                shape_json["class"] = "H5S_SCALAR"
                dims = [1, ]
            else:
                shape_json["class"] = "H5S_SIMPLE"
                dims = getShapeDims(shape_body)
                shape_json["dims"] = dims
    else:
        shape_json["class"] = "H5S_SCALAR"
        dims = [1, ]

    if "value" in body:
        if dims is None:
            msg = "Bad Request: data can not be included with H5S_NULL space"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]
        # validate that the value agrees with type/shape
        arr_dtype = createDataType(datatype)  # np datatype
        if len(dims) == 0:
            np_dims = [1, ]
        else:
            np_dims = dims
        log.debug(f"attribute dims: {np_dims}")
        log.debug(f"attribute value: {value}")
        try:
            arr = jsonToArray(np_dims, arr_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        log.info(f"Got: {arr.size} array elements")
    else:
        value = None

    # ready to add attribute now
    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.info("PUT Attribute: " + req)

    attr_json = {}
    attr_json["type"] = datatype
    attr_json["shape"] = shape_json
    if value is not None:
        attr_json["value"] = value

    params = {}
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute resp: {put_rsp}")

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    # attribute creation successful
    resp = await jsonResponse(request, req_rsp, status=201)
    log.response(request, resp=resp)
    return resp