def testSchema2Id(self):
    root_id = createObjId("roots")
    group_id = createObjId("groups", rootid=root_id)
    dataset_id = createObjId("datasets", rootid=root_id)
    ctype_id = createObjId("datatypes", rootid=root_id)

    self.assertEqual(getCollectionForId(root_id), "groups")
    self.assertEqual(getCollectionForId(group_id), "groups")
    self.assertEqual(getCollectionForId(dataset_id), "datasets")
    self.assertEqual(getCollectionForId(ctype_id), "datatypes")

    chunk_id = 'c' + dataset_id[1:] + "_1_2"
    print(chunk_id)
    chunk_partition_id = 'c42-' + dataset_id[2:] + "_1_2"
    for id in (chunk_id, chunk_partition_id):
        try:
            getCollectionForId(id)
            self.assertTrue(False)
        except ValueError:
            pass  # expected

    valid_ids = (group_id, dataset_id, ctype_id, chunk_id, chunk_partition_id, root_id)
    s3prefix = getS3Key(root_id)
    self.assertTrue(s3prefix.endswith("/.group.json"))
    s3prefix = s3prefix[:-(len(".group.json"))]
    for oid in valid_ids:
        print("oid:", oid)
        self.assertTrue(len(oid) >= 38)
        parts = oid.split('-')
        self.assertEqual(len(parts), 6)
        self.assertTrue(oid[0] in ('g', 'd', 't', 'c'))
        self.assertTrue(isSchema2Id(oid))
        if oid == root_id:
            self.assertTrue(isRootObjId(oid))
        else:
            self.assertFalse(isRootObjId(oid))
        self.assertEqual(getRootObjId(oid), root_id)
        s3key = getS3Key(oid)
        print(s3key)
        self.assertTrue(s3key.startswith(s3prefix))
        self.assertEqual(getObjId(s3key), oid)
        self.assertTrue(isS3ObjKey(s3key))
async def printS3Obj(app, obj_id):
    try:
        s3_key = getS3Key(obj_id)
        obj_exists = await isStorObj(app, s3_key)
        if not obj_exists:
            print(f"key: {s3_key} not found")
            return
        json_obj = await getStorJSONObj(app, s3_key)
        print(f"s3key {s3_key}:")
        print(json.dumps(json_obj, sort_keys=True, indent=4))
    except ValueError as ve:
        print(f"Got ValueError exception: {ve}")
    except ClientOSError as coe:
        print(f"Got error: {coe}")
    await releaseStorageClient(app)
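# Usage sketch (an assumption, not part of the original source): printS3Obj is a
# coroutine, so it has to be driven from an event loop. init_app() below is a
# hypothetical helper standing in for whatever code builds the app dict with the
# storage-client state that getS3Key/isStorObj/getStorJSONObj expect.

import asyncio

def print_obj_example():
    app = init_app()  # hypothetical setup of bucket/session state
    asyncio.run(printS3Obj(app, "g-314d61b8-9954-11e6-a733-3c15c2da029e"))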
async def createDomain(app, domain, domain_json):
    try:
        s3_key = getS3Key(domain)
        domain_exists = await isS3Obj(app, s3_key)
        if domain_exists:
            raise ValueError("Domain already exists")
        parent_domain = getParentDomain(domain)
        if parent_domain is None:
            raise ValueError("Domain must have a parent")
        log.info("writing domain")
        await putS3JSONObj(app, s3_key, domain_json)
        print("domain created! s3_key: {} domain_json: {}".format(s3_key, domain_json))
    except ValueError as ve:
        print("Got ValueError exception: {}".format(str(ve)))
    except ClientOSError as coe:
        print("Got S3 error: {}".format(str(coe)))
async def createDomain(app, domain, domain_json):
    try:
        domain = app["bucket_name"] + domain
        print("domain:", domain)
        s3_key = getS3Key(domain)
        print("s3_key: ", s3_key)
        domain_exists = await isStorObj(app, s3_key)
        if domain_exists:
            raise ValueError("Domain already exists")
        parent_domain = getParentDomain(domain)
        if parent_domain is None:
            raise ValueError("Domain must have a parent")
        log.info("writing domain")
        await putStorJSONObj(app, s3_key, domain_json)
        print("domain created! s3_key: {} domain_json: {}".format(s3_key, domain_json))
    except ValueError as ve:
        print("Got ValueError exception: {}".format(str(ve)))
        raise
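# Call sketch (hedged): the domain_json payload below is illustrative only -- the
# real domain schema is not shown here. app is assumed to carry "bucket_name"
# (e.g. "mybucket") plus the storage-client state used by the helpers above, so
# passing "/bob/mydata.h5" yields the "mybucket/bob/mydata.h5" form of domain id
# exercised in the tests below.

import asyncio
import time

def create_domain_example(app):
    domain_json = {"owner": "bob", "created": time.time()}  # hypothetical payload
    asyncio.run(createDomain(app, "/bob/mydata.h5", domain_json))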
def testIsValidUuid(self):
    group_id = "g-314d61b8-9954-11e6-a733-3c15c2da029e"
    dataset_id = "d-4c48f3ae-9954-11e6-a3cd-3c15c2da029e"
    ctype_id = "t-8c785f1c-9953-11e6-9bc2-0242ac110005"
    chunk_id = "c-8c785f1c-9953-11e6-9bc2-0242ac110005_7_2"
    domain_id = "mybucket/bob/mydata.h5"
    valid_ids = (group_id, dataset_id, ctype_id, chunk_id, domain_id)
    bad_ids = ("g-1e76d862", "/bob/mydata.h5")

    self.assertTrue(isValidUuid(group_id))
    self.assertFalse(isSchema2Id(group_id))
    self.assertTrue(isValidUuid(group_id, obj_class="Group"))
    self.assertTrue(isValidUuid(group_id, obj_class="group"))
    self.assertTrue(isValidUuid(group_id, obj_class="groups"))
    self.assertTrue(isValidUuid(dataset_id, obj_class="datasets"))
    self.assertFalse(isSchema2Id(dataset_id))
    self.assertTrue(isValidUuid(ctype_id, obj_class="datatypes"))
    self.assertFalse(isSchema2Id(ctype_id))
    self.assertTrue(isValidUuid(chunk_id, obj_class="chunks"))
    self.assertFalse(isSchema2Id(chunk_id))
    validateUuid(group_id)
    try:
        isRootObjId(group_id)
        self.assertTrue(False)
    except ValueError:
        # only works for v2 schema
        pass  # expected

    for item in valid_ids:
        self.assertTrue(isObjId(item))
        s3key = getS3Key(item)
        self.assertTrue(s3key[0] != '/')
        self.assertTrue(isS3ObjKey(s3key))
        if item.find('/') > 0:
            continue  # bucket name gets lost when domain ids get converted to s3keys
        objid = getObjId(s3key)
        self.assertEqual(objid, item)

    for item in bad_ids:
        self.assertFalse(isValidUuid(item))
        self.assertFalse(isObjId(item))
async def run_scan(app, rootid, update=False):
    root_key = getS3Key(rootid)
    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    root_prefix = root_key[:-(len(".group.json"))]
    app["root_prefix"] = root_prefix
    try:
        await getStorKeys(app,
                          prefix=root_prefix,
                          suffix=".dataset.json",
                          include_stats=False,
                          callback=getKeysCallback)
    except ClientError as ce:
        log.error(f"getStorKeys failed: {ce}")
    except HTTPNotFound:
        log.warn(f"getStorKeys - HTTPNotFound for prefix: {root_prefix}")
    except HTTPInternalServerError:
        log.error(f"getStorKeys - HTTPInternalServerError for prefix: {root_prefix}")
    except Exception as e:
        log.error(f"getStorKeys - unexpected exception for prefix: {root_prefix}: {e}")

    # update all chunks for datasets with H5D_CHUNKED_REF_INDIRECT layout
    indirect_dataset_keys = app["indirect_dataset_keys"]
    for prefix in indirect_dataset_keys:
        log.info(f"got indirect prefix: {prefix}")
        # TBD...

    await releaseStorageClient(app)
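# A possible shape for the getKeysCallback that run_scan passes to getStorKeys --
# a sketch only: it assumes getStorKeys awaits the callback with the app and a
# batch of matching keys, and that app["dataset_keys"] is a list set up by the
# caller. Neither the signature nor this bookkeeping is confirmed by the code
# above.

async def getKeysCallback(app, s3keys):
    # collect every matching .dataset.json key; the H5D_CHUNKED_REF_INDIRECT
    # filtering that feeds app["indirect_dataset_keys"] would happen later, once
    # each dataset's JSON is fetched and its layout inspected (not shown)
    for s3key in s3keys:
        log.info(f"got dataset key: {s3key}")
        app["dataset_keys"].append(s3key)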