def testSchema2Id(self):
    """Verify schema-2 object ids: collection lookup, root-id mapping,
    and the S3-key round trip (getS3Key <-> getObjId)."""
    root_id = createObjId("roots")
    group_id = createObjId("groups", rootid=root_id)
    dataset_id = createObjId("datasets", rootid=root_id)
    ctype_id = createObjId("datatypes", rootid=root_id)

    # a root id belongs to the "groups" collection
    self.assertEqual(getCollectionForId(root_id), "groups")
    self.assertEqual(getCollectionForId(group_id), "groups")
    self.assertEqual(getCollectionForId(dataset_id), "datasets")
    self.assertEqual(getCollectionForId(ctype_id), "datatypes")

    # chunk ids have no collection, so the lookup must fail
    chunk_id = 'c' + dataset_id[1:] + "_1_2"
    with self.assertRaises(ValueError):
        getCollectionForId(chunk_id)

    valid_ids = (group_id, dataset_id, ctype_id, chunk_id, root_id)

    # every object key for this domain shares the root's S3 prefix
    s3prefix = getS3Key(root_id)
    self.assertTrue(s3prefix.endswith("/.group.json"))
    s3prefix = s3prefix[:-len(".group.json")]

    for oid in valid_ids:
        self.assertTrue(len(oid) >= 38)
        parts = oid.split('-')
        self.assertEqual(len(parts), 6)
        self.assertIn(oid[0], ('g', 'd', 't', 'c'))
        self.assertTrue(isSchema2Id(oid))
        if oid == root_id:
            self.assertTrue(isRootObjId(oid))
        else:
            self.assertFalse(isRootObjId(oid))
        self.assertEqual(getRootObjId(oid), root_id)
        # S3 key round trip
        s3key = getS3Key(oid)
        self.assertTrue(s3key.startswith(s3prefix))
        self.assertEqual(getObjId(s3key), oid)
        self.assertTrue(isS3ObjKey(s3key))
def testSimple(self):
    """Check basic chunk-cache functions by adding one chunk to the cache."""
    cc = LruCache(mem_target=1000 * 1000 * 10)
    cc.consistencyCheck()
    self.assertEqual(len(cc), 0)
    self.assertEqual(cc.dump_lru(), "->\n<-\n")

    # only numpy arrays can be added to a chunk cache
    chunk_id = createObjId("chunks")
    with self.assertRaises(TypeError):
        cc[chunk_id] = list(range(20))

    # keys must be chunk ids - a dataset id is rejected
    arr = np.empty((16, 16), dtype='i4')
    dset_id = createObjId("datasets")
    with self.assertRaises(ValueError):
        cc[dset_id] = arr

    rand_id = createObjId("chunks")
    np_arr = np.random.random((500, 500))  # smaller than our chunk cache size
    cc[rand_id] = np_arr  # add to cache
    cc.consistencyCheck()
    self.assertEqual(len(cc), 1)
    self.assertTrue(rand_id in cc)
    lru_str = "->" + rand_id + "\n<-" + rand_id + "\n"

    mem_tgt = cc.memTarget
    self.assertEqual(mem_tgt, 1000 * 1000 * 10)
    mem_used = cc.memUsed
    self.assertEqual(mem_used, 500 * 500 * 8)
    mem_dirty = cc.memDirty
    self.assertEqual(mem_dirty, 0)
    mem_per = cc.cacheUtilizationPercent
    self.assertEqual(mem_per, 20)  # have used 20% of target memory

    # try adding the same id to the cache again
    cc[rand_id] = np_arr
    cc.consistencyCheck()
    self.assertEqual(len(cc), 1)
    self.assertTrue(rand_id in cc)

    # try out the dirty flags
    self.assertFalse(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 0)
    cc.setDirty(rand_id)
    cc.consistencyCheck()
    self.assertTrue(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 1)
    self.assertEqual(cc.dump_lru(), lru_str)
    cc.consistencyCheck()
    cc.clearDirty(rand_id)
    cc.consistencyCheck()
    self.assertFalse(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 0)

    # chunk should not have been evicted from cache
    self.assertEqual(len(cc), 1)
    self.assertTrue(rand_id in cc)

    # delete from cache
    del cc[rand_id]
    cc.consistencyCheck()

    # check cache is empty
    self.assertEqual(len(cc), 0)
    self.assertFalse(rand_id in cc)
    mem_tgt = cc.memTarget
    self.assertEqual(mem_tgt, 1000 * 1000 * 10)
    mem_used = cc.memUsed
    self.assertEqual(mem_used, 0)
    mem_dirty = cc.memDirty
    self.assertEqual(mem_dirty, 0)
    mem_per = cc.cacheUtilizationPercent
    self.assertEqual(mem_per, 0)  # no memory used
def testMetaDataCache(self):
    """Check metadata-cache functionality (dict values, fixed 1KB accounting)."""
    cc = LruCache(mem_target=1024 * 10, chunk_cache=False)
    cc.consistencyCheck()
    self.assertEqual(len(cc), 0)
    self.assertEqual(cc.dump_lru(), "->\n<-\n")

    # only dict objects can be added to a metadata cache
    dset_id = createObjId("datasets")
    with self.assertRaises(TypeError):
        cc[dset_id] = np.zeros((3, 4))

    # a numpy array is rejected as well - not a dict
    arr = np.zeros((10, ))
    chunk_id = createObjId("chunks")
    with self.assertRaises(TypeError):
        cc[chunk_id] = arr

    rand_id = createObjId("groups")
    data = {"foo": "bar"}
    cc[rand_id] = data  # add to cache
    cc.consistencyCheck()
    self.assertEqual(len(cc), 1)
    self.assertTrue(rand_id in cc)
    lru_str = "->" + rand_id + "\n<-" + rand_id + "\n"

    mem_tgt = cc.memTarget
    self.assertEqual(mem_tgt, 1024 * 10)
    mem_used = cc.memUsed
    self.assertEqual(mem_used, 1024)  # not based on actual size
    mem_per = cc.cacheUtilizationPercent
    self.assertEqual(mem_per, 10)  # have used 10% of target memory

    # try out the dirty flags
    self.assertFalse(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 0)
    cc.setDirty(rand_id)
    cc.consistencyCheck()
    self.assertTrue(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 1)
    self.assertEqual(cc.dump_lru(), lru_str)
    cc.clearDirty(rand_id)
    cc.consistencyCheck()
    self.assertFalse(cc.isDirty(rand_id))
    self.assertEqual(cc.dirtyCount, 0)

    # chunk should not have been evicted from cache
    self.assertEqual(len(cc), 1)
    self.assertTrue(rand_id in cc)

    # delete from cache
    del cc[rand_id]
    cc.consistencyCheck()

    # check cache is empty
    self.assertEqual(len(cc), 0)
    self.assertFalse(rand_id in cc)
    mem_tgt = cc.memTarget
    self.assertEqual(mem_tgt, 1024 * 10)
    mem_used = cc.memUsed
    self.assertEqual(mem_used, 0)
    mem_per = cc.cacheUtilizationPercent
    self.assertEqual(mem_per, 0)  # no memory used