def test_caching_readonly_cache(self):
    """Caching into a read-only cache element must raise ReadOnlyError."""
    read_only_elem = DataMemoryElement(readonly=True)
    dms = DataMemorySet(read_only_elem)
    with self.assertRaises(ReadOnlyError):
        dms.cache()
def test_add_data(self):
    """Adding a DataElement should register it in the map under its UUID."""
    elem = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
    dms = DataMemorySet()
    dms.add_data(elem)
    self.assertEqual(dms._element_map, {elem.uuid(): elem})
def test_init_with_cache(self):
    """Constructing from a populated cache should restore the pickled map."""
    starting_map = dict(a=1, b=2, c=3)
    cache_elem = DataMemoryElement(bytes=pickle.dumps(starting_map))
    dms = DataMemorySet(cache_elem)
    self.assertEqual(dms.cache_element, cache_elem)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, starting_map)
def test_uuids(self):
    """uuids() should report the set of keys of the element map."""
    dms = DataMemorySet()
    dms._element_map = {0: 'a', 75: 'b', 124769: 'c'}
    self.assertEqual(dms.uuids(), {0, 75, 124769})
def test_count(self):
    """count() should equal the number of entries in the element map."""
    dms = DataMemorySet()
    dms._element_map = {0: 'a', 75: 'b', 124769: 'c'}
    self.assertEqual(dms.count(), 3)
def test_has_uuid(self):
    """has_uuid should be True for mapped keys and False for unknown keys.

    Improvement: the original only asserted positive membership; a
    ``has_uuid`` that unconditionally returned True would have passed.
    A negative case is added to close that gap.
    """
    dms = DataMemorySet()
    dms._element_map = {0: 'a', 75: 'b', 124769: 'c'}
    for uid in (0, 75, 124769):
        self.assertTrue(dms.has_uuid(uid))
    # Negative case: a key absent from the map must not be reported present.
    self.assertFalse(dms.has_uuid('not-a-present-uuid'))
def test_get_data_valid_uuid(self):
    """get_data should return the value stored under each known UUID."""
    dms = DataMemorySet()
    dms._element_map = {0: 'a', 75: 'b', 124769: 'c'}
    for uid, expected in [(0, 'a'), (75, 'b'), (124769, 'c')]:
        self.assertEqual(dms.get_data(uid), expected)
def test_iter(self):
    """Iterating a set should yield the element-map values."""
    dms = DataMemorySet()
    dms._element_map = {0: 'a', 75: 'b', 124769: 'c'}
    expected_values = {'a', 'b', 'c'}
    # Both implicit and explicit iteration paths should agree.
    self.assertEqual(set(dms), expected_values)
    self.assertEqual(set(iter(dms)), expected_values)
def test_cacheing_with_map(self):
    """cache() should pickle the current element map into the cache element."""
    cache_elem = DataMemoryElement()
    element_map = {0: 'a', 75: 'b', 124769: 'c'}
    dms = DataMemorySet(cache_elem)
    dms._element_map = element_map
    dms.cache()
    # The cache element should now hold a pickle of the map.
    self.assertFalse(cache_elem.is_empty())
    self.assertEqual(pickle.loads(cache_elem.get_bytes()), element_map)
def test_init_empty_cache(self):
    """An empty cache element should yield an empty map, keeping protocol."""
    cache_elem = DataMemoryElement()
    dms = DataMemorySet(cache_elem, 2)
    self.assertEqual(dms.cache_element, cache_elem)
    self.assertEqual(dms.pickle_protocol, 2)
    self.assertEqual(dms._element_map, {})
def test_init_no_cache(self):
    """Default construction: no cache element, empty map, protocol -1."""
    dms = DataMemorySet()
    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms._element_map, {})
    self.assertEqual(dms.pickle_protocol, -1)
def test_get_data_invalid_uuid(self):
    """Requesting an unknown UUID should raise KeyError."""
    dms = DataMemorySet()
    with self.assertRaises(KeyError):
        dms.get_data('invalid uuid')
def test_caching_no_map_no_cache(self):
    """cache() with no cache element and an empty map should be a no-op.

    Improvement: the original used deprecated nose ``ntools`` assertions
    while sibling tests in this file use unittest's ``self.assert*``
    methods; converted for consistency (nose is unmaintained).
    """
    dms = DataMemorySet()
    # Should do nothing: there is nowhere to cache to.
    dms.cache()
    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms._element_map, {})
def test_add_data_not_DataElement(self):
    """Adding a value that is not a DataElement should raise AssertionError."""
    dms = DataMemorySet()
    with self.assertRaises(AssertionError):
        dms.add_data("not data element")
def test_init_no_cache(self):
    """Default construction: no cache element, empty map, protocol -1.

    Improvement: the original used deprecated nose ``ntools`` assertions
    while the equivalent test elsewhere in this file uses unittest's
    ``self.assert*`` methods; converted for consistency (nose is
    unmaintained).
    """
    dms = DataMemorySet()
    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms._element_map, {})
    self.assertEqual(dms.pickle_protocol, -1)
# CSV file detailing [cluster_id, ad_id, image_sha1] relationships. EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv" # json-lines file of clusters missing from the above file. Should be at least # composed of: {"cluster_id": <str>, ... } EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl" OUTPUT_DESCR_PROB_INDEX = "cp1_img_prob_descriptors.pickle" OUTPUT_MAX_JL = "cp1_scores_max.jl" OUTPUT_AVG_JL = "cp1_scores_avg.jl" ############################################################################### # Compute classification scores initialize_logging(logging.getLogger('smqtk'), logging.DEBUG) eval_data_set = DataMemorySet(EVAL_DATASET) img_prob_descr_index = MemoryDescriptorIndex(OUTPUT_DESCR_PROB_INDEX) img_prob_gen = CaffeDescriptorGenerator(CAFFE_DEPLOY, CAFFE_MODEL, CAFFE_IMG_MEAN, 'prob', batch_size=1000, use_gpu=True, load_truncated_images=True) img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement, {}) img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS) eval_data2descr = {}
def test_cacheing_no_map(self):
    """Caching an empty set should write a pickled empty map to the cache."""
    dms = DataMemorySet(DataMemoryElement())
    dms.cache()
    # Technically caches something, but that something is an empty map.
    self.assertFalse(dms.cache_element.is_empty())
    self.assertEqual(pickle.loads(dms.cache_element.get_bytes()), {})
def test_caching_no_map_no_cache(self):
    """With no cache element configured, cache() should do nothing."""
    dms = DataMemorySet()
    dms.cache()  # should do nothing
    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms._element_map, {})
# CSV file detailing [cluster_id, ad_id, image_sha1] relationships. EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv" # json-lines file of clusters missing from the above file. Should be at least # composed of: {"cluster_id": <str>, ... } EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl" OUTPUT_DESCR_PROB_SET = "cp1_img_prob_descriptors.pickle" OUTPUT_MAX_JL = "cp1_scores_max.jl" OUTPUT_AVG_JL = "cp1_scores_avg.jl" ############################################################################### # Compute classification scores initialize_logging(logging.getLogger('smqtk'), logging.DEBUG) eval_data_set = DataMemorySet(DataFileElement(EVAL_DATASET)) img_prob_descr_set = MemoryDescriptorSet( DataFileElement(OUTPUT_DESCR_PROB_SET)) img_prob_gen = CaffeDescriptorGenerator(DataFileElement(CAFFE_DEPLOY), DataFileElement(CAFFE_MODEL), DataFileElement(CAFFE_IMG_MEAN), 'prob', batch_size=1000, use_gpu=True, load_truncated_images=True) img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement, {}) img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS)