Exemplo n.º 1
0
 def test_caching_readonly_cache(self):
     # A set backed by a read-only cache element must refuse to cache.
     data_set = DataMemorySet(DataMemoryElement(readonly=True))
     with self.assertRaises(ReadOnlyError):
         data_set.cache()
Exemplo n.º 2
0
    def test_add_data(self):
        # An added element should end up in the internal map, keyed by the
        # element's own UUID.
        element = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
        expected = {element.uuid(): element}

        data_set = DataMemorySet()
        data_set.add_data(element)

        self.assertEqual(data_set._element_map, expected)
Exemplo n.º 3
0
    def test_init_with_cache(self):
        # Constructing from a cache element holding a pickled map should
        # restore that map and keep a reference to the cache element.
        cached_map = {'a': 1, 'b': 2, 'c': 3}
        pickled_map = pickle.dumps(cached_map)
        cache_elem = DataMemoryElement(bytes=pickled_map)

        data_set = DataMemorySet(cache_elem)

        self.assertEqual(data_set.cache_element, cache_elem)
        self.assertEqual(data_set.pickle_protocol, -1)
        self.assertEqual(data_set._element_map, cached_map)
Exemplo n.º 4
0
    def test_uuids(self):
        # uuids() should report exactly the keys of the internal element map.
        data_set = DataMemorySet()
        data_set._element_map = {0: 'a', 75: 'b', 124769: 'c'}

        expected_uuids = {0, 75, 124769}
        self.assertEqual(data_set.uuids(), expected_uuids)
Exemplo n.º 5
0
    def test_count(self):
        # count() should match the number of entries in the element map.
        data_set = DataMemorySet()
        data_set._element_map = {0: 'a', 75: 'b', 124769: 'c'}

        self.assertEqual(data_set.count(), 3)
Exemplo n.º 6
0
    def test_has_uuid(self):
        # Every key present in the element map should be reported as known.
        data_set = DataMemorySet()
        data_set._element_map = {0: 'a', 75: 'b', 124769: 'c'}

        for known_uuid in (0, 75, 124769):
            self.assertTrue(data_set.has_uuid(known_uuid))
Exemplo n.º 7
0
    def test_get_data_valid_uuid(self):
        # get_data() should hand back the value stored under each known key.
        stored_pairs = ((0, 'a'), (75, 'b'), (124769, 'c'))

        data_set = DataMemorySet()
        data_set._element_map = dict(stored_pairs)

        for uuid, expected_value in stored_pairs:
            self.assertEqual(data_set.get_data(uuid), expected_value)
Exemplo n.º 8
0
    def test_iter(self):
        # Iterating the set, implicitly or through iter(), should yield the
        # values stored in the element map.
        data_set = DataMemorySet()
        data_set._element_map = {0: 'a', 75: 'b', 124769: 'c'}

        stored_values = {'a', 'b', 'c'}
        self.assertEqual(set(data_set), stored_values)
        self.assertEqual(set(iter(data_set)), stored_values)
Exemplo n.º 9
0
    def test_cacheing_with_map(self):
        # Caching a populated set should leave a pickle of its element map in
        # the cache element.
        cache_elem = DataMemoryElement()
        element_map = {0: 'a', 75: 'b', 124769: 'c'}

        data_set = DataMemorySet(cache_elem)
        data_set._element_map = element_map
        data_set.cache()

        self.assertFalse(cache_elem.is_empty())
        restored_map = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(restored_map, element_map)
Exemplo n.º 10
0
 def test_init_empty_cache(self):
     # An empty cache element plus an explicit pickle protocol: both should
     # be stored as given, and the element map should start out empty.
     empty_cache = DataMemoryElement()
     data_set = DataMemorySet(empty_cache, 2)

     self.assertEqual(data_set.cache_element, empty_cache)
     self.assertEqual(data_set.pickle_protocol, 2)
     self.assertEqual(data_set._element_map, {})
Exemplo n.º 11
0
 def test_init_no_cache(self):
     # Default construction: no cache element, empty map, protocol -1.
     data_set = DataMemorySet()

     self.assertIsNone(data_set.cache_element)
     self.assertEqual(data_set._element_map, {})
     self.assertEqual(data_set.pickle_protocol, -1)
Exemplo n.º 12
0
 def test_get_data_invalid_uuid(self):
     # Requesting a key that was never added must raise KeyError.
     data_set = DataMemorySet()
     with self.assertRaises(KeyError):
         data_set.get_data('invalid uuid')
Exemplo n.º 13
0
 def test_caching_no_map_no_cache(self):
     data_set = DataMemorySet()

     # With no cache element configured, cache() is expected to be a no-op.
     data_set.cache()

     ntools.assert_is_none(data_set.cache_element)
     ntools.assert_equal(data_set._element_map, {})
Exemplo n.º 14
0
 def test_add_data_not_DataElement(self):
     # Passing something that is not a data element must trip an assertion.
     data_set = DataMemorySet()
     with self.assertRaises(AssertionError):
         data_set.add_data("not data element")
Exemplo n.º 15
0
 def test_init_no_cache(self):
     # Default construction: no cache element, empty map, protocol -1.
     data_set = DataMemorySet()

     ntools.assert_is_none(data_set.cache_element)
     ntools.assert_equal(data_set._element_map, {})
     ntools.assert_equal(data_set.pickle_protocol, -1)
Exemplo n.º 16
0
# CSV file detailing [cluster_id, ad_id, image_sha1] relationships.
EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv"
# JSON-lines file of clusters missing from the above file. Each line should at
# least be composed of: {"cluster_id": <str>, ... }
EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl"

# Output file names.
# NOTE(review): the names suggest a pickled descriptor index plus json-lines
# files of max / average scores -- confirm against the code that writes them
# (not visible in this chunk).
OUTPUT_DESCR_PROB_INDEX = "cp1_img_prob_descriptors.pickle"
OUTPUT_MAX_JL = "cp1_scores_max.jl"
OUTPUT_AVG_JL = "cp1_scores_avg.jl"

###############################################################################

# Compute classification scores
# Turn on DEBUG-level logging for the 'smqtk' logger before any of the objects
# below are constructed.
initialize_logging(logging.getLogger('smqtk'), logging.DEBUG)

# EVAL_DATASET and the CAFFE_* names used below are defined outside this chunk.
eval_data_set = DataMemorySet(EVAL_DATASET)
img_prob_descr_index = MemoryDescriptorIndex(OUTPUT_DESCR_PROB_INDEX)

# NOTE(review): 'prob' is presumably the name of the network layer whose output
# is taken as the descriptor -- confirm against CaffeDescriptorGenerator.
img_prob_gen = CaffeDescriptorGenerator(CAFFE_DEPLOY,
                                        CAFFE_MODEL,
                                        CAFFE_IMG_MEAN,
                                        'prob',
                                        batch_size=1000,
                                        use_gpu=True,
                                        load_truncated_images=True)

# Factory for MemoryClassificationElement, given an empty dict as its second
# argument.
img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement,
                                                 {})
img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS)

# Starts empty; presumably filled further down with data-element -> descriptor
# pairs (the code that populates it is not visible here).
eval_data2descr = {}
Exemplo n.º 17
0
 def test_cacheing_no_map(self):
     data_set = DataMemorySet(DataMemoryElement())
     data_set.cache()

     # Something does get written to the cache element, but it is only the
     # pickle of an empty map.
     cache_elem = data_set.cache_element
     self.assertFalse(cache_elem.is_empty())
     self.assertEqual(pickle.loads(cache_elem.get_bytes()), {})
Exemplo n.º 18
0
 def test_caching_no_map_no_cache(self):
     data_set = DataMemorySet()

     # With no cache element configured, cache() is expected to be a no-op.
     data_set.cache()

     self.assertIsNone(data_set.cache_element)
     self.assertEqual(data_set._element_map, {})
Exemplo n.º 19
0
# CSV file detailing [cluster_id, ad_id, image_sha1] relationships.
EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv"
# JSON-lines file of clusters missing from the above file. Each line should at
# least be composed of: {"cluster_id": <str>, ... }
EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl"

# Output file names.
# NOTE(review): the names suggest a pickled descriptor set plus json-lines
# files of max / average scores -- confirm against the code that writes them
# (not visible in this chunk).
OUTPUT_DESCR_PROB_SET = "cp1_img_prob_descriptors.pickle"
OUTPUT_MAX_JL = "cp1_scores_max.jl"
OUTPUT_AVG_JL = "cp1_scores_avg.jl"

###############################################################################

# Compute classification scores
# Turn on DEBUG-level logging for the 'smqtk' logger before any of the objects
# below are constructed.
initialize_logging(logging.getLogger('smqtk'), logging.DEBUG)

# EVAL_DATASET and the CAFFE_* names used below are defined outside this chunk.
# File paths are wrapped in DataFileElement before being handed to the set,
# descriptor-set and generator constructors.
eval_data_set = DataMemorySet(DataFileElement(EVAL_DATASET))
img_prob_descr_set = MemoryDescriptorSet(
    DataFileElement(OUTPUT_DESCR_PROB_SET))

# NOTE(review): 'prob' is presumably the name of the network layer whose output
# is taken as the descriptor -- confirm against CaffeDescriptorGenerator.
img_prob_gen = CaffeDescriptorGenerator(DataFileElement(CAFFE_DEPLOY),
                                        DataFileElement(CAFFE_MODEL),
                                        DataFileElement(CAFFE_IMG_MEAN),
                                        'prob',
                                        batch_size=1000,
                                        use_gpu=True,
                                        load_truncated_images=True)

# Factory for MemoryClassificationElement, given an empty dict as its second
# argument.
img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement,
                                                 {})
# Unlike the paths above, CAFFE_LABELS is passed through without a
# DataFileElement wrapper.
img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS)