Пример #1
0
    def test_get_config(self):
        """
        Check ``get_config`` for descriptor sets built without a cache
        element, with ``None``, with an empty memory element and with a
        memory element that carries pickled bytes.
        """
        # No-argument construction should mirror the default configuration.
        actual = MemoryDescriptorSet().get_config()
        self.assertEqual(actual, MemoryDescriptorSet.get_default_config())

        # Passing ``None`` explicitly should give the same result.
        actual = MemoryDescriptorSet(None).get_config()
        self.assertEqual(actual, MemoryDescriptorSet.get_default_config())

        # An empty element is expected to contribute only its type key.
        dme_key = 'smqtk.representation.data_element.memory_element.DataMemoryElement'
        blank_elem = DataMemoryElement()
        actual = MemoryDescriptorSet(blank_elem).get_config()
        expected = merge_dict(
            MemoryDescriptorSet.get_default_config(),
            {'cache_element': {'type': dme_key}})
        self.assertEqual(actual, expected)

        # An element holding bytes is expected to report them as a string
        # decoded with BYTES_CONFIG_ENCODING.
        raw_bytes = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
        raw_bytes_str = raw_bytes.decode(BYTES_CONFIG_ENCODING)
        filled_elem = DataMemoryElement(bytes=raw_bytes)
        actual = MemoryDescriptorSet(filled_elem).get_config()
        override = {
            'cache_element': {
                dme_key: {'bytes': raw_bytes_str},
                'type': dme_key,
            },
        }
        expected = merge_dict(MemoryDescriptorSet.get_default_config(),
                              override)
        self.assertEqual(actual, expected)
Пример #2
0
    def test_added_descriptor_table_caching(self):
        """
        Adding and removing descriptors should be mirrored in the pickled
        table held by a writable cache element.
        """
        cache_elem = DataMemoryElement(readonly=False)
        descriptors = [random_descriptor() for _ in range(3)]
        expected_table = {d.uuid(): d for d in descriptors}

        dset = MemoryDescriptorSet(cache_elem)
        self.assertTrue(cache_elem.is_empty())

        def cached_table():
            # Unpickle whatever the set's cache element currently holds.
            return pickle.loads(dset.cache_element.get_bytes())

        # Bulk add: the cache element should now hold the whole table.
        dset.add_many_descriptors(descriptors)
        self.assertFalse(cache_elem.is_empty())
        self.assertEqual(cached_table(), expected_table)

        # A single addition should show up in the cache.
        added = random_descriptor()
        expected_table[added.uuid()] = added
        dset.add_descriptor(added)
        self.assertEqual(cached_table(), expected_table)

        # A removal should show up in the cache as well.
        removed = next(iter(expected_table.values()))
        del expected_table[removed.uuid()]
        dset.remove_descriptor(removed.uuid())
        self.assertEqual(cached_table(), expected_table)
Пример #3
0
 def test_set_bytes_when_readonly(self):
     """
     ``set_bytes`` on a read-only element should raise ``ReadOnlyError``
     and leave the stored bytes as they were.
     """
     original = 'test bytes first set'
     replacement = 'the second set of bytes'
     elem = DataMemoryElement(original, readonly=True)

     ntools.assert_equal(elem.get_bytes(), original)
     ntools.assert_raises(ReadOnlyError, elem.set_bytes, replacement)
     # Content is unchanged after the rejected write.
     ntools.assert_equal(elem.get_bytes(), original)
Пример #4
0
 def test_set_bytes(self):
     """
     ``set_bytes`` should replace the content returned by ``get_bytes``.
     """
     initial = six.b('test bytes first set')
     updated = six.b('the second set of bytes')

     elem = DataMemoryElement(initial)
     self.assertEqual(elem.get_bytes(), initial)

     elem.set_bytes(updated)
     self.assertEqual(elem.get_bytes(), updated)
Пример #5
0
    def test_added_descriptor_table_caching(self):
        """
        The pickled table in a writable cache element should track every
        add and remove performed on the index.
        """
        cache_elem = DataMemoryElement(readonly=False)
        new_descrs = [random_descriptor() for _ in range(3)]
        expected_table = {}
        for descr in new_descrs:
            expected_table[descr.uuid()] = descr

        index = MemoryDescriptorIndex(cache_elem)
        self.assertTrue(cache_elem.is_empty())

        # Bulk add should populate the cache element.
        index.add_many_descriptors(new_descrs)
        self.assertFalse(cache_elem.is_empty())
        cached = pickle.loads(index.cache_element.get_bytes())
        self.assertEqual(cached, expected_table)

        # One more descriptor: cache follows the table.
        extra = random_descriptor()
        expected_table[extra.uuid()] = extra
        index.add_descriptor(extra)
        cached = pickle.loads(index.cache_element.get_bytes())
        self.assertEqual(cached, expected_table)

        # Drop one descriptor: cache follows the table.
        victim = list(expected_table.values())[0]
        del expected_table[victim.uuid()]
        index.remove_descriptor(victim.uuid())
        cached = pickle.loads(index.cache_element.get_bytes())
        self.assertEqual(cached, expected_table)
Пример #6
0
 def test_get_config(self, _m_cdg_setupNetwork):
     """
     Build a generator from explicit parameters and check that
     ``get_config`` hands them back, with the data-element parameters
     replaced by their ``to_config_dict`` form.

     The network setup is mocked (see the injected argument) so that no
     caffe initialization is needed for this test.
     """
     element_keys = ('network_prototxt', 'network_model', 'image_mean')
     expected_params = dict(
         network_prototxt=DataMemoryElement(),
         network_model=DataMemoryElement(),
         image_mean=DataMemoryElement(),
         return_layer='layer name',
         batch_size=777,
         use_gpu=False,
         gpu_device_id=8,
         network_is_bgr=False,
         data_layer='data-other',
         load_truncated_images=True,
         pixel_rescale=(.2, .8),
         input_scale=1.5,
     )

     # Every constructor argument (minus ``self``) must be covered above.
     ctor_spec = inspect.getargspec(CaffeDescriptorGenerator.__init__)
     ctor_arg_names = set(ctor_spec.args[1:])
     self.assertSetEqual(set(expected_params), ctor_arg_names)

     generator = CaffeDescriptorGenerator(**expected_params)

     # The config is expected to carry sub-configs for the elements.
     for name in element_keys:
         expected_params[name] = to_config_dict(expected_params[name])
     self.assertEqual(generator.get_config(), expected_params)
Пример #7
0
    def test_get_config(self):
        """
        Check ``get_config`` for indexes built without a cache element, with
        ``None``, with an empty memory element and with a memory element
        that carries pickled bytes.
        """
        get_default = MemoryDescriptorIndex.get_default_config
        type_name = 'DataMemoryElement'

        # No cache element, implicit and explicit.
        ntools.assert_equal(MemoryDescriptorIndex().get_config(),
                            get_default())
        ntools.assert_equal(MemoryDescriptorIndex(None).get_config(),
                            get_default())

        # Empty cache element: only the type entry is expected.
        empty_cfg = MemoryDescriptorIndex(DataMemoryElement()).get_config()
        ntools.assert_equal(
            empty_cfg,
            merge_dict(get_default(), {'cache_element': {'type': type_name}}))

        # Cache element with content: the bytes are expected in the config.
        pickled = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
        filled_elem = DataMemoryElement(bytes=pickled)
        filled_cfg = MemoryDescriptorIndex(filled_elem).get_config()
        ntools.assert_equal(
            filled_cfg,
            merge_dict(get_default(), {
                'cache_element': {
                    type_name: {'bytes': pickled},
                    'type': type_name,
                },
            }))
Пример #8
0
    def test_get_config(self):
        """
        Check ``get_config`` for indexes built without a cache element, with
        ``None``, with an empty memory element and with a memory element
        whose bytes are expected back as a decoded string.
        """
        # Implicit and explicit "no cache element" construction.
        for index in (MemoryDescriptorIndex(), MemoryDescriptorIndex(None)):
            self.assertEqual(index.get_config(),
                             MemoryDescriptorIndex.get_default_config())

        # Empty cache element: only the type entry is expected.
        blank_elem = DataMemoryElement()
        actual = MemoryDescriptorIndex(blank_elem).get_config()
        expected = merge_dict(
            MemoryDescriptorIndex.get_default_config(),
            {'cache_element': {'type': 'DataMemoryElement'}})
        self.assertEqual(actual, expected)

        # Cache element with content: bytes decoded with
        # BYTES_CONFIG_ENCODING are expected in the config.
        raw_bytes = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
        raw_bytes_str = raw_bytes.decode(BYTES_CONFIG_ENCODING)
        filled_elem = DataMemoryElement(bytes=raw_bytes)
        actual = MemoryDescriptorIndex(filled_elem).get_config()
        expected = merge_dict(
            MemoryDescriptorIndex.get_default_config(),
            {
                'cache_element': {
                    'DataMemoryElement': {'bytes': raw_bytes_str},
                    'type': 'DataMemoryElement',
                },
            })
        self.assertEqual(actual, expected)
Пример #9
0
    def test_get_config(self, _m_cdg_setupNetwork):
        """
        Build a generator from explicit parameters and check that
        ``get_config`` hands them back, with the data-element parameters
        replaced by their ``to_config_dict`` form.

        The network setup is mocked (see the injected argument) so that no
        caffe initialization is needed for this test.
        """
        element_keys = ('network_prototxt', 'network_model', 'image_mean')
        expected_params = {
            'network_prototxt': DataMemoryElement(),
            'network_model': DataMemoryElement(),
            'image_mean': DataMemoryElement(),
        }
        expected_params.update({
            'return_layer': 'layer name',
            'batch_size': 777,
            'use_gpu': False,
            'gpu_device_id': 8,
            'network_is_bgr': False,
            'data_layer': 'data-other',
            'load_truncated_images': True,
            'pixel_rescale': (.2, .8),
            'input_scale': 1.5,
            'threads': 14,
        })

        # The parameters above must cover exactly the keys of the default
        # configuration.
        default_keys = set(CaffeDescriptorGenerator.get_default_config())
        assert default_keys == set(expected_params)

        generator = CaffeDescriptorGenerator(**expected_params)

        # The config is expected to carry sub-configs for the elements.
        for name in element_keys:
            expected_params[name] = to_config_dict(expected_params[name])
        assert generator.get_config() == expected_params
Пример #10
0
        def test_pickle_save_restore(self, m_cdg_setupNetwork):
            """
            A generator should survive a pickle round trip: the (mocked)
            network setup is invoked once on construction and once more on
            unpickling, and both instances report the same configuration.
            """
            ctor_kwargs = dict(
                network_prototxt=DataMemoryElement(),
                network_model=DataMemoryElement(),
                image_mean=DataMemoryElement(),
                return_layer='layer name',
                batch_size=777,
                use_gpu=False,
                gpu_device_id=8,
                network_is_bgr=False,
                data_layer='data-other',
                load_truncated_images=True,
                pixel_rescale=(.2, .8),
                input_scale=1.5,
            )
            original = CaffeDescriptorGenerator(**ctor_kwargs)
            # Construction triggers the first network setup call.
            self.assertEqual(m_cdg_setupNetwork.call_count, 1)

            serialized = pickle.dumps(original, -1)
            restored = pickle.loads(serialized)
            # Restoring triggers the second network setup call.
            self.assertEqual(m_cdg_setupNetwork.call_count, 2)

            self.assertIsInstance(restored, CaffeDescriptorGenerator)
            self.assertEqual(original.get_config(), restored.get_config())
Пример #11
0
    def test_add_data(self):
        """
        ``add_data`` should store the element in ``_element_map`` under its
        UUID.
        """
        elem = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
        expected = {elem.uuid(): elem}

        data_set = DataMemorySet()
        data_set.add_data(elem)

        self.assertEqual(data_set._element_map, expected)
Пример #12
0
 def test_set_bytes_when_readonly(self):
     """
     ``set_bytes`` on a read-only element should raise ``ReadOnlyError``
     and leave the stored bytes as they were.
     """
     original = six.b('test bytes first set')
     replacement = six.b('the second set of bytes')
     elem = DataMemoryElement(original, readonly=True)

     self.assertEqual(elem.get_bytes(), original)
     with self.assertRaises(ReadOnlyError):
         elem.set_bytes(replacement)
     # Content is unchanged after the rejected write.
     self.assertEqual(elem.get_bytes(), original)
Пример #13
0
    def test_configuration(self):
        """
        Run ``configuration_test_helper`` on a fully specified index and
        check every constructor argument on each instance it yields.
        """
        index = FaissNearestNeighborsIndex(
            descriptor_set=MemoryDescriptorSet(),
            idx2uid_kvs=MemoryKeyValueStore(),
            uid2idx_kvs=MemoryKeyValueStore(),
            index_element=DataMemoryElement(),
            index_param_element=DataMemoryElement(),
            read_only=True,
            factory_string=u'some fact str',
            ivf_nprobe=88,
            use_gpu=False,
            gpu_id=99,
            random_seed=8,
        )

        # Attribute name -> type expected for the nested plugin instances.
        expected_types = (
            ('_descriptor_set', MemoryDescriptorSet),
            ('_idx2uid_kvs', MemoryKeyValueStore),
            ('_uid2idx_kvs', MemoryKeyValueStore),
            ('_index_element', DataMemoryElement),
            ('_index_param_element', DataMemoryElement),
        )

        for inst in configuration_test_helper(index):
            for attr_name, attr_type in expected_types:
                assert isinstance(getattr(inst, attr_name), attr_type)
            assert inst.read_only is True
            assert isinstance(inst.factory_string, six.string_types)
            assert inst.factory_string == 'some fact str'
            assert inst._ivf_nprobe == 88
            assert inst._use_gpu is False
            assert inst._gpu_id == 99
            assert inst.random_seed == 8
Пример #14
0
 def test_build_index_with_cache(self):
     """
     Building an index that was given a cache element should fill both
     the in-memory index and the cache element.
     """
     cache_elem = DataMemoryElement()
     hash_index = LinearHashIndex(cache_elem)
     bit_vectors = [
         [0, 1, 0],
         [1, 0, 0],
         [0, 1, 1],
         [0, 0, 1],
     ]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)

     nose.tools.assert_equal(hash_index.index, {1, 2, 3, 4})
     nose.tools.assert_false(cache_elem.is_empty())
Пример #15
0
    def test_save_model_with_writable_caches(self):
        """
        With both cache elements writable, ``save_model`` should store the
        ``numpy.save`` serialization of the mean vector and of the rotation
        matrix in their respective cache elements.
        """
        def npy_bytes(array):
            # Serialize ``array`` with ``numpy.save`` into an in-memory
            # buffer and return the resulting bytes.
            buf = six.BytesIO()
            # noinspection PyTypeChecker
            numpy.save(buf, array)
            return buf.getvalue()

        mean_vec = numpy.array([1, 2, 3])
        rotation = numpy.eye(3)
        mean_vec_bytes = npy_bytes(mean_vec)
        rotation_bytes = npy_bytes(rotation)

        functor = ItqFunctor()
        functor.mean_vec = mean_vec
        functor.rotation = rotation
        functor.mean_vec_cache_elem = DataMemoryElement(readonly=False)
        functor.rotation_cache_elem = DataMemoryElement(readonly=False)

        functor.save_model()

        self.assertEqual(functor.mean_vec_cache_elem.get_bytes(),
                         mean_vec_bytes)
        self.assertEqual(functor.rotation_cache_elem.get_bytes(),
                         rotation_bytes)
Пример #16
0
    def test_fit_with_cache(self):
        """
        Fitting a functor that was given two cache elements should produce
        the expected model and store the same arrays in those elements.
        """
        training = []
        for idx in range(5):
            descr = DescriptorMemoryElement(six.b('test'), idx)
            descr.set_vector([-2. + idx, -2. + idx])
            training.append(descr)

        functor = ItqFunctor(DataMemoryElement(),
                             DataMemoryElement(),
                             bit_length=1,
                             random_seed=0)
        functor.fit(training)

        # TODO: Explanation as to why this is the expected result.
        expected_mean = [0, 0]
        expected_rotation = [[1 / sqrt(2)], [1 / sqrt(2)]]
        almost_equal = numpy.testing.assert_array_almost_equal

        # In-memory model.
        almost_equal(functor.mean_vec, expected_mean)
        almost_equal(functor.rotation, expected_rotation)

        # Cached mean vector.
        self.assertIsNotNone(functor.mean_vec_cache_elem)
        cached_mean = numpy.load(
            six.BytesIO(functor.mean_vec_cache_elem.get_bytes()))
        almost_equal(cached_mean, expected_mean)

        # Cached rotation matrix.
        self.assertIsNotNone(functor.rotation_cache_elem)
        cached_rotation = numpy.load(
            six.BytesIO(functor.rotation_cache_elem.get_bytes()))
        almost_equal(cached_rotation, expected_rotation)
Пример #17
0
 def test_load_as_matrix_empty_data(self):
     """
     Loading an empty data element should be rejected with a
     ``ValueError`` rather than attempted.
     """
     blank_elem = DataMemoryElement(content_type='image/png',
                                    readonly=True)
     assert blank_elem.is_empty()

     expected_msg = "GdalImageReader cannot load 0-sized data"
     with pytest.raises(ValueError, match=expected_msg):
         GdalImageReader().load_as_matrix(blank_elem)
Пример #18
0
    def test_save_cache(self):
        """
        Building the index should write a non-empty payload to the cache
        element, which starts out empty.
        """
        cache_elem = DataMemoryElement()
        nose.tools.assert_true(cache_elem.is_empty())

        hash_index = LinearHashIndex(cache_elem)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)

        nose.tools.assert_false(cache_elem.is_empty())
        cached_bytes = cache_elem.get_bytes()
        nose.tools.assert_true(len(cached_bytes) > 0)
Пример #19
0
 def test_build_index_with_cache(self):
     """
     Building an index that was given a cache element should fill both
     the in-memory index and the cache element.
     """
     cache_elem = DataMemoryElement()
     hash_index = LinearHashIndex(cache_elem)
     bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
     # noinspection PyTypeChecker
     hash_index.build_index(bit_vectors)

     expected_index = {1, 2, 3, 4}
     self.assertEqual(hash_index.index, expected_index)
     self.assertFalse(cache_elem.is_empty())
Пример #20
0
    def test_add_with_caching(self):
        """
        Pairs added one at a time should end up in the pickled table held
        by the cache element.
        """
        cache_elem = DataMemoryElement()
        store = MemoryKeyValueStore(cache_elem)

        store.add('a', 'b')
        store.add('foo', None)
        store.add(0, 89)

        cached_table = pickle.loads(cache_elem.get_bytes())
        nose.tools.assert_equal(cached_table,
                                {'a': 'b', 'foo': None, 0: 89})
Пример #21
0
 def test_set_bytes_when_readonly(self):
     """
     A read-only element should refuse ``set_bytes`` with
     ``ReadOnlyError`` and keep returning its original bytes.
     """
     kept = six.b('test bytes first set')
     rejected = six.b('the second set of bytes')
     elem = DataMemoryElement(kept, readonly=True)
     self.assertEqual(elem.get_bytes(), kept)

     with self.assertRaises(ReadOnlyError):
         elem.set_bytes(rejected)

     self.assertEqual(elem.get_bytes(), kept)
Пример #22
0
    def test_load_as_matrix_invalid_bytes(self):
        """
        A data element whose bytes are not a valid image should fail to
        load with an ``IOError``.
        """
        bad_elem = DataMemoryElement(content_type='image/png')
        bad_elem.set_bytes(b"not valid bytes")

        reader = PilImageReader()
        expected_msg = ("Failed to identify image from bytes "
                        "provided by DataMemoryElement")
        with pytest.raises(IOError, match=expected_msg):
            reader.load_as_matrix(bad_elem)
Пример #23
0
    def test_save_cache_build_index(self):
        """
        Building the index should write the hash values, loadable with
        ``numpy.load``, into the initially empty cache element.
        """
        cache_elem = DataMemoryElement()
        self.assertTrue(cache_elem.is_empty())

        hash_index = LinearHashIndex(cache_elem)
        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        self.assertFalse(cache_elem.is_empty())

        # Decode the cached bytes and compare as a set.
        expected_hashes = {1, 2, 3, 4}
        cached_buffer = BytesIO(cache_elem.get_bytes())
        cached_hashes = set(numpy.load(cached_buffer))
        self.assertSetEqual(expected_hashes, cached_hashes)
Пример #24
0
 def test_from_config(self):
     """
     An index configured with a memory cache element that holds a pickled
     table should expose an equal cache element and the unpickled table.
     """
     table = {'a': 1, 'b': 2, 'c': 3}
     expected_cache = DataMemoryElement(bytes=pickle.dumps(table))

     config = {
         'cache_element': {
             'type': 'DataMemoryElement',
             'DataMemoryElement': {'bytes': expected_cache.get_bytes()},
         },
     }
     index = MemoryDescriptorIndex.from_config(config)

     self.assertEqual(index.cache_element, expected_cache)
     self.assertEqual(index._table, table)
Пример #25
0
    def dl_image(meta):
        """
        Fetch the image described by ``meta`` and make sure it is on disk.

        The target file is ``<output_dir>/<index>/<doc_type>/<id><ext>``,
        where the extension is guessed from the content type. If that file
        already exists nothing is downloaded. Otherwise the stored-data URL
        is tried first (with ``stored_http_auth``) and the original URL is
        used as a fallback; the downloaded content is then written to the
        target file.

        :param meta: Metadata block. Must provide ``index``, ``doc_type``,
            ``id`` and, under ``fields``, indexable ``content_type``,
            ``obj_stored_url`` and ``obj_original_url`` entries whose first
            item is used.

        :return: ``(id, save_path, uuid)`` on success, or ``None`` when no
            extension could be guessed or both downloads failed.

        :raises KeyError: If ``meta`` lacks a required key. The error is
            logged and re-raised.
        """
        try:
            fields = meta['fields']
            c_type = fields['content_type'][0]
            obj_stored_url = fields['obj_stored_url'][0]
            obj_original_url = fields['obj_original_url'][0]

            # NOTE(review): ``m`` comes from the enclosing scope; presumably
            # a ``mimetypes.MimeTypes`` instance -- confirm.
            c_ext = m.guess_extension(c_type, strict=False)
            if c_ext is None:
                log.warning(
                    "Guessed 'None' extension for content-type '%s', "
                    "skipping.", c_type)
                return None

            save_dir = os.path.abspath(
                os.path.expanduser(
                    os.path.join(output_dir, meta['index'], meta['doc_type'])))
            save_file = meta['id'] + c_ext
            save_path = os.path.join(save_dir, save_file)

            # Save/write file if needed
            if not os.path.isfile(save_path):
                # First try 'stored' url, fallback on original
                # Return None if failed to download anything
                ok, r = try_download(obj_stored_url, stored_http_auth)
                if not ok:
                    log.warning(
                        "Failed to download stored-data URL \"%s\" "
                        "(error=%s)", obj_stored_url, str(r))

                    ok, r = try_download(obj_original_url)
                    if not ok:
                        # Report the URL that actually failed here, i.e. the
                        # original one, not the stored one.
                        log.warning(
                            "Failed to download original URL \"%s\" "
                            "(error=%s)", obj_original_url, str(r))
                        return None

                # One of the two downloads succeeded at this point.
                content = r.content

                d = DataMemoryElement(content, c_type)

                safe_create_dir(save_dir)
                with open(save_path, 'wb') as out:
                    log.debug("Saving to file: '%s'", save_path)
                    out.write(content)
            else:
                # File is already present; wrap it instead of downloading.
                d = DataFileElement(save_path)

            return meta['id'], save_path, d.uuid()
        except KeyError as ex:
            log.error("Failed to find key %s in meta block: %s", str(ex), meta)
            raise
Пример #26
0
    def dl_image(meta):
        """
        Fetch the image described by ``meta`` and make sure it is on disk.

        The target file is ``<output_dir>/<index>/<doc_type>/<id><ext>``,
        where the extension is guessed from the content type. If that file
        already exists nothing is downloaded. Otherwise the stored-data URL
        is tried first (with ``stored_http_auth``) and the original URL is
        used as a fallback; the downloaded content is then written to the
        target file.

        :param meta: Metadata block. Must provide ``index``, ``doc_type``,
            ``id`` and, under ``fields``, indexable ``content_type``,
            ``obj_stored_url`` and ``obj_original_url`` entries whose first
            item is used.

        :return: ``(id, save_path, uuid)`` on success, or ``None`` when no
            extension could be guessed or both downloads failed.

        :raises KeyError: If ``meta`` lacks a required key. The error is
            logged and re-raised.
        """
        try:
            fields = meta['fields']
            c_type = fields['content_type'][0]
            obj_stored_url = fields['obj_stored_url'][0]
            obj_original_url = fields['obj_original_url'][0]

            # NOTE(review): ``m`` comes from the enclosing scope; presumably
            # a ``mimetypes.MimeTypes`` instance -- confirm.
            c_ext = m.guess_extension(c_type, strict=False)
            if c_ext is None:
                log.warning("Guessed 'None' extension for content-type '%s', "
                            "skipping.", c_type)
                return None

            save_dir = os.path.abspath(os.path.expanduser(
                os.path.join(output_dir, meta['index'], meta['doc_type'])
            ))
            save_file = meta['id'] + c_ext
            save_path = os.path.join(save_dir, save_file)

            # Save/write file if needed
            if not os.path.isfile(save_path):
                # First try 'stored' url, fallback on original
                # Return None if failed to download anything
                ok, r = try_download(obj_stored_url, stored_http_auth)
                if not ok:
                    log.warning("Failed to download stored-data URL \"%s\" "
                                "(error=%s)",
                                obj_stored_url, str(r))

                    ok, r = try_download(obj_original_url)
                    if not ok:
                        # Report the URL that actually failed here, i.e. the
                        # original one, not the stored one.
                        log.warning("Failed to download original URL \"%s\" "
                                    "(error=%s)",
                                    obj_original_url, str(r))
                        return None

                # One of the two downloads succeeded at this point.
                content = r.content

                d = DataMemoryElement(content, c_type)

                safe_create_dir(save_dir)
                with open(save_path, 'wb') as out:
                    log.debug("Saving to file: '%s'", save_path)
                    out.write(content)
            else:
                # File is already present; wrap it instead of downloading.
                d = DataFileElement(save_path)

            return meta['id'], save_path, d.uuid()
        except KeyError as ex:
            log.error("Failed to find key %s in meta block: %s",
                      str(ex), meta)
            raise
Пример #27
0
    def test_remove_many_with_cache(self):
        """
        Removing several keys at once should be reflected in the pickled
        table held by the cache element.
        """
        initial_table = {0: 0, 1: 1, 2: 2}
        cache_elem = DataMemoryElement(pickle.dumps(initial_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store starts out with the table read from the cache element.
        self.assertDictEqual(store._table, initial_table)

        store.remove_many([0, 2])

        remaining = pickle.loads(cache_elem.get_bytes())
        self.assertDictEqual(remaining, {1: 1})
    def test_remove_many_with_cache(self):
        """
        After ``remove_many``, the cache element should hold only the
        entries that were not removed.
        """
        seed_table = dict([(0, 0), (1, 1), (2, 2)])
        cache = DataMemoryElement(pickle.dumps(seed_table))
        kvs = MemoryKeyValueStore(cache)
        self.assertDictEqual(kvs._table, seed_table)

        doomed_keys = [0, 2]
        kvs.remove_many(doomed_keys)

        self.assertDictEqual(pickle.loads(cache.get_bytes()), {1: 1})
    def test_add_with_caching(self):
        """
        Pairs added one at a time should end up in the pickled table held
        by the cache element.
        """
        cache_elem = DataMemoryElement()
        store = MemoryKeyValueStore(cache_elem)

        store.add('a', 'b')
        store.add('foo', None)
        store.add(0, 89)

        expected = {'a': 'b', 'foo': None, 0: 89}
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, expected)
Пример #30
0
    def test_configuration(self):
        """
        Check the default configuration, then round-trip a filled-in
        configuration through ``from_config`` / ``get_config``.
        """
        config = DataMemoryElement.get_default_config()
        ntools.assert_equal(config, {'bytes': None, 'content_type': None})

        config['bytes'] = 'Hello World.'
        config['content_type'] = 'text/plain'
        first = DataMemoryElement.from_config(config)
        ntools.assert_equal(config, first.get_config())
        ntools.assert_equal(first._bytes, 'Hello World.')
        ntools.assert_equal(first._content_type, 'text/plain')

        # A second instance built from the first one's config compares
        # equal to it.
        second = DataMemoryElement.from_config(first.get_config())
        ntools.assert_equal(first, second)
Пример #31
0
 def test_from_config(self):
     """
     An index configured with a memory cache element that holds a pickled
     table should expose an equal cache element and the unpickled table.
     """
     table = {'a': 1, 'b': 2, 'c': 3}
     expected_cache = DataMemoryElement(bytes=pickle.dumps(table))

     element_config = {'bytes': expected_cache.get_bytes()}
     index = MemoryDescriptorIndex.from_config({
         'cache_element': {
             'type': 'DataMemoryElement',
             'DataMemoryElement': element_config,
         },
     })

     ntools.assert_equal(index.cache_element, expected_cache)
     ntools.assert_equal(index._table, table)
Пример #32
0
    def test_cacheing_with_map(self):
        """
        ``cache`` should pickle the current element map into the cache
        element.
        """
        cache_elem = DataMemoryElement()
        element_map = {0: 'a', 75: 'b', 124769: 'c'}

        data_set = DataMemorySet(cache_elem)
        # Inject the map directly, bypassing the add API.
        data_set._element_map = element_map
        data_set.cache()

        self.assertFalse(cache_elem.is_empty())
        cached_map = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_map, element_map)
Пример #33
0
    def test_save_cache_build_index(self):
        """
        Building the index should write the hash values, loadable with
        ``numpy.load``, into the initially empty cache element.
        """
        cache_elem = DataMemoryElement()
        self.assertTrue(cache_elem.is_empty())

        bit_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        hash_index = LinearHashIndex(cache_elem)
        # noinspection PyTypeChecker
        hash_index.build_index(bit_vectors)
        self.assertFalse(cache_elem.is_empty())

        # Decode the cached bytes and compare as a set.
        cached_array = numpy.load(BytesIO(cache_elem.get_bytes()))
        self.assertSetEqual({1, 2, 3, 4}, set(cached_array))
Пример #34
0
    def test_configuration(self):
        """
        Check the default configuration (including ``readonly``), then
        round-trip a filled-in configuration through ``from_config`` /
        ``get_config``.
        """
        config = DataMemoryElement.get_default_config()
        expected_default = {
            'bytes': None,
            'content_type': None,
            'readonly': False,
        }
        ntools.assert_equal(config, expected_default)

        config['bytes'] = 'Hello World.'
        config['content_type'] = 'text/plain'
        first = DataMemoryElement.from_config(config)
        ntools.assert_equal(config, first.get_config())
        ntools.assert_equal(first._bytes, 'Hello World.')
        ntools.assert_equal(first._content_type, 'text/plain')

        # A second instance built from the first one's config compares
        # equal to it.
        second = DataMemoryElement.from_config(first.get_config())
        ntools.assert_equal(first, second)
Пример #35
0
    def test_add_many_with_caching(self):
        """
        ``add_many`` should update both the in-memory table and the pickled
        table held by the cache element.
        """
        pairs = {'a': 'b', 'foo': None, 0: 89}
        cache_elem = DataMemoryElement()

        store = MemoryKeyValueStore(cache_elem)
        # Nothing stored and nothing cached before the add.
        self.assertEqual(store._table, {})
        self.assertEqual(cache_elem.get_bytes(), six.b(""))

        store.add_many(pairs)
        self.assertEqual(store._table, pairs)
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, pairs)
Пример #36
0
        def test_persistence_with_update_index(self):
            """
            Build an index, update it, then construct a fresh instance from
            the same backing entities and check that every descriptor's
            nearest neighbor is the descriptor itself.
            """
            n1 = 100
            n2 = 10
            dim = 8
            set1 = set(DescriptorMemoryElement('test', i) for i in range(n1))
            set2 = set(DescriptorMemoryElement('test', i)
                       for i in range(n1, n1 + n2))
            # Give every descriptor a random vector.
            for descr in (set1 | set2):
                descr.set_vector(np.random.rand(dim))

            # Backing elements that outlive the first index instance.
            index_element = DataMemoryElement(
                content_type='application/octet-stream')
            index_param_element = DataMemoryElement(content_type='text/plain')
            index = self._make_inst(index_element=index_element,
                                    index_param_element=index_param_element)
            # Keep hold of the other backing entities for the rebuild below.
            backing = dict(
                descriptor_set=index._descriptor_set,
                idx2uid_kvs=index._idx2uid_kvs,
                uid2idx_kvs=index._uid2idx_kvs,
            )

            # Initial build: all of the first set must be present.
            index.build_index(set1)
            self.assertEqual(index.count(), len(set1))
            for descr in set1:
                self.assertIn(descr, index._descriptor_set)

            # Update: both sets must now be present.
            index.update_index(set2)
            set_all = set1 | set2
            self.assertEqual(index.count(), len(set_all))
            for descr in set_all:
                self.assertIn(descr, index._descriptor_set)

            # Replace the instance with one built from the same entities.
            del index
            index = self._make_inst(index_element=index_element,
                                    index_param_element=index_param_element,
                                    **backing)

            # The nearest neighbor of an indexed descriptor should be the
            # descriptor itself.
            for query in set_all:
                neighbors, _ = index.nn(query)
                self.assertEqual(neighbors[0], query)
Пример #37
0
    def test_get_config_with_cache_elements(self):
        """
        ``get_config`` should report the constructor parameters as well as
        the bytes of cache elements that were assigned afterwards.
        """
        scalar_params = (
            ('bit_length', 5),
            ('itq_iterations', 6),
            ('normalize', 7),
            ('random_seed', 8),
        )
        functor = ItqFunctor(**dict(scalar_params))
        functor.mean_vec_cache_elem = DataMemoryElement('cached vec bytes')
        functor.rotation_cache_elem = DataMemoryElement('cached rot bytes')

        config = functor.get_config()
        for key, value in scalar_params:
            NT.assert_equal(config[key], value)

        mean_cfg = config['mean_vec_cache']
        NT.assert_equal(mean_cfg['type'], "DataMemoryElement")
        NT.assert_equal(mean_cfg['DataMemoryElement']['bytes'],
                        'cached vec bytes')

        rotation_cfg = config['rotation_cache']
        NT.assert_equal(rotation_cfg['DataMemoryElement']['bytes'],
                        'cached rot bytes')
Пример #38
0
    def test_configuration(self):
        """
        Check the default configuration (including ``readonly``), then
        round-trip a filled-in configuration through ``from_config`` /
        ``get_config``.
        """
        config = DataMemoryElement.get_default_config()
        expected_default = {
            'bytes': None,
            'content_type': None,
            'readonly': False,
        }
        self.assertEqual(config, expected_default)

        config['bytes'] = 'Hello World.'
        config['content_type'] = 'text/plain'
        #: :type: DataMemoryElement
        first = DataMemoryElement.from_config(config)
        self.assertEqual(config, first.get_config())
        self.assertEqual(first._bytes, 'Hello World.')
        self.assertEqual(first._content_type, 'text/plain')

        # A second instance built from the first one's config compares
        # equal to it.
        second = DataMemoryElement.from_config(first.get_config())
        self.assertEqual(first, second)
Пример #39
0
 def test_caching_readonly_cache(self):
     """
     ``cache`` should raise ``ReadOnlyError`` when the set was given a
     read-only cache element.
     """
     data_set = DataMemorySet(DataMemoryElement(readonly=True))
     with self.assertRaises(ReadOnlyError):
         data_set.cache()
Пример #40
0
 def test_save_model_with_cache(self, m_savez):
     """
     Building the ball tree on an index that has a cache element should
     call the (mocked) ``savez`` exactly once.
     """
     n_rows, n_bits = 1000, 256
     hash_index = SkLearnBallTreeHashIndex(DataMemoryElement(),
                                           random_seed=0)
     # Random 0/1 matrix of shape (n_rows, n_bits).
     bit_matrix = np.random.randint(0, 2, 1000 * 256).reshape(n_rows,
                                                              n_bits)
     hash_index._build_bt_internal(bit_matrix)

     self.assertTrue(m_savez.called)
     self.assertEqual(m_savez.call_count, 1)
Пример #41
0
    def test_add_with_caching(self):
        """
        Each added key-value pair should be present in the table pickled
        into the cache element.
        """
        cache = DataMemoryElement()
        kvs = MemoryKeyValueStore(cache)

        expected = {'a': 'b', 'foo': None, 0: 89}

        kvs.add('a', 'b')
        kvs.add('foo', None)
        kvs.add(0, 89)

        self.assertEqual(pickle.loads(cache.get_bytes()), expected)
Пример #42
0
    def test_remove_with_cache(self):
        """
        Removing a key should update both the store's table and the pickled
        content of the cache element.
        """
        initial_table = {0: 1, 'a': 'b'}
        cache_elem = DataMemoryElement(pickle.dumps(initial_table))
        store = MemoryKeyValueStore(cache_elem)
        # The store should start out with the table held by the cache element.
        self.assertDictEqual(store._table, initial_table)

        store.remove('a')
        remaining = {0: 1}
        self.assertDictEqual(store._table, remaining)
        self.assertDictEqual(pickle.loads(cache_elem.get_bytes()), remaining)
Пример #43
0
    def test_add_many_with_caching(self):
        """
        A bulk ``add_many`` should be reflected in both the store's table and
        the cache element.
        """
        pairs = {'a': 'b', 'foo': None, 0: 89}
        cache_elem = DataMemoryElement()
        store = MemoryKeyValueStore(cache_elem)

        # Before the bulk add: empty table, no bytes in the cache element.
        self.assertEqual(store._table, {})
        self.assertEqual(cache_elem.get_bytes(), six.b(""))

        store.add_many(pairs)
        self.assertEqual(store._table, pairs)
        cached_table = pickle.loads(cache_elem.get_bytes())
        self.assertEqual(cached_table, pairs)
Пример #44
0
    def test_save_cache_remove_from_index(self):
        """
        Check that the cache element content is updated when entries are
        removed from the index.
        """
        cache_elem = DataMemoryElement()
        self.assertTrue(cache_elem.is_empty())

        def cached_values():
            # Decode whatever is currently stored in the cache element.
            return set(numpy.load(BytesIO(cache_elem.get_bytes())))

        index = LinearHashIndex(cache_elem)
        initial_vectors = [
            [0, 1, 0],  # 2
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
            [1, 1, 0],  # 6
        ]
        # noinspection PyTypeChecker
        index.build_index(initial_vectors)
        self.assertFalse(cache_elem.is_empty())
        self.assertSetEqual(cached_values(), {2, 3, 4, 6})

        removed_vectors = [
            [0, 1, 1],  # 3
            [1, 0, 0],  # 4
        ]
        # noinspection PyTypeChecker
        index.remove_from_index(removed_vectors)
        self.assertFalse(cache_elem.is_empty())
        self.assertSetEqual(cached_values(), {2, 6})
Пример #45
0
    def resolve_data_element(self, uri):
        """
        Resolve a URI to some data into a DataElement instance.

        Three URI forms are distinguished:

        - ``file://<path>``: wrapped in a ``DataFileElement`` when the path
          is an existing file.
        - ``base64://<data>``: decoded via ``DataMemoryElement.from_base64``,
          using the ``content_type`` argument of the current flask request.
        - anything else: treated as a URL and wrapped in a
          ``DataUrlElement``.

        :raises ValueError: The file URI does not point to an existing file,
            no content type accompanies base64 data, or constructing the URL
            element raised a ``requests.HTTPError``.

        :param uri: URI to data
        :type uri: str
        :return: DataElement instance wrapping given URI to data.
        :rtype: smqtk.representation.DataElement

        """
        file_prefix = "file://"
        base64_prefix = "base64://"

        self._log.debug("Resolving URI: %s", uri)

        # Local file on disk.
        if uri[:len(file_prefix)] == file_prefix:
            self._log.debug("Given local disk filepath")
            filepath = uri[len(file_prefix):]
            if os.path.isfile(filepath):
                return DataFileElement(filepath)
            raise ValueError("File URI did not point to an existing file "
                             "on disk.")

        # Inline base64 data; the content type must come with the request.
        if uri[:len(base64_prefix)] == base64_prefix:
            self._log.debug("Given base64 string")
            content_type = flask.request.args.get('content_type', None)
            self._log.debug("Content type: %s", content_type)
            if content_type:
                b64str = uri[len(base64_prefix):]
                return DataMemoryElement.from_base64(b64str, content_type)
            raise ValueError("No content-type with given base64 data")

        # Fall back to treating the URI as a URL.
        self._log.debug("Given URL")
        try:
            return DataUrlElement(uri)
        except requests.HTTPError as ex:
            raise ValueError("Failed to initialize URL element due to "
                             "HTTPError: %s" % str(ex))
Пример #46
0
    def test_add_data(self):
        """
        Elements added to a ``KVSDataSet`` should be retrievable from the
        backing key-value store under their UUIDs.
        """
        kv_store = MemoryKeyValueStore()
        data_set = KVSDataSet(kv_store)

        elements = (
            DataMemoryElement(six.b('bytes1')),
            DataMemoryElement(six.b('bytes2')),
        )
        data_set.add_data(*elements)

        # Every element's UUID should be a key in the backing store...
        for elem in elements:
            self.assertIn(elem.uuid(), kv_store)
        # ...and map back to an equal element.
        for elem in elements:
            self.assertEqual(kv_store.get(elem.uuid()), elem)
Пример #47
0
 def test_from_uri_data_format(self):
     """
     ``from_uri`` on the valid data URI fixture should produce an element
     with the expected bytes and content type.
     """
     elem = DataMemoryElement.from_uri(self.VALID_DATA_URI)
     self.assertIsInstance(elem, DataMemoryElement)
     actual_bytes = elem.get_bytes()
     self.assertEqual(actual_bytes, self.EXPECTED_BYTES)
     actual_ct = elem.content_type()
     self.assertEqual(actual_ct, self.EXPECTED_CT)
Пример #48
0
 def test_get_bytes_empty_bytes(self):
     """An element constructed with an empty string should yield no bytes."""
     empty_elem = DataMemoryElement('')
     actual = empty_elem.get_bytes()
     self.assertEqual(actual, six.b(''))
Пример #49
0
 def test_is_empty_nonzero_bytes(self):
     """An element constructed with content should not report as empty."""
     filled_elem = DataMemoryElement('some bytes')
     empty_flag = filled_elem.is_empty()
     self.assertFalse(empty_flag)
Пример #50
0
 def test_writable_default(self):
     """An element built without a ``readonly`` argument should be writable."""
     elem = DataMemoryElement('foo')
     writable_flag = elem.writable()
     self.assertTrue(writable_flag)
Пример #51
0
 def test_get_bytes_some_bytes(self):
     """``get_bytes`` should return the content given at construction."""
     content = 'some bytes'
     elem = DataMemoryElement(content)
     actual = elem.get_bytes()
     self.assertEqual(actual, content)
Пример #52
0
 def test_from_base64_no_ct(self):
     """
     ``from_base64`` without a content type should still decode the fixture
     string to the expected bytes.
     """
     elem = DataMemoryElement.from_base64(self.VALID_BASE64)
     self.assertIsInstance(elem, DataMemoryElement)
     decoded = elem.get_bytes()
     self.assertEqual(decoded, self.EXPECTED_BYTES)
Пример #53
0
 def test_writable_when_not_readonly(self):
     """An element built with ``readonly=False`` should be writable."""
     elem = DataMemoryElement('', readonly=False)
     writable_flag = elem.writable()
     self.assertTrue(writable_flag)
Пример #54
0
 def test_from_base64_with_ct(self):
     """
     ``from_base64`` given a content type should produce an element with
     both the expected bytes and that content type.
     """
     elem = DataMemoryElement.from_base64(
         self.VALID_BASE64,
         self.EXPECTED_CT,
     )
     self.assertIsInstance(elem, DataMemoryElement)
     decoded = elem.get_bytes()
     self.assertEqual(decoded, self.EXPECTED_BYTES)
     reported_ct = elem.content_type()
     self.assertEqual(reported_ct, self.EXPECTED_CT)
Пример #55
0
 def test_from_base64_empty_string(self):
     """An empty base64 string should translate to an empty byte string."""
     elem = DataMemoryElement.from_base64('', None)
     self.assertIsInstance(elem, DataMemoryElement)
     decoded = elem.get_bytes()
     self.assertEqual(decoded, six.b(''))
Пример #56
0
 def test_from_uri_empty_string(self):
     """An empty URI should produce an element that holds no byte data."""
     elem = DataMemoryElement.from_uri('')
     self.assertIsInstance(elem, DataMemoryElement)
     # No base64 data is present, which should decode to no bytes.
     decoded = elem.get_bytes()
     self.assertEqual(decoded, six.b(''))
Пример #57
0
 def test_from_uri_base64_header_empty_data(self):
     """
     A URI consisting only of the ``base64://`` header should produce an
     element with no bytes.
     """
     header_only_uri = 'base64://'
     elem = DataMemoryElement.from_uri(header_only_uri)
     self.assertIsInstance(elem, DataMemoryElement)
     # Nothing follows the header, so there is nothing to decode.
     decoded = elem.get_bytes()
     self.assertEqual(decoded, six.b(''))
Пример #58
0
 def test_is_empty_zero_bytes(self):
     """An element constructed with an empty string should report as empty."""
     empty_elem = DataMemoryElement('')
     empty_flag = empty_elem.is_empty()
     self.assertTrue(empty_flag)