Пример #1
0
 def test_add_many_readonly(self):
     """add_many must raise ReadOnlyError on a store with a read-only cache."""
     readonly_cache = DataMemoryElement(readonly=True)
     store = MemoryKeyValueStore(readonly_cache)
     with self.assertRaises(ReadOnlyError):
         store.add_many({0: 1})
Пример #2
0
    def test_add_data(self):
        """An added element is stored in the set's map keyed by its UUID."""
        element = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
        expected = {element.uuid(): element}

        data_set = DataMemorySet()
        data_set.add_data(element)

        ntools.assert_equal(data_set._element_map, expected)
Пример #3
0
 def test_set_bytes(self):
     """set_bytes replaces the content subsequently returned by get_bytes."""
     first_payload = six.b('test bytes first set')
     second_payload = six.b('the second set of bytes')

     element = DataMemoryElement(first_payload)
     self.assertEqual(element.get_bytes(), first_payload)

     element.set_bytes(second_payload)
     self.assertEqual(element.get_bytes(), second_payload)
Пример #4
0
    def test_added_descriptor_table_caching(self):
        """Adds and removals on the index must be mirrored in a writable cache."""
        cache = DataMemoryElement(readonly=False)
        descriptors = [random_descriptor() for _ in range(3)]
        expected = {d.uuid(): d for d in descriptors}

        index = MemoryDescriptorIndex(cache)
        self.assertTrue(cache.is_empty())

        def cached_table():
            # Un-pickle whatever the index currently holds in its cache element.
            return pickle.loads(index.cache_element.get_bytes())

        # Bulk add: the writable cache element should now hold the table.
        index.add_many_descriptors(descriptors)
        self.assertFalse(cache.is_empty())
        self.assertEqual(cached_table(), expected)

        # Single add: cache should pick up the new entry.
        added = random_descriptor()
        expected[added.uuid()] = added
        index.add_descriptor(added)
        self.assertEqual(cached_table(), expected)

        # Removal: cache should drop the removed entry.
        removed = next(iter(expected.values()))
        del expected[removed.uuid()]
        index.remove_descriptor(removed.uuid())
        self.assertEqual(cached_table(), expected)
Пример #5
0
 def test_set_bytes_when_readonly(self):
     """A read-only element rejects set_bytes and keeps its original bytes."""
     original = six.b('test bytes first set')
     replacement = six.b('the second set of bytes')

     element = DataMemoryElement(original, readonly=True)
     self.assertEqual(element.get_bytes(), original)

     with self.assertRaises(ReadOnlyError):
         element.set_bytes(replacement)

     # Content must be unchanged after the rejected write.
     self.assertEqual(element.get_bytes(), original)
    def test_add_read_only(self):
        """
        A store whose cache element is read-only must refuse ``add``.
        """
        store = MemoryKeyValueStore(DataMemoryElement(readonly=True))

        for key, value in (('a', 'b'), ('foo', None)):
            with self.assertRaises(ReadOnlyError):
                store.add(key, value)
Пример #7
0
 def test_save_model_with_readonly_cache(self):
     """build_index raises ValueError when the cache element is read-only."""
     readonly_cache = DataMemoryElement(readonly=True)
     index = SkLearnBallTreeHashIndex(readonly_cache)
     # 1000 random 256-bit rows of 0/1 values.
     bit_matrix = numpy.random.randint(0, 2, 1000 * 256).reshape(1000, 256)
     with nose.tools.assert_raises(ValueError):
         index.build_index(bit_matrix)
Пример #8
0
 def test_save_model_with_readonly_cache(self):
     """_build_bt_internal raises ValueError when the cache is read-only."""
     readonly_cache = DataMemoryElement(readonly=True)
     index = SkLearnBallTreeHashIndex(readonly_cache)
     # 1000 random 256-bit rows of 0/1 values.
     bit_matrix = np.random.randint(0, 2, 1000 * 256).reshape(1000, 256)
     with self.assertRaises(ValueError):
         index._build_bt_internal(bit_matrix)
Пример #9
0
    def test_get_config_with_cache_elements(self):
        """get_config reports constructor values and both cache elements."""
        functor = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                             random_seed=8)
        functor.mean_vec_cache_elem = DataMemoryElement(b'cached vec bytes')
        functor.rotation_cache_elem = DataMemoryElement(b'cached rot bytes')

        config = functor.get_config()

        scalar_expectations = (
            ('bit_length', 5),
            ('itq_iterations', 6),
            ('normalize', 7),
            ('random_seed', 8),
        )
        for key, expected in scalar_expectations:
            self.assertEqual(config[key], expected)

        self.assertEqual(config['mean_vec_cache']['type'],
                         "DataMemoryElement")
        # The configured bytes are compared against their string form.
        mean_vec_conf = config['mean_vec_cache']['DataMemoryElement']
        self.assertEqual(mean_vec_conf['bytes'], 'cached vec bytes')
        rotation_conf = config['rotation_cache']['DataMemoryElement']
        self.assertEqual(rotation_conf['bytes'], 'cached rot bytes')
 def test_new_empty_cache(self):
     """
     Constructing with an empty (no bytes) cache element keeps that element
     and starts with an empty table.
     """
     cache = DataMemoryElement()
     store = MemoryKeyValueStore(cache)
     self.assertEqual(store._cache_element, cache)
     self.assertEqual(store._table, {})
Пример #11
0
    def test_persistence_with_update_index(self):
        """
        Build, update, then re-create the index from its persisted parts and
        check that nearest-neighbor queries still resolve to the query itself.
        """
        n_initial = 100
        n_update = 10
        dim = 8
        set1 = {DescriptorMemoryElement('test', i) for i in range(n_initial)}
        set2 = {DescriptorMemoryElement('test', i)
                for i in range(n_initial, n_initial + n_update)}
        for descr in (set1 | set2):
            descr.set_vector(np.random.rand(dim))

        # Create index with persistent entities.
        index_element = DataMemoryElement(
            content_type='application/octet-stream')
        index_param_element = DataMemoryElement(content_type='text/plain')
        index = self._make_inst(index_element=index_element,
                                index_param_element=index_param_element)
        # Hold on to the index's backing stores so a new instance can reuse
        # them after the first one is deleted.
        persisted = dict(
            descriptor_set=index._descriptor_set,
            idx2uid_kvs=index._idx2uid_kvs,
            uid2idx_kvs=index._uid2idx_kvs,
            index_element=index_element,
            index_param_element=index_param_element,
        )

        # Build initial index.
        index.build_index(set1)
        self.assertEqual(index.count(), len(set1))
        for descr in set1:
            self.assertIn(descr, index._descriptor_set)

        # Update and check that every intended descriptor is in the index.
        index.update_index(set2)
        set_all = set1 | set2
        self.assertEqual(index.count(), len(set_all))
        for descr in set_all:
            self.assertIn(descr, index._descriptor_set)

        del index
        index = self._make_inst(**persisted)

        # When the query is itself in the index, its nearest neighbor should
        # be the query element.
        for query in set_all:
            neighbors, _ = index.nn(query)
            self.assertEqual(neighbors[0], query)
Пример #12
0
    def test_init_with_cache(self):
        """Constructing on a populated cache element loads the pickled map."""
        expected_map = {'a': 1, 'b': 2, 'c': 3}
        cache = DataMemoryElement(bytes=pickle.dumps(expected_map))

        data_set = DataMemorySet(cache)

        self.assertEqual(data_set.cache_element, cache)
        self.assertEqual(data_set.pickle_protocol, -1)
        self.assertEqual(data_set._element_map, expected_map)
Пример #13
0
    def test_get_config_with_cache_elements(self):
        """get_config reports constructor values and both cache elements."""
        functor = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                             random_seed=8)
        functor.mean_vec_cache_elem = DataMemoryElement('cached vec bytes')
        functor.rotation_cache_elem = DataMemoryElement('cached rot bytes')

        config = functor.get_config()

        scalar_expectations = (
            ('bit_length', 5),
            ('itq_iterations', 6),
            ('normalize', 7),
            ('random_seed', 8),
        )
        for key, expected in scalar_expectations:
            NT.assert_equal(config[key], expected)

        NT.assert_equal(config['mean_vec_cache']['type'], "DataMemoryElement")
        mean_vec_conf = config['mean_vec_cache']['DataMemoryElement']
        NT.assert_equal(mean_vec_conf['bytes'], 'cached vec bytes')
        rotation_conf = config['rotation_cache']['DataMemoryElement']
        NT.assert_equal(rotation_conf['bytes'], 'cached rot bytes')
Пример #14
0
    def test_save_cache(self):
        """Building the index writes bytes into an initially empty cache."""
        cache = DataMemoryElement()
        nose.tools.assert_true(cache.is_empty())

        index = LinearHashIndex(cache)
        hash_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        index.build_index(hash_vectors)

        nose.tools.assert_false(cache.is_empty())
        nose.tools.assert_true(len(cache.get_bytes()) > 0)
Пример #15
0
 def test_init_with_empty_cache(self):
     """Constructor arguments are stored and ``bt`` is None for an empty cache."""
     cache = DataMemoryElement()
     index = SkLearnBallTreeHashIndex(
         cache_element=cache,
         leaf_size=52,
         random_seed=42,
     )
     self.assertEqual(index.cache_element, cache)
     self.assertEqual(index.leaf_size, 52)
     self.assertEqual(index.random_seed, 42)
     self.assertIsNone(index.bt)
Пример #16
0
    def test_cache_table_empty_table(self):
        """cache_table on an empty table stores the pickle of an empty dict."""
        descriptor_set = MemoryDescriptorSet(DataMemoryElement(), -1)
        descriptor_set._table = {}
        expected_bytes = pickle.dumps(descriptor_set._table, -1)

        descriptor_set.cache_table()

        self.assertIsNotNone(descriptor_set.cache_element)
        self.assertEqual(descriptor_set.cache_element.get_bytes(),
                         expected_bytes)
Пример #17
0
 def test_load_as_matrix_empty_data(self):
     """
     Loading an empty data element must raise ValueError instead of loading.
     """
     element = DataMemoryElement(readonly=True, content_type='image/png')
     assert element.is_empty()
     with pytest.raises(ValueError,
                        match="GdalImageReader cannot load 0-sized data"):
         GdalImageReader().load_as_matrix(element)
Пример #18
0
    def test_cache_table_empty_table(self):
        """cache_table on an empty table stores the pickle of an empty dict."""
        index = MemoryDescriptorIndex(DataMemoryElement(), -1)
        index._table = {}
        expected_bytes = pickle.dumps(index._table, -1)

        index.cache_table()

        ntools.assert_is_not_none(index.cache_element)
        ntools.assert_equal(index.cache_element.get_bytes(), expected_bytes)
Пример #19
0
 def test_configuration(self):
     """
     Every instance yielded by configuration_test_helper must carry the same
     bytes, content type and read-only flag as the source instance.
     """
     source = DataMemoryElement(
         bytes=b'Hello World.',
         content_type='text/plain',
         readonly=True,
     )
     for copy in configuration_test_helper(source):  # type: DataMemoryElement
         assert copy._bytes == b'Hello World.'
         assert copy._content_type == 'text/plain'
         assert copy._readonly is True
 def test_repr_simple_cache(self):
     """
     repr() of a store with a cache element embeds that element's repr.
     """
     cache = DataMemoryElement()
     store = MemoryKeyValueStore(cache)
     expected = (
         "<MemoryKeyValueStore cache_element: "
         "DataMemoryElement{len(bytes): 0, content_type: "
         "None, readonly: False}>"
     )
     self.assertEqual(repr(store), expected)
Пример #21
0
    def test_add_with_caching(self):
        """Pairs added to the store end up pickled in the cache element."""
        store = MemoryKeyValueStore()
        store._cache_element = DataMemoryElement()

        pairs = (('a', 'b'), ('foo', None), (0, 89))
        expected_cache = dict(pairs)

        for key, value in pairs:
            store.add(key, value)

        cached = pickle.loads(store._cache_element.get_bytes())
        nose.tools.assert_equal(cached, expected_cache)
Пример #22
0
    def test_get_config(self):
        """get_config matches the default config with and without a cache."""
        index = LinearHashIndex()
        expected = LinearHashIndex.get_default_config()

        # No cache element set: config equals the class default.
        nose.tools.assert_equal(index.get_config(), expected)

        # Cache element attached: its type name appears in the config.
        index.cache_element = DataMemoryElement()
        expected['cache_element']['type'] = 'DataMemoryElement'
        nose.tools.assert_equal(index.get_config(), expected)
Пример #23
0
    def test_init_with_cache(self):
        """Constructing on a populated cache element loads the pickled table."""
        descriptors = tuple(random_descriptor() for _ in range(4))
        expected_table = {d.uuid(): d for d in descriptors}
        cache = DataMemoryElement(bytes=pickle.dumps(expected_table))

        descriptor_set = MemoryDescriptorSet(cache)

        self.assertEqual(len(descriptor_set._table), 4)
        self.assertEqual(descriptor_set.cache_element, cache)
        self.assertEqual(descriptor_set._table, expected_table)
        self.assertEqual(set(descriptor_set._table.values()),
                         set(descriptors))
Пример #24
0
 def test_load_as_matrix_no_bytes(self):
     """
     Loading an element that was never given any bytes raises IOError.
     """
     # Deliberately left without bytes.
     element = DataMemoryElement(content_type='image/png')
     reader = PilImageReader()
     expected_message = ("Failed to identify image from bytes "
                         "provided by DataMemoryElement")
     with pytest.raises(IOError, match=expected_message):
         reader.load_as_matrix(element)
Пример #25
0
    def test_load_cache(self):
        """A second index on the same cache element ends up with an equal index."""
        cache = DataMemoryElement()
        built = LinearHashIndex(cache)
        hash_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        built.build_index(hash_vectors)

        # Constructed on the already-populated cache element.
        reloaded = LinearHashIndex(cache)

        nose.tools.assert_equal(built.cache_element, reloaded.cache_element)
        nose.tools.assert_equal(built.index, reloaded.index)
Пример #26
0
    def test_save_cache_build_index(self):
        """Building the index writes a numpy-loadable payload into the cache."""
        cache = DataMemoryElement()
        self.assertTrue(cache.is_empty())

        index = LinearHashIndex(cache)
        hash_vectors = [[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 0, 1]]
        # noinspection PyTypeChecker
        index.build_index(hash_vectors)
        self.assertFalse(cache.is_empty())

        # Decode the cached bytes and compare the stored values as a set.
        expected_values = {1, 2, 3, 4}
        cached_buffer = BytesIO(cache.get_bytes())
        cached_values = set(numpy.load(cached_buffer))
        self.assertSetEqual(expected_values, cached_values)
Пример #27
0
    def test_get_config(self):
        """get_config matches the default config with and without a cache."""
        index = LinearHashIndex()
        expected = LinearHashIndex.get_default_config()

        # No cache element set: config equals the class default.
        self.assertEqual(index.get_config(), expected)

        # Cache element attached: its fully-qualified type name is reported.
        index.cache_element = DataMemoryElement()
        cache_type = 'smqtk.representation.data_element.memory_element.DataMemoryElement'
        expected['cache_element']['type'] = cache_type
        self.assertEqual(index.get_config(), expected)
Пример #28
0
    def test_load_as_matrix_invalid_bytes(self):
        """
        Loading an element whose bytes are not a valid image raises IOError.
        """
        element = DataMemoryElement(content_type='image/png')
        element.set_bytes(b"not valid bytes")

        reader = PilImageReader()
        expected_message = ("Failed to identify image from bytes "
                            "provided by DataMemoryElement")
        with pytest.raises(IOError, match=expected_message):
            reader.load_as_matrix(element)
Пример #29
0
    def test_save_model_with_read_only_cache(self):
        """
        save_model must write nothing when either cache element (or both) is
        read-only.
        """
        functor = ItqFunctor()
        functor.mean_vec = numpy.array([1, 2, 3])
        functor.rotation = numpy.eye(3)

        empty = six.b('')
        # (mean-vec read-only?, rotation read-only?) for each scenario.
        readonly_scenarios = (
            (True, False),   # read-only mean-vec cache
            (False, True),   # read-only rotation cache
            (True, True),    # both read-only
        )
        for mean_vec_readonly, rotation_readonly in readonly_scenarios:
            functor.mean_vec_cache_elem = \
                DataMemoryElement(readonly=mean_vec_readonly)
            functor.rotation_cache_elem = \
                DataMemoryElement(readonly=rotation_readonly)

            functor.save_model()

            self.assertEqual(functor.mean_vec_cache_elem.get_bytes(), empty)
            self.assertEqual(functor.rotation_cache_elem.get_bytes(), empty)
Пример #30
0
    def dl_image(meta):
        """
        Fetch the image described by ``meta`` and save it under ``output_dir``.

        If the target file already exists on disk nothing is downloaded and
        the existing file is wrapped in a ``DataFileElement``. Otherwise the
        "stored" URL is tried first (with ``stored_http_auth``), falling back
        on the original URL, and the downloaded content is written to disk.

        :param meta: Mapping that must provide the keys ``'id'``, ``'index'``,
            ``'doc_type'`` and ``'fields'``; ``meta['fields']`` must provide
            ``'content_type'``, ``'obj_stored_url'`` and
            ``'obj_original_url'``, each indexable with ``[0]``.

        :return: ``(meta['id'], save_path, uuid)`` on success, or ``None``
            when no file extension could be guessed for the content type or
            when both downloads failed.

        :raises KeyError: If an expected key is missing from ``meta`` (logged
            and re-raised).
        """
        try:
            c_type = meta['fields']['content_type'][0]
            obj_stored_url = meta['fields']['obj_stored_url'][0]
            obj_original_url = meta['fields']['obj_original_url'][0]

            # NOTE(review): ``m`` comes from the enclosing scope; presumably a
            # ``mimetypes.MimeTypes`` instance -- confirm.
            c_ext = m.guess_extension(c_type, strict=False)
            if c_ext is None:
                log.warning(
                    "Guessed 'None' extension for content-type '%s', "
                    "skipping.", c_type)
                return None

            save_dir = os.path.abspath(
                os.path.expanduser(
                    os.path.join(output_dir, meta['index'], meta['doc_type'])))
            save_file = meta['id'] + c_ext
            save_path = os.path.join(save_dir, save_file)

            # Save/write file if needed
            if not os.path.isfile(save_path):
                # First try 'stored' url, fallback on original
                # Return None if failed to download anything
                ok, r = try_download(obj_stored_url, stored_http_auth)
                if not ok:
                    log.warning(
                        "Failed to download stored-data URL \"%s\" "
                        "(error=%s)", obj_stored_url, str(r))

                    ok, r = try_download(obj_original_url)
                    if not ok:
                        # Report the URL that actually failed here (the
                        # original one, not the stored one).
                        log.warning(
                            "Failed to download original URL \"%s\" "
                            "(error=%s)", obj_original_url, str(r))
                        return None

                # Assuming OK at this point
                content = r.content

                d = DataMemoryElement(content, c_type)

                safe_create_dir(save_dir)
                with open(save_path, 'wb') as out:
                    log.debug("Saving to file: '%s'", save_path)
                    out.write(content)
            else:
                d = DataFileElement(save_path)

            return meta['id'], save_path, d.uuid()
        except KeyError as ex:
            log.error("Failed to find key %s in meta block: %s", str(ex), meta)
            raise