def test_add_many_readonly(self):
    """
    Adding many entries to a store backed by a read-only cache element
    should raise a ReadOnlyError.
    """
    ro_cache = DataMemoryElement(readonly=True)
    store = MemoryKeyValueStore(ro_cache)
    self.assertRaises(
        ReadOnlyError,
        store.add_many, {0: 1}
    )
def test_add_data(self):
    """
    Adding a data element should register it in the set's internal
    element map keyed by the element's UUID.
    """
    elem = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
    dm_set = DataMemorySet()
    dm_set.add_data(elem)
    ntools.assert_equal(dm_set._element_map, {elem.uuid(): elem})
def test_set_bytes(self):
    """
    Setting new bytes on a writable element should replace the bytes
    provided at construction.
    """
    initial = six.b('test bytes first set')
    replacement = six.b('the second set of bytes')
    elem = DataMemoryElement(initial)
    # Construction bytes visible first.
    self.assertEqual(elem.get_bytes(), initial)
    elem.set_bytes(replacement)
    # New bytes fully replace the old content.
    self.assertEqual(elem.get_bytes(), replacement)
def test_added_descriptor_table_caching(self):
    """
    Adding and removing descriptors on an index constructed with a
    writable cache element should persist the internal table (pickled)
    to that element after each mutation.
    """
    cache_elem = DataMemoryElement(readonly=False)
    descrs = [random_descriptor() for _ in range(3)]
    expected_table = dict((r.uuid(), r) for r in descrs)
    i = MemoryDescriptorIndex(cache_elem)
    # Cache should start out with no content.
    self.assertTrue(cache_elem.is_empty())

    # Should add descriptors to table, caching to writable element.
    i.add_many_descriptors(descrs)
    self.assertFalse(cache_elem.is_empty())
    self.assertEqual(pickle.loads(i.cache_element.get_bytes()),
                     expected_table)

    # Changing the internal table (remove, add) it should reflect in
    # cache
    new_d = random_descriptor()
    expected_table[new_d.uuid()] = new_d
    i.add_descriptor(new_d)
    self.assertEqual(pickle.loads(i.cache_element.get_bytes()),
                     expected_table)

    rm_d = list(expected_table.values())[0]
    del expected_table[rm_d.uuid()]
    i.remove_descriptor(rm_d.uuid())
    self.assertEqual(pickle.loads(i.cache_element.get_bytes()),
                     expected_table)
def test_set_bytes_when_readonly(self):
    """
    Attempting to overwrite bytes on a read-only element should raise a
    ReadOnlyError and leave the original bytes intact.
    """
    initial = six.b('test bytes first set')
    replacement = six.b('the second set of bytes')
    elem = DataMemoryElement(initial, readonly=True)
    self.assertEqual(elem.get_bytes(), initial)
    self.assertRaises(
        ReadOnlyError,
        elem.set_bytes, replacement
    )
    # Content must be unchanged after the failed write.
    self.assertEqual(elem.get_bytes(), initial)
def test_add_read_only(self):
    """
    Test that we cannot add when read-only (based on cache element).
    """
    store = MemoryKeyValueStore(DataMemoryElement(readonly=True))
    # Any key/value pair should be rejected, including None values.
    for key, value in (('a', 'b'), ('foo', None)):
        self.assertRaises(ReadOnlyError, store.add, key, value)
def test_save_model_with_readonly_cache(self):
    """
    Building an index backed by a read-only cache element should fail
    with a ValueError.
    """
    ro_cache = DataMemoryElement(readonly=True)
    index = SkLearnBallTreeHashIndex(ro_cache)
    hash_mat = numpy.random.randint(0, 2, 1000 * 256).reshape(1000, 256)
    nose.tools.assert_raises(
        ValueError,
        index.build_index, hash_mat
    )
def test_save_model_with_readonly_cache(self):
    """
    Internal ball-tree build against a read-only cache element should
    raise a ValueError.
    """
    ro_cache = DataMemoryElement(readonly=True)
    index = SkLearnBallTreeHashIndex(ro_cache)
    hash_mat = np.random.randint(0, 2, 1000 * 256).reshape(1000, 256)
    self.assertRaises(
        ValueError,
        index._build_bt_internal, hash_mat
    )
def test_get_config_with_cache_elements(self):
    """
    Configuration returned when cache elements are set should include
    constructor parameters and cache byte content as strings (JSON
    compliant).
    """
    functor = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                         random_seed=8)
    functor.mean_vec_cache_elem = DataMemoryElement(b'cached vec bytes')
    functor.rotation_cache_elem = DataMemoryElement(b'cached rot bytes')

    config = functor.get_config()
    for key, expected in [('bit_length', 5), ('itq_iterations', 6),
                          ('normalize', 7), ('random_seed', 8)]:
        self.assertEqual(config[key], expected)
    self.assertEqual(config['mean_vec_cache']['type'], "DataMemoryElement")
    # Check using string encodings of set bytes (JSON compliant).
    self.assertEqual(config['mean_vec_cache']['DataMemoryElement']['bytes'],
                     'cached vec bytes')
    self.assertEqual(config['rotation_cache']['DataMemoryElement']['bytes'],
                     'cached rot bytes')
def test_new_empty_cache(self):
    """
    Test construction with a cache element set with no bytes (empty).
    """
    empty_cache = DataMemoryElement()
    store = MemoryKeyValueStore(empty_cache)
    # Element retained, table initialized empty.
    self.assertEqual(store._cache_element, empty_cache)
    self.assertEqual(store._table, {})
def test_persistence_with_update_index(self):
    """
    Building an index, updating it with additional descriptors, then
    reconstructing an instance from the persisted entities should yield
    an index containing all descriptors and usable for nearest-neighbor
    queries.
    """
    n1 = 100
    n2 = 10
    dim = 8
    set1 = {DescriptorMemoryElement('test', i) for i in range(n1)}
    set2 = {DescriptorMemoryElement('test', i)
            for i in range(n1, n1 + n2)}
    [d.set_vector(np.random.rand(dim)) for d in (set1 | set2)]

    # Create index with persistent entities
    index_element = DataMemoryElement(
        content_type='application/octet-stream')
    index_param_element = DataMemoryElement(content_type='text/plain')
    index = self._make_inst(index_element=index_element,
                            index_param_element=index_param_element)
    # Capture the backing containers so a second instance can be
    # reconstructed from them below.
    descriptor_set = index._descriptor_set
    idx2uid_kvs = index._idx2uid_kvs
    uid2idx_kvs = index._uid2idx_kvs

    # Build initial index.
    index.build_index(set1)
    self.assertEqual(index.count(), len(set1))
    for d in set1:
        self.assertIn(d, index._descriptor_set)

    # Update and check that all intended descriptors are present in
    # index.
    index.update_index(set2)
    set_all = set1 | set2
    self.assertEqual(index.count(), len(set_all))
    for d in set_all:
        self.assertIn(d, index._descriptor_set)

    # Drop the first instance and rebuild purely from the persisted
    # entities captured above.
    del index
    index = self._make_inst(descriptor_set=descriptor_set,
                            idx2uid_kvs=idx2uid_kvs,
                            uid2idx_kvs=uid2idx_kvs,
                            index_element=index_element,
                            index_param_element=index_param_element)

    # Check that NN can return something from the updated set.
    # - nearest element to the query element when the query is in the
    #   index should be the query element.
    for q in set_all:
        n_elems, n_dists = index.nn(q)
        self.assertEqual(n_elems[0], q)
def test_init_with_cache(self):
    """
    Construction with a cache element holding a pickled map should
    restore that map into the new instance.
    """
    source_map = dict(a=1, b=2, c=3)
    cache = DataMemoryElement(bytes=pickle.dumps(source_map))
    dm_set = DataMemorySet(cache)
    self.assertEqual(dm_set.cache_element, cache)
    self.assertEqual(dm_set.pickle_protocol, -1)
    self.assertEqual(dm_set._element_map, source_map)
def test_get_config_with_cache_elements(self):
    """
    get_config should report constructor parameters plus the byte
    content of any set cache elements.
    """
    functor = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                         random_seed=8)
    functor.mean_vec_cache_elem = DataMemoryElement('cached vec bytes')
    functor.rotation_cache_elem = DataMemoryElement('cached rot bytes')

    c = functor.get_config()
    for key, expected in [('bit_length', 5), ('itq_iterations', 6),
                          ('normalize', 7), ('random_seed', 8)]:
        NT.assert_equal(c[key], expected)
    NT.assert_equal(c['mean_vec_cache']['type'], "DataMemoryElement")
    NT.assert_equal(c['mean_vec_cache']['DataMemoryElement']['bytes'],
                    'cached vec bytes')
    NT.assert_equal(c['rotation_cache']['DataMemoryElement']['bytes'],
                    'cached rot bytes')
def test_save_cache(self):
    """
    Building an index over hash vectors should write non-empty bytes to
    the provided cache element.
    """
    cache = DataMemoryElement()
    nose.tools.assert_true(cache.is_empty())
    index = LinearHashIndex(cache)
    # noinspection PyTypeChecker
    index.build_index([[0, 1, 0],
                       [1, 0, 0],
                       [0, 1, 1],
                       [0, 0, 1]])
    nose.tools.assert_false(cache.is_empty())
    nose.tools.assert_true(len(cache.get_bytes()) > 0)
def test_init_with_empty_cache(self):
    """
    Constructing with an empty cache element should retain constructor
    parameters and leave the ball-tree attribute unset.
    """
    blank_cache = DataMemoryElement()
    index = SkLearnBallTreeHashIndex(cache_element=blank_cache,
                                     leaf_size=52,
                                     random_seed=42)
    self.assertEqual(index.cache_element, blank_cache)
    self.assertEqual(index.leaf_size, 52)
    self.assertEqual(index.random_seed, 42)
    # No model should be loaded from an empty cache.
    self.assertIsNone(index.bt)
def test_cache_table_empty_table(self):
    """
    Caching an empty table should still write the pickled empty dict to
    the cache element.
    """
    inst = MemoryDescriptorSet(DataMemoryElement(), -1)
    inst._table = {}
    expected_bytes = pickle.dumps(inst._table, -1)

    inst.cache_table()
    self.assertIsNotNone(inst.cache_element)
    self.assertEqual(inst.cache_element.get_bytes(), expected_bytes)
def test_load_as_matrix_empty_data(self):
    """
    Test that we catch and do not load an empty data element.
    """
    no_bytes_elem = DataMemoryElement(readonly=True,
                                      content_type='image/png')
    assert no_bytes_elem.is_empty()
    expected_msg = "GdalImageReader cannot load 0-sized data"
    with pytest.raises(ValueError, match=expected_msg):
        GdalImageReader().load_as_matrix(no_bytes_elem)
def test_cache_table_empty_table(self):
    """
    Caching when the internal table is empty should still write a
    pickle of that empty table to the cache element.
    """
    index = MemoryDescriptorIndex(DataMemoryElement(), -1)
    index._table = {}
    expected_bytes = pickle.dumps(index._table, -1)

    index.cache_table()
    ntools.assert_is_not_none(index.cache_element)
    ntools.assert_equal(index.cache_element.get_bytes(), expected_bytes)
def test_configuration(self):
    """
    Standard configuration round-trip should preserve bytes, content
    type and read-only flag.
    """
    original = DataMemoryElement(
        bytes=b'Hello World.',
        content_type='text/plain',
        readonly=True,
    )
    for inst in configuration_test_helper(original):  # type: DataMemoryElement
        assert inst._bytes == b'Hello World.'
        assert inst._content_type == 'text/plain'
        assert inst._readonly is True
def test_repr_simple_cache(self):
    """
    Test representational string when a cache element is set.
    """
    store = MemoryKeyValueStore(DataMemoryElement())
    self.assertEqual(
        repr(store),
        "<MemoryKeyValueStore cache_element: "
        "DataMemoryElement{len(bytes): 0, content_type: "
        "None, readonly: False}>"
    )
def test_add_with_caching(self):
    """
    Each add on a store with a writable cache element should persist
    the full table (pickled) to that element.
    """
    store = MemoryKeyValueStore()
    store._cache_element = DataMemoryElement()

    for key, value in [('a', 'b'), ('foo', None), (0, 89)]:
        store.add(key, value)

    # Cache should now hold a pickle of the complete table.
    nose.tools.assert_equal(
        pickle.loads(store._cache_element.get_bytes()),
        {'a': 'b', 'foo': None, 0: 89})
def test_get_config(self):
    """
    Config should match defaults when no cache element is set, and
    report the memory element type when one is.
    """
    index = LinearHashIndex()

    # Without cache element
    expected = LinearHashIndex.get_default_config()
    nose.tools.assert_equal(index.get_config(), expected)

    # With cache element
    index.cache_element = DataMemoryElement()
    expected['cache_element']['type'] = 'DataMemoryElement'
    nose.tools.assert_equal(index.get_config(), expected)
def test_init_with_cache(self):
    """
    Construction with a cache element containing a pickled descriptor
    table should load that table into the new instance.
    """
    descriptors = [random_descriptor() for _ in range(4)]
    table = dict((d.uuid(), d) for d in descriptors)
    cache = DataMemoryElement(bytes=pickle.dumps(table))

    inst = MemoryDescriptorSet(cache)
    self.assertEqual(len(inst._table), 4)
    self.assertEqual(inst.cache_element, cache)
    self.assertEqual(inst._table, table)
    self.assertEqual(set(inst._table.values()), set(descriptors))
def test_load_as_matrix_no_bytes(self):
    """
    Test that a data element with no bytes fails to load.
    """
    # Not initializing any bytes
    empty_elem = DataMemoryElement(content_type='image/png')
    reader = PilImageReader()
    with pytest.raises(IOError, match="Failed to identify image from bytes "
                                      "provided by DataMemoryElement"):
        reader.load_as_matrix(empty_elem)
def test_load_cache(self):
    """
    A second index constructed on the same cache element should load
    the index built by the first (load happens at initialization).
    """
    shared_cache = DataMemoryElement()
    first = LinearHashIndex(shared_cache)
    # noinspection PyTypeChecker
    first.build_index([[0, 1, 0],
                       [1, 0, 0],
                       [0, 1, 1],
                       [0, 0, 1]])

    # load called on initialization.
    second = LinearHashIndex(shared_cache)
    nose.tools.assert_equal(first.cache_element, second.cache_element)
    nose.tools.assert_equal(first.index, second.index)
def test_save_cache_build_index(self):
    """
    Building an index should write the integer hash set to the cache
    element as a saved numpy array.
    """
    cache = DataMemoryElement()
    self.assertTrue(cache.is_empty())

    index = LinearHashIndex(cache)
    # noinspection PyTypeChecker
    index.build_index([[0, 1, 0],
                       [1, 0, 0],
                       [0, 1, 1],
                       [0, 0, 1]])
    self.assertFalse(cache.is_empty())

    # Check byte content
    cached_values = set(numpy.load(BytesIO(cache.get_bytes())))
    self.assertSetEqual({1, 2, 3, 4}, cached_values)
def test_get_config(self):
    """
    Config should match defaults when no cache element is set, and
    include the full memory element type path when one is.
    """
    index = LinearHashIndex()

    # Without cache element
    expected = LinearHashIndex.get_default_config()
    self.assertEqual(index.get_config(), expected)

    # With cache element
    index.cache_element = DataMemoryElement()
    expected['cache_element']['type'] = \
        'smqtk.representation.data_element.memory_element.DataMemoryElement'
    self.assertEqual(index.get_config(), expected)
def test_load_as_matrix_invalid_bytes(self):
    """
    Test that data element with invalid data bytes fails to load.
    """
    bad_elem = DataMemoryElement(content_type='image/png')
    bad_elem.set_bytes(b"not valid bytes")
    reader = PilImageReader()
    with pytest.raises(IOError, match="Failed to identify image from bytes "
                                      "provided by DataMemoryElement"):
        reader.load_as_matrix(bad_elem)
def test_save_model_with_read_only_cache(self):
    """
    save_model should write nothing to either cache element when one or
    both of them are read-only.
    """
    # If one or both cache elements are read-only, no saving.
    expected_mean_vec = numpy.array([1, 2, 3])
    expected_rotation = numpy.eye(3)

    itq = ItqFunctor()
    itq.mean_vec = expected_mean_vec
    itq.rotation = expected_rotation

    # read-only mean-vec cache
    itq.mean_vec_cache_elem = DataMemoryElement(readonly=True)
    itq.rotation_cache_elem = DataMemoryElement(readonly=False)
    itq.save_model()
    # Both elements stay empty: nothing was saved.
    self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), six.b(''))
    self.assertEqual(itq.rotation_cache_elem.get_bytes(), six.b(''))

    # read-only rotation cache
    itq.mean_vec_cache_elem = DataMemoryElement(readonly=False)
    itq.rotation_cache_elem = DataMemoryElement(readonly=True)
    itq.save_model()
    self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), six.b(''))
    self.assertEqual(itq.rotation_cache_elem.get_bytes(), six.b(''))

    # Both read-only
    itq.mean_vec_cache_elem = DataMemoryElement(readonly=True)
    itq.rotation_cache_elem = DataMemoryElement(readonly=True)
    itq.save_model()
    self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), six.b(''))
    self.assertEqual(itq.rotation_cache_elem.get_bytes(), six.b(''))
def dl_image(meta):
    """
    Download and locally save the image referenced by a metadata record.

    Tries the 'stored' URL first with the configured HTTP auth, falling
    back to the original URL. If the target file already exists on disk
    it is reused instead of re-downloading.

    :param meta: Metadata record. ``meta['fields']`` must contain
        single-element lists under ``content_type``, ``obj_stored_url``
        and ``obj_original_url``; ``meta`` itself must contain
        ``index``, ``doc_type`` and ``id``.
    :return: ``(id, save_path, data_uuid)`` triple on success, or None
        when no file extension could be guessed or both downloads
        failed.
    :raises KeyError: A required key was missing from ``meta`` (logged
        and re-raised).
    """
    try:
        c_type = meta['fields']['content_type'][0]
        obj_stored_url = meta['fields']['obj_stored_url'][0]
        obj_original_url = meta['fields']['obj_original_url'][0]

        c_ext = m.guess_extension(c_type, strict=False)
        if c_ext is None:
            log.warn("Guessed 'None' extension for content-type '%s', "
                     "skipping.", c_type)
            return None

        save_dir = os.path.abspath(os.path.expanduser(
            os.path.join(output_dir, meta['index'], meta['doc_type'])))
        save_file = meta['id'] + c_ext
        save_path = os.path.join(save_dir, save_file)

        # Save/write file if needed
        if not os.path.isfile(save_path):
            # First try 'stored' url, fallback on original
            # Return None if failed to download anything
            ok, r = try_download(obj_stored_url, stored_http_auth)
            if not ok:
                log.warn("Failed to download stored-data URL \"%s\" "
                         "(error=%s)", obj_stored_url, str(r))
                ok, r = try_download(obj_original_url)
                if not ok:
                    # Fixed: previously logged ``obj_stored_url`` here,
                    # misreporting which URL actually failed.
                    log.warn("Failed to download original URL \"%s\" "
                             "(error=%s)", obj_original_url, str(r))
                    return None

            # Assuming OK at this point
            content = r.content
            d = DataMemoryElement(content, c_type)
            safe_create_dir(save_dir)
            with open(save_path, 'wb') as out:
                log.debug("Saving to file: '%s'", save_path)
                out.write(content)
        else:
            d = DataFileElement(save_path)

        return meta['id'], save_path, d.uuid()
    # ``except KeyError, ex`` is Python-2-only syntax; ``as`` is valid on
    # both Python 2.6+ and Python 3.
    except KeyError as ex:
        log.error("Failed to find key %s in meta block: %s",
                  str(ex), meta)
        raise