class SourceImageStorage:
    """Store source images and index their metadata in Elasticsearch.

    Image bytes are handed to an ``ImageService``; a document built from the
    metadata and the id returned by that service is indexed through an
    ``ElasticSearchDriver``.
    """

    def __init__(self, flush_data=False):
        """Create the image service and the Elasticsearch driver.

        :param flush_data: forwarded unchanged to ``ElasticSearchDriver``.
        """
        logger = logging.getLogger(__name__)
        logger.info("Init SourceImageStorage")
        self._storage_service = ImageService(storage_params=StorageParameters(
            **config.FILE_SERVICE_PARAMETERS))
        self._es = ElasticSearchDriver(
            index=config.ELASTIC_SOURCE_IMAGES_INDEX,
            doc_type=config.ELASTIC_SOURCE_IMAGES_TYPE,
            flush_data=flush_data)

    def save_source_image(self, image: bytes,
                          metadata: SourceImageMetadata) -> str:
        """Store ``image`` and index a document describing it.

        :param image: encoded image bytes passed to the image service.
        :param metadata: metadata used to build the indexed document.
        :return: the id returned by the Elasticsearch driver for the document.
        """
        logger = logging.getLogger(__name__)
        # Lazy %-style arguments: the message is only rendered when a handler
        # actually emits the record.
        logger.info("Saving source image with metadata %s", metadata)
        id_cached = self._storage_service.put_encoded(image)
        source_image_elastic_id = self._es.index(
            create_doc(metadata, id_cached))
        return source_image_elastic_id

    def get_metadata_by_id(self, image_id: str) -> SourceImageMetadata:
        """Load the indexed document for ``image_id`` and rebuild its metadata.

        :param image_id: id of the document as returned by
            ``save_source_image``.
        :return: metadata whose ``path`` is the document's
            ``DOC_FIELD_IMAGE_URL`` field.
        """
        logger = logging.getLogger(__name__)
        logger.info("loading image with id %s", image_id)
        raw_doc = self._es.get_doc(image_id)
        metadata = SourceImageMetadata(path=raw_doc[DOC_FIELD_IMAGE_URL])
        return metadata
def test_put_and_get(self, unique_temp_index):
    """A document indexed through the driver is returned equal by ``get_doc``."""
    es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    stored_id = es_driver.index(self.doc1)
    fetched = es_driver.get_doc(stored_id)
    assert fetched == self.doc1
class RegionRepository:
    """Persist image regions in Elasticsearch and look them up by descriptor."""

    def __init__(self, descriptor_shape: Iterable[int], flush_data=False):
        """Set up the Elasticsearch driver and the search-terms creator.

        :param descriptor_shape: passed to ``SearchTermsCreator``.
        :param flush_data: forwarded unchanged to ``ElasticSearchDriver``.
        """
        driver_options = dict(
            index=config.ELASTIC_DESCRIPTOR_INDEX,
            doc_type=config.ELASTIC_DESCRIPTOR_TYPE,
            flush_data=flush_data,
        )
        self._es = ElasticSearchDriver(**driver_options)
        self._search_terms_creator = SearchTermsCreator(descriptor_shape)

    def save(self, image_region: ImageRegion, reference_to_source: str) -> str:
        """Index a document for ``image_region`` and return the driver's id."""
        return self._es.index(
            self._create_doc(image_region, reference_to_source))

    def find(self, descriptor: Descriptor) -> List[SearchResult]:
        """Search by the words derived from ``descriptor``.

        The word names are also passed as the second argument of
        ``search_by_words``.
        """
        search_terms = self._get_words(descriptor)
        term_names = list(search_terms.keys())
        return self._es.search_by_words(search_terms, term_names)

    def _create_doc(self, image_region: ImageRegion,
                    reference_to_source: str) -> Dict[str, object]:
        """Build the document: source id, descriptor vector and its words."""
        word_fields = self._search_terms_creator.get_dictionary_of_words(
            image_region.descriptor)
        fixed_fields = {
            SearchResult.FIELD_SOURCE_ID: reference_to_source,
            SearchResult.FIELD_DESCRIPTOR:
                image_region.descriptor.vector_as_lists,
        }
        return dict(**fixed_fields, **word_fields)

    def _get_words(self, descriptor: Descriptor) -> Dict[str, object]:
        """Return the dictionary of words the terms creator derives."""
        creator = self._search_terms_creator
        return creator.get_dictionary_of_words(descriptor)
def __init__(self, flush_data=False):
    """Create the image service and the Elasticsearch driver.

    :param flush_data: forwarded unchanged to ``ElasticSearchDriver``.
    """
    logging.getLogger(__name__).info("Init SourceImageStorage")
    file_service_params = StorageParameters(**config.FILE_SERVICE_PARAMETERS)
    self._storage_service = ImageService(storage_params=file_service_params)
    self._es = ElasticSearchDriver(
        index=config.ELASTIC_SOURCE_IMAGES_INDEX,
        doc_type=config.ELASTIC_SOURCE_IMAGES_TYPE,
        flush_data=flush_data,
    )
def test_search_by_words_on_empty_index(self, unique_temp_index):
    """Searching an index nothing was written to returns an empty list."""
    es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    query_words = {"word1": "value1", "word2": "value2"}
    second_argument = ["word1", "word2", "word3"]
    hits = es_driver.search_by_words(query_words, second_argument)
    assert hits == []
def test_search_by_words_on_non_existent_index(self, index_name):
    """Searching through a driver built for ``index_name`` returns no hits.

    NOTE(review): the test name implies the ``index_name`` fixture names an
    index that does not exist yet -- confirm against the fixture.
    """
    es_driver = ElasticSearchDriver(index_name, "some-doc-type")
    query_words = {"word1": "value1", "word2": "value2"}
    second_argument = ["word1", "word2", "word3"]
    hits = es_driver.search_by_words(query_words, second_argument)
    assert hits == []
def test_index_with_flush_works(self):
    """A driver built with the flush flag indexes with ``refresh='wait_for'``."""
    # noinspection PyTypeChecker
    flushing_driver = ElasticSearchDriver(
        index, doc_type, self.mocked_elastic, True)
    self.mocked_elastic.index.return_value = mocked_index_response

    returned_id = flushing_driver.index(doc)

    assert returned_id is mocked_id
    expected_call = call(
        index=index, doc_type=doc_type, body=doc, refresh='wait_for')
    self.mocked_elastic.index.assert_has_calls([expected_call])
def test_search_by_words_works(self, unique_temp_index):
    """Two indexed documents sharing a word are both found by that word.

    The search is retried up to ten times, one second apart, until it returns
    two results; the test fails if that never happens.
    """
    driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    driver.index(self.doc1)
    driver.index(self.doc2)
    for _ in range(10):
        search_results = driver.search_by_words(
            {"word2": "value2shared"}, ["word1", "word2", "word3"])
        if len(search_results) == 2:
            break
        # Wait only when another attempt is needed, so a search that already
        # succeeded does not cost an extra second.
        time.sleep(1)
    else:
        # Explicit raise instead of ``assert False`` so the failure survives
        # running Python with -O.
        raise AssertionError("Unable to fetch results in a reasonable time ")

    # Compare order-insensitively by sorting both sides on the source id.
    assert sorted(search_results, key=lambda x: x.source_id) == \
        sorted([SearchResult(self.payload1), SearchResult(self.payload2)],
               key=lambda x: x.source_id)
def test_default_driver(self, mocked_elastic):
    """Without an explicit client the driver uses the one it builds itself.

    ``mocked_elastic`` stands in for the client factory; the driver must keep
    its return value and create the index exactly once.
    """
    client_stub = mock.MagicMock(**{'indices.create.return_value': ''})
    mocked_elastic.return_value = client_stub

    created_driver = ElasticSearchDriver(index, doc_type)

    assert created_driver._es is mocked_elastic.return_value
    client_stub.indices.create.assert_called_once_with(index=index)
def test_constructor_with_existing_index(self, mocked_elastic):
    """Constructing the driver is expected not to raise when index creation
    fails with an ``index_already_exists_exception`` transport error."""
    already_exists = TransportError(
        234234, 'index_already_exists_exception', 'bla')
    client_stub = mock.MagicMock(
        **{'indices.create.side_effect': [already_exists]})
    mocked_elastic.return_value = client_stub

    ElasticSearchDriver(index, doc_type)

    client_stub.indices.create.assert_called_once_with(index=index)
def test_failing_index_creation(self, mocked_elastic):
    """A ``TransportError(500, 'unknown', ...)`` raised while creating the
    index propagates out of the driver constructor."""
    unknown_failure = TransportError(500, 'unknown', 'bla')
    client_stub = mock.MagicMock(
        **{'indices.create.side_effect': [unknown_failure]})
    mocked_elastic.return_value = client_stub

    with pytest.raises(TransportError):
        ElasticSearchDriver(index, doc_type)

    client_stub.indices.create.assert_called_once_with(index=index)
class TestElasticSearchDriver:
    """Unit tests for ``ElasticSearchDriver`` against a mocked client."""

    def setup_method(self):
        """Give every test its own client mock and driver.

        With a mock shared by the whole class, the ``side_effect`` installed
        by the ``*_with_exception`` tests stays in place; ``side_effect``
        takes precedence over ``return_value``, so the following
        ``*_with_wrong_response`` tests would raise for the wrong reason, and
        the ``assert_called_once_with`` checks would depend on test order.
        """
        self.mocked_elastic = mock.MagicMock(Elasticsearch,
                                             indices=mock.MagicMock())
        # noinspection PyTypeChecker
        self.driver = ElasticSearchDriver(index, doc_type, self.mocked_elastic)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_default_driver(self, mocked_elastic):
        """Without an explicit client the driver builds one and creates the index."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.return_value = ''
        mocked_elastic.return_value = elastic_client
        driver = ElasticSearchDriver(index, doc_type)
        assert driver._es is mocked_elastic.return_value
        elastic_client.indices.create.assert_called_once_with(index=index)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_failing_index_creation(self, mocked_elastic):
        """An unknown transport error during index creation propagates."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.side_effect = [
            TransportError(500, 'unknown', 'bla')
        ]
        mocked_elastic.return_value = elastic_client
        with pytest.raises(TransportError):
            ElasticSearchDriver(index, doc_type)
        elastic_client.indices.create.assert_called_once_with(index=index)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_constructor_with_existing_index(self, mocked_elastic):
        """An ``index_already_exists_exception`` is expected not to propagate."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.side_effect = [
            TransportError(234234, 'index_already_exists_exception', 'bla')
        ]
        mocked_elastic.return_value = elastic_client
        ElasticSearchDriver(index, doc_type)
        elastic_client.indices.create.assert_called_once_with(index=index)

    def test_non_default_driver(self):
        """A client passed to the constructor is used as-is."""
        assert self.driver._es is self.mocked_elastic

    def test_index_working(self):
        """``index`` returns the id from the response and does not refresh."""
        self.mocked_elastic.index.return_value = mocked_index_response
        result = self.driver.index(doc)
        assert result is mocked_id
        self.mocked_elastic.index.assert_called_once_with(
            index=index, doc_type=doc_type, body=doc, refresh=False)

    def test_index_with_flush_works(self):
        """A driver built with the flush flag indexes with ``refresh='wait_for'``."""
        # noinspection PyTypeChecker
        driver_with_flush = ElasticSearchDriver(index, doc_type,
                                                self.mocked_elastic, True)
        self.mocked_elastic.index.return_value = mocked_index_response
        result = driver_with_flush.index(doc)
        assert result is mocked_id
        self.mocked_elastic.index.assert_has_calls([
            call(index=index, doc_type=doc_type, body=doc, refresh='wait_for')
        ])

    def test_index_with_exception(self):
        """A client ``ConnectionError`` surfaces as ``ElasticSearchDriverException``."""
        self.mocked_elastic.index.side_effect = ConnectionError
        with pytest.raises(ElasticSearchDriverException):
            self.driver.index(doc)

    def test_index_with_wrong_response(self):
        """A malformed index response surfaces as ``ElasticSearchDriverException``."""
        self.mocked_elastic.index.return_value = wrong_index_response
        with pytest.raises(ElasticSearchDriverException):
            self.driver.index(doc)

    def test_get_doc_working(self):
        """``get_doc`` returns the stored document and queries by id."""
        self.mocked_elastic.get.return_value = mocked_get_response
        result = self.driver.get_doc(mocked_id)
        assert result == doc
        self.mocked_elastic.get.assert_called_once_with(
            index=index, doc_type=doc_type, id=mocked_id)

    def test_get_doc_with_exception(self):
        """A client ``ConnectionError`` surfaces as ``ElasticSearchDriverException``."""
        self.mocked_elastic.get.side_effect = ConnectionError
        with pytest.raises(ElasticSearchDriverException):
            self.driver.get_doc(mocked_id)

    def test_get_doc_with_wrong_response(self):
        """A malformed get response surfaces as ``ElasticSearchDriverException``."""
        self.mocked_elastic.get.return_value = wrong_get_response
        with pytest.raises(ElasticSearchDriverException):
            self.driver.get_doc(mocked_id)

    def test_search_by_words(self):
        """``search_by_words`` builds a bool/should term query and wraps hits."""
        # An unspecced mock, so ``search`` accepts any keyword arguments.
        mocked_elastic_with_flexible_signature = mock.MagicMock()
        # noinspection PyTypeChecker
        driver_with_flexible_signature = ElasticSearchDriver(
            index, doc_type, mocked_elastic_with_flexible_signature)
        expected_result1 = {
            SearchResult.FIELD_DESCRIPTOR: [1],
            SearchResult.FIELD_SOURCE_ID: "some_id1"
        }
        expected_result2 = {
            SearchResult.FIELD_DESCRIPTOR: [2],
            SearchResult.FIELD_SOURCE_ID: "some_id2"
        }
        mocked_elastic_with_flexible_signature.search.return_value = {
            "something": 123,
            "hits": {
                "something-more": 3245,
                "hits": [
                    {'_source': expected_result1},
                    {'_source': expected_result2},
                ]
            }
        }
        word1 = "word1"
        value1 = "value1"
        word2 = "word2"
        exclude_words = ["exclude1", "exclude2"]
        value2 = 5656
        size = 6556
        result = driver_with_flexible_signature.search_by_words(
            {
                word1: value1,
                word2: value2
            }, exclude_words, size)
        assert result == [
            SearchResult(x) for x in [expected_result1, expected_result2]
        ]
        mocked_elastic_with_flexible_signature.search.assert_called_once_with(
            index=index,
            doc_type=doc_type,
            body={
                'query': {
                    'bool': {
                        'should': ListInAnyOrder([
                            {"term": {word1: value1}},
                            {"term": {word2: value2}},
                        ])
                    }
                },
                '_source': {
                    'excludes': exclude_words
                }
            },
            size=size,
            timeout='10s')
def test_search_by_words(self):
    """``search_by_words`` builds a bool/should term query and wraps hits.

    The client is an unspecced mock so ``search`` accepts any keyword
    arguments; each ``_source`` of the canned response must come back wrapped
    in a ``SearchResult``.
    """
    flexible_client = mock.MagicMock()
    # noinspection PyTypeChecker
    flexible_driver = ElasticSearchDriver(index, doc_type, flexible_client)

    hit_sources = [
        {
            SearchResult.FIELD_DESCRIPTOR: [1],
            SearchResult.FIELD_SOURCE_ID: "some_id1",
        },
        {
            SearchResult.FIELD_DESCRIPTOR: [2],
            SearchResult.FIELD_SOURCE_ID: "some_id2",
        },
    ]
    flexible_client.search.return_value = {
        "something": 123,
        "hits": {
            "something-more": 3245,
            "hits": [{'_source': source} for source in hit_sources],
        },
    }

    first_word, first_value = "word1", "value1"
    second_word, second_value = "word2", 5656
    exclude_words = ["exclude1", "exclude2"]
    size = 6556

    found = flexible_driver.search_by_words(
        {first_word: first_value, second_word: second_value},
        exclude_words,
        size)

    assert found == [SearchResult(source) for source in hit_sources]

    expected_terms = ListInAnyOrder([
        {"term": {first_word: first_value}},
        {"term": {second_word: second_value}},
    ])
    expected_body = {
        'query': {'bool': {'should': expected_terms}},
        '_source': {'excludes': exclude_words},
    }
    flexible_client.search.assert_called_once_with(
        index=index,
        doc_type=doc_type,
        body=expected_body,
        size=size,
        timeout='10s')
def __init__(self, descriptor_shape: Iterable[int], flush_data=False):
    """Set up the Elasticsearch driver and the search-terms creator.

    :param descriptor_shape: passed to ``SearchTermsCreator``.
    :param flush_data: forwarded unchanged to ``ElasticSearchDriver``.
    """
    driver_options = dict(
        index=config.ELASTIC_DESCRIPTOR_INDEX,
        doc_type=config.ELASTIC_DESCRIPTOR_TYPE,
        flush_data=flush_data,
    )
    self._es = ElasticSearchDriver(**driver_options)
    self._search_terms_creator = SearchTermsCreator(descriptor_shape)
def test_get_non_existent(self, unique_temp_index):
    """Fetching an id that was never indexed raises the driver exception."""
    es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    missing_id = "id that doesn't exist"
    with pytest.raises(ElasticSearchDriverException):
        es_driver.get_doc(missing_id)