class SourceImageStorage:
    """Stores source images and keeps their metadata in Elasticsearch.

    Image bytes are handed to an ``ImageService``; a document describing each
    image is indexed through an ``ElasticSearchDriver``.
    """

    def __init__(self, flush_data=False):
        """Create the image service and the Elasticsearch driver.

        :param flush_data: forwarded unchanged to ``ElasticSearchDriver`` as
            its ``flush_data`` argument.
        """
        logging.getLogger(__name__).info("Init SourceImageStorage")

        storage_params = StorageParameters(**config.FILE_SERVICE_PARAMETERS)
        self._storage_service = ImageService(storage_params=storage_params)

        self._es = ElasticSearchDriver(
            index=config.ELASTIC_SOURCE_IMAGES_INDEX,
            doc_type=config.ELASTIC_SOURCE_IMAGES_TYPE,
            flush_data=flush_data)

    def save_source_image(self, image: bytes,
                          metadata: SourceImageMetadata) -> str:
        """Store ``image`` and index a document describing it.

        :param image: encoded image bytes passed to the storage service.
        :param metadata: metadata used, together with the id returned by the
            storage service, to build the indexed document.
        :return: the value returned by the Elasticsearch driver's ``index``.
        """
        logging.getLogger(__name__).info(
            "Saving source image with metadata {}".format(metadata))

        stored_image_id = self._storage_service.put_encoded(image)
        document = create_doc(metadata, stored_image_id)
        return self._es.index(document)

    def get_metadata_by_id(self, image_id: str) -> SourceImageMetadata:
        """Fetch the document stored under ``image_id`` and wrap its image URL.

        :param image_id: id handed to the driver's ``get_doc``.
        :return: ``SourceImageMetadata`` whose ``path`` is the document's
            ``DOC_FIELD_IMAGE_URL`` field.
        """
        logging.getLogger(__name__).info(
            "loading image with id {}".format(image_id))

        stored_doc = self._es.get_doc(image_id)
        return SourceImageMetadata(path=stored_doc[DOC_FIELD_IMAGE_URL])
示例#2
0
    def test_put_and_get(self, unique_temp_index):
        """Index ``self.doc1`` and check that fetching by the returned id
        yields an equal document."""
        es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")

        stored_id = es_driver.index(self.doc1)
        fetched = es_driver.get_doc(stored_id)

        assert fetched == self.doc1
示例#3
0
class RegionRepository:
    """Saves image regions to Elasticsearch and searches them by descriptor."""

    def __init__(self, descriptor_shape: Iterable[int], flush_data=False):
        """Set up the Elasticsearch driver and the search-terms creator.

        :param descriptor_shape: passed to ``SearchTermsCreator``.
        :param flush_data: forwarded to ``ElasticSearchDriver``.
        """
        driver_settings = dict(index=config.ELASTIC_DESCRIPTOR_INDEX,
                               doc_type=config.ELASTIC_DESCRIPTOR_TYPE,
                               flush_data=flush_data)
        self._es = ElasticSearchDriver(**driver_settings)

        self._search_terms_creator = SearchTermsCreator(descriptor_shape)

    def save(self, image_region: ImageRegion, reference_to_source: str) -> str:
        """Index a document for ``image_region`` and return the driver's result.

        :param image_region: region whose descriptor is stored.
        :param reference_to_source: value stored under the source-id field.
        """
        return self._es.index(
            self._create_doc(image_region, reference_to_source))

    def find(self, descriptor: Descriptor) -> List[SearchResult]:
        """Search by the words derived from ``descriptor``.

        The word names themselves are passed as the second argument of
        ``search_by_words``.
        """
        query_words = self._get_words(descriptor)
        word_names = list(query_words.keys())
        return self._es.search_by_words(query_words, word_names)

    def _create_doc(self, image_region: ImageRegion, reference_to_source: str) -> Dict[str, object]:
        """Build the document: source id, descriptor vector and quantized words."""
        word_fields = self._search_terms_creator.get_dictionary_of_words(
            image_region.descriptor)
        fixed_fields = {
            SearchResult.FIELD_SOURCE_ID: reference_to_source,
            SearchResult.FIELD_DESCRIPTOR: image_region.descriptor.vector_as_lists,
        }
        # dict(**a, **b) is kept on purpose: it rejects duplicate keys.
        return dict(**fixed_fields, **word_fields)

    def _get_words(self, descriptor: Descriptor) -> Dict[str, object]:
        """Return the dictionary of words the terms creator yields for ``descriptor``."""
        terms_creator = self._search_terms_creator
        return terms_creator.get_dictionary_of_words(descriptor)
    def __init__(self, flush_data=False):
        """Create the image service and the source-images Elasticsearch driver.

        :param flush_data: forwarded unchanged to ``ElasticSearchDriver``.
        """
        logging.getLogger(__name__).info("Init SourceImageStorage")

        storage_params = StorageParameters(**config.FILE_SERVICE_PARAMETERS)
        self._storage_service = ImageService(storage_params=storage_params)

        self._es = ElasticSearchDriver(
            index=config.ELASTIC_SOURCE_IMAGES_INDEX,
            doc_type=config.ELASTIC_SOURCE_IMAGES_TYPE,
            flush_data=flush_data)
示例#5
0
    def test_search_by_words_on_empty_index(self, unique_temp_index):
        """Searching through a driver into which nothing was indexed must
        return an empty list."""
        query_words = {"word1": "value1", "word2": "value2"}
        second_argument = ["word1", "word2", "word3"]
        es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")

        found = es_driver.search_by_words(query_words, second_argument)

        assert found == []
示例#6
0
    def test_search_by_words_on_non_existent_index(self, index_name):
        """Searching through a driver built for ``index_name`` without indexing
        anything must return an empty list."""
        query_words = {"word1": "value1", "word2": "value2"}
        second_argument = ["word1", "word2", "word3"]
        es_driver = ElasticSearchDriver(index_name, "some-doc-type")

        found = es_driver.search_by_words(query_words, second_argument)

        assert found == []
示例#7
0
    def test_index_with_flush_works(self):
        """A driver built with ``True`` as fourth argument must index with
        ``refresh='wait_for'`` and return the mocked id."""
        # noinspection PyTypeChecker
        flushing_driver = ElasticSearchDriver(
            index, doc_type, self.mocked_elastic, True)
        self.mocked_elastic.index.return_value = mocked_index_response

        returned_id = flushing_driver.index(doc)

        assert returned_id is mocked_id
        expected_call = call(
            index=index, doc_type=doc_type, body=doc, refresh='wait_for')
        self.mocked_elastic.index.assert_has_calls([expected_call])
示例#8
0
    def test_search_by_words_works(self, unique_temp_index):
        """Index two documents and poll until a search on the shared word
        returns both of them.

        The search is retried up to ten times, one second apart, and the test
        fails if two results never show up.
        """
        driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
        driver.index(self.doc1)
        driver.index(self.doc2)

        for _ in range(10):
            search_results = driver.search_by_words(
                {"word2": "value2shared"}, ["word1", "word2", "word3"])
            if len(search_results) == 2:
                break
            # Wait only after an unsuccessful attempt, so that a search that
            # succeeds immediately does not cost an extra second.
            time.sleep(1)
        else:
            # Raised explicitly: a bare ``assert False`` is removed under -O.
            raise AssertionError(
                "Unable to fetch results in a reasonable time ")

        def by_source_id(result):
            return result.source_id

        expected = [SearchResult(self.payload1), SearchResult(self.payload2)]
        assert sorted(search_results, key=by_source_id) == \
            sorted(expected, key=by_source_id)
示例#9
0
    def test_default_driver(self, mocked_elastic):
        """Without an explicit client the driver must use the object returned
        by ``mocked_elastic`` and create the index exactly once.

        NOTE(review): ``mocked_elastic`` is presumably injected by a
        ``mock.patch`` decorator that is not part of this snippet - confirm.
        """
        client = mock.MagicMock(**{'indices.create.return_value': ''})
        mocked_elastic.return_value = client

        created_driver = ElasticSearchDriver(index, doc_type)

        assert created_driver._es is mocked_elastic.return_value
        client.indices.create.assert_called_once_with(index=index)
示例#10
0
    def test_constructor_with_existing_index(self, mocked_elastic):
        """The constructor must not propagate a ``TransportError`` reporting
        ``index_already_exists_exception`` from index creation.

        NOTE(review): ``mocked_elastic`` is presumably injected by a
        ``mock.patch`` decorator that is not part of this snippet - confirm.
        """
        already_exists = TransportError(
            234234, 'index_already_exists_exception', 'bla')
        client = mock.MagicMock(
            **{'indices.create.side_effect': [already_exists]})
        mocked_elastic.return_value = client

        ElasticSearchDriver(index, doc_type)

        client.indices.create.assert_called_once_with(index=index)
示例#11
0
    def test_failing_index_creation(self, mocked_elastic):
        """A ``TransportError(500, 'unknown', ...)`` raised while creating the
        index must propagate out of the constructor.

        NOTE(review): ``mocked_elastic`` is presumably injected by a
        ``mock.patch`` decorator that is not part of this snippet - confirm.
        """
        creation_failure = TransportError(500, 'unknown', 'bla')
        client = mock.MagicMock(
            **{'indices.create.side_effect': [creation_failure]})
        mocked_elastic.return_value = client

        with pytest.raises(TransportError):
            ElasticSearchDriver(index, doc_type)

        client.indices.create.assert_called_once_with(index=index)
示例#12
0
class TestElasticSearchDriver:
    """Unit tests for ``ElasticSearchDriver`` against a mocked client.

    ``setup_method`` gives every test its own mock client and driver. A mock
    shared at class level keeps a ``side_effect`` set by one test alive in
    the tests that follow; since ``side_effect`` takes precedence over
    ``return_value``, the "wrong response" tests would then pass because of
    the left-over exception instead of the response they configure, and the
    ``assert_called_once_with`` checks would depend on execution order.
    """

    def setup_method(self):
        """Create a fresh mocked client and a driver that uses it."""
        self.mocked_elastic = mock.MagicMock(Elasticsearch,
                                             indices=mock.MagicMock())
        # noinspection PyTypeChecker
        self.driver = ElasticSearchDriver(index, doc_type,
                                          self.mocked_elastic)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_default_driver(self, mocked_elastic):
        """Without an explicit client the driver uses the patched
        ``Elasticsearch`` return value and creates the index once."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.return_value = ''
        mocked_elastic.return_value = elastic_client

        driver = ElasticSearchDriver(index, doc_type)

        assert driver._es is mocked_elastic.return_value
        elastic_client.indices.create.assert_called_once_with(index=index)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_failing_index_creation(self, mocked_elastic):
        """A ``TransportError(500, 'unknown', ...)`` from index creation
        propagates out of the constructor."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.side_effect = [
            TransportError(500, 'unknown', 'bla')
        ]
        mocked_elastic.return_value = elastic_client

        with pytest.raises(TransportError):
            ElasticSearchDriver(index, doc_type)

        elastic_client.indices.create.assert_called_once_with(index=index)

    @mock.patch('impl.storage.elastic_search_driver.Elasticsearch', spec=True)
    def test_constructor_with_existing_index(self, mocked_elastic):
        """A ``TransportError`` reporting ``index_already_exists_exception``
        does not escape the constructor."""
        elastic_client = mock.MagicMock()
        elastic_client.indices.create.side_effect = [
            TransportError(234234, 'index_already_exists_exception', 'bla')
        ]
        mocked_elastic.return_value = elastic_client

        ElasticSearchDriver(index, doc_type)

        elastic_client.indices.create.assert_called_once_with(index=index)

    def test_non_default_driver(self):
        """An explicitly supplied client is stored as ``_es``."""
        assert self.driver._es is self.mocked_elastic

    def test_index_working(self):
        """``index`` returns the mocked id and calls the client with
        ``refresh=False``."""
        self.mocked_elastic.index.return_value = mocked_index_response

        result = self.driver.index(doc)

        assert result is mocked_id
        self.mocked_elastic.index.assert_called_once_with(index=index,
                                                          doc_type=doc_type,
                                                          body=doc,
                                                          refresh=False)

    def test_index_with_flush_works(self):
        """A driver built with ``True`` as fourth argument indexes with
        ``refresh='wait_for'``."""
        # noinspection PyTypeChecker
        driver_with_flush = ElasticSearchDriver(index, doc_type,
                                                self.mocked_elastic, True)
        self.mocked_elastic.index.return_value = mocked_index_response

        result = driver_with_flush.index(doc)

        assert result is mocked_id
        self.mocked_elastic.index.assert_has_calls([
            call(index=index, doc_type=doc_type, body=doc, refresh='wait_for')
        ])

    def test_index_with_exception(self):
        """A ``ConnectionError`` from the client surfaces as
        ``ElasticSearchDriverException``."""
        self.mocked_elastic.index.side_effect = ConnectionError
        with pytest.raises(ElasticSearchDriverException):
            self.driver.index(doc)

    def test_index_with_wrong_response(self):
        """``wrong_index_response`` from the client makes ``index`` raise
        ``ElasticSearchDriverException``."""
        self.mocked_elastic.index.return_value = wrong_index_response
        with pytest.raises(ElasticSearchDriverException):
            self.driver.index(doc)

    def test_get_doc_working(self):
        """``get_doc`` returns the document and queries the client by index,
        doc type and id."""
        self.mocked_elastic.get.return_value = mocked_get_response

        result = self.driver.get_doc(mocked_id)

        assert result == doc
        self.mocked_elastic.get.assert_called_once_with(index=index,
                                                        doc_type=doc_type,
                                                        id=mocked_id)

    def test_get_doc_with_exception(self):
        """A ``ConnectionError`` from the client surfaces as
        ``ElasticSearchDriverException``."""
        self.mocked_elastic.get.side_effect = ConnectionError
        with pytest.raises(ElasticSearchDriverException):
            self.driver.get_doc(mocked_id)

    def test_get_doc_with_wrong_response(self):
        """``wrong_get_response`` from the client makes ``get_doc`` raise
        ``ElasticSearchDriverException``."""
        self.mocked_elastic.get.return_value = wrong_get_response
        with pytest.raises(ElasticSearchDriverException):
            self.driver.get_doc(mocked_id)

    def test_search_by_words(self):
        """``search_by_words`` wraps every hit's ``_source`` in a
        ``SearchResult`` and sends one ``term`` clause per word, the excluded
        fields, the size and a ``'10s'`` timeout to the client."""
        # An unspecced mock is used here so that any ``search`` signature
        # is accepted.
        mocked_elastic_with_flexible_signature = mock.MagicMock()
        # noinspection PyTypeChecker
        driver_with_flexible_signature = ElasticSearchDriver(
            index, doc_type, mocked_elastic_with_flexible_signature)
        expected_result1 = {
            SearchResult.FIELD_DESCRIPTOR: [1],
            SearchResult.FIELD_SOURCE_ID: "some_id1"
        }
        expected_result2 = {
            SearchResult.FIELD_DESCRIPTOR: [2],
            SearchResult.FIELD_SOURCE_ID: "some_id2"
        }
        mocked_elastic_with_flexible_signature.search.return_value = {
            "something": 123,
            "hits": {
                "something-more":
                3245,
                "hits": [
                    {
                        '_source': expected_result1
                    },
                    {
                        '_source': expected_result2
                    },
                ]
            }
        }

        word1 = "word1"
        value1 = "value1"
        word2 = "word2"
        exclude_words = ["exclude1", "exclude2"]
        value2 = 5656
        size = 6556
        result = driver_with_flexible_signature.search_by_words(
            {
                word1: value1,
                word2: value2
            }, exclude_words, size)

        assert result == [
            SearchResult(x) for x in [expected_result1, expected_result2]
        ]
        mocked_elastic_with_flexible_signature.search.assert_called_once_with(
            index=index,
            doc_type=doc_type,
            body={
                'query': {
                    'bool': {
                        'should':
                        ListInAnyOrder([
                            {
                                "term": {
                                    word1: value1
                                }
                            },
                            {
                                "term": {
                                    word2: value2
                                }
                            },
                        ])
                    }
                },
                '_source': {
                    'excludes': exclude_words
                }
            },
            size=size,
            timeout='10s')
示例#13
0
    def test_search_by_words(self):
        """``search_by_words`` must wrap every hit's ``_source`` in a
        ``SearchResult`` and send one ``term`` clause per word, the excluded
        fields, the size and a ``'10s'`` timeout to the client."""
        flexible_client = mock.MagicMock()
        # noinspection PyTypeChecker
        flexible_driver = ElasticSearchDriver(index, doc_type, flexible_client)

        first_source = {
            SearchResult.FIELD_DESCRIPTOR: [1],
            SearchResult.FIELD_SOURCE_ID: "some_id1",
        }
        second_source = {
            SearchResult.FIELD_DESCRIPTOR: [2],
            SearchResult.FIELD_SOURCE_ID: "some_id2",
        }
        raw_hits = [{'_source': first_source}, {'_source': second_source}]
        flexible_client.search.return_value = {
            "something": 123,
            "hits": {"something-more": 3245, "hits": raw_hits},
        }

        query_words = {"word1": "value1", "word2": 5656}
        excluded_fields = ["exclude1", "exclude2"]
        max_hits = 6556

        found = flexible_driver.search_by_words(
            query_words, excluded_fields, max_hits)

        assert found == [SearchResult(first_source),
                         SearchResult(second_source)]

        term_clauses = [
            {"term": {"word1": "value1"}},
            {"term": {"word2": 5656}},
        ]
        expected_body = {
            'query': {'bool': {'should': ListInAnyOrder(term_clauses)}},
            '_source': {'excludes': excluded_fields},
        }
        flexible_client.search.assert_called_once_with(
            index=index,
            doc_type=doc_type,
            body=expected_body,
            size=max_hits,
            timeout='10s')
示例#14
0
    def __init__(self, descriptor_shape: Iterable[int], flush_data=False):
        """Set up the descriptor Elasticsearch driver and the terms creator.

        :param descriptor_shape: passed to ``SearchTermsCreator``.
        :param flush_data: forwarded to ``ElasticSearchDriver``.
        """
        driver_settings = dict(index=config.ELASTIC_DESCRIPTOR_INDEX,
                               doc_type=config.ELASTIC_DESCRIPTOR_TYPE,
                               flush_data=flush_data)
        self._es = ElasticSearchDriver(**driver_settings)

        self._search_terms_creator = SearchTermsCreator(descriptor_shape)
示例#15
0
    def test_get_non_existent(self, unique_temp_index):
        """Fetching an id that was never indexed must raise
        ``ElasticSearchDriverException``."""
        missing_id = "id that doesn't exist"
        es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")

        with pytest.raises(ElasticSearchDriverException):
            es_driver.get_doc(missing_id)