Пример #1
0
 def find_by_phash(self,
                   phash: str,
                   minimum_should_match='10%',
                   pagination_from: int = 0,
                   pagination_size: int = 10) -> dict:
     """Look up images whose stored hash shares elements with ``phash``.

     One ``term`` clause is built per element of ``phash``, each targeting
     the field ``hash.hash_<position>``. The clauses are combined as the
     ``should`` part of a ``bool`` query.

     Args:
         phash: Hash to search for; it is iterated element by element.
         minimum_should_match: Forwarded unchanged to the ``bool`` query.
         pagination_from: Offset of the first hit to fetch.
         pagination_size: Maximum number of hits to fetch.

     Returns:
         A dict keyed by document id. Each value holds ``distance`` (the
         raw hit score), ``data`` (the hit itself), ``path`` and
         ``thumbnail_path``.
     """
     term_clauses = []
     for position, symbol in enumerate(phash):
         field_name = f'hash.hash_{position}'
         term_clauses.append(Q('term', **{field_name: symbol}))
     bool_query = Q('bool',
                    should=term_clauses,
                    minimum_should_match=minimum_should_match)

     search = Image.search(using=self._elasticsearch.database,
                           index=config.elasticsearch_index)
     search = search.query(bool_query)

     # Slicing the search object sets the from/size window of the request.
     page_end = pagination_from + pagination_size
     hits = search[pagination_from:page_end].execute()

     return {
         hit.meta.id: {
             'distance': hit.meta.score,  # TODO: normalize distance
             'data': hit,
             'path': f'{config.short_path}/full/{hit.image_path}',
             'thumbnail_path':
             f'{config.short_path}/thumbs/verybig/{hit.image_path}',
         }
         for hit in hits
     }
Пример #2
0
 def _was_already_scraped(self, source_id):
     """Report whether an 'e621' document with this source id is indexed.

     Counts the documents in the configured index whose ``source_website``
     term is ``'e621'`` and whose ``source_id`` term equals ``source_id``.

     Returns:
         True when at least one such document is counted, False otherwise.
     """
     search = ImgMatchImage.search(using=self._elasticsearch.database,
                                   index=config.elasticsearch_index)
     search = search.query('term', source_website='e621')
     search = search.query('term', source_id=source_id)
     return search.count() >= 1
Пример #3
0
 def _was_already_scraped(self, source_id):
     """Report whether a 'danbooru' document with this source id is indexed.

     Counts the documents in the configured index whose ``source_website``
     term is ``'danbooru'`` and whose ``source_id`` term equals
     ``source_id``.

     Returns:
         True when at least one such document is counted, False otherwise.
     """
     # TODO: duplicate code, remove
     search = ImgMatchImage.search(using=self._elasticsearch.database,
                                   index=config.elasticsearch_index)
     search = search.query('term', source_website='danbooru')
     search = search.query('term', source_id=source_id)
     return search.count() >= 1
Пример #4
0
 def get_elastic_record(self, image_id: str) -> dict:
     """Fetch a single image document by id and attach its file paths.

     Runs an ``ids`` query for ``image_id`` against the configured index,
     limited to one hit, and takes the first element of the response.

     Args:
         image_id: Document id to look up.

     Returns:
         A dict with ``data`` (the hit), ``path`` and ``thumbnail_path``.
     """
     search = Image.search(using=self._elasticsearch.database,
                           index=config.elasticsearch_index)
     search = search.query('ids', values=[image_id])

     # NOTE(review): taking element 0 presumably fails when nothing
     # matches image_id -- confirm how callers expect that to surface.
     record = search[0:1].execute()[0]

     full_path = f'{config.short_path}/full/{record.image_path}'
     thumbnail_path = (
         f'{config.short_path}/thumbs/verybig/{record.image_path}')
     return {
         'data': record,
         'path': full_path,
         'thumbnail_path': thumbnail_path,
     }
Пример #5
0
    def find(self,
             vectors: np.ndarray,
             pagination_from: int = 0,
             pagination_size: int = 10,
             partition_tags: list = None) -> dict:
        """Search Milvus for similar vectors and enrich hits from Elasticsearch.

        Only the first row of the Milvus results is used. The hits of that
        row are looked up in Elasticsearch by id, and every hit that is
        found there gets ``data``, ``path`` and ``thumbnail_path`` added.

        Args:
            vectors: Query records handed to the Milvus search.
            pagination_from: Number of leading Milvus hits to discard.
            pagination_size: Number of hits wanted after the discarded ones.
            partition_tags: Forwarded to the Milvus search unchanged.

        Returns:
            A dict keyed by the stringified Milvus id, each value holding
            at least ``distance`` and ``id``. ``None`` is returned when
            the Milvus status is not OK.
        """
        # TODO: Currently Milvus does not support pagination. This is an inefficient "pagination"
        # TODO: hack: we fetch more results than needed and then discard the unneeded ones
        status, results = self._milvus.database.search(
            collection_name=config.milvus_collection_name,
            query_records=vectors,
            top_k=pagination_from + pagination_size,
            params={"nprobe": 32},  # TODO: make it as a param
            partition_tags=partition_tags,
        )
        if not status.OK():
            return None

        # Drop the hits that belong to earlier pages.
        page_hits = list(results[0][pagination_from:])
        similar_results = {
            str(hit.id): {'distance': hit.distance, 'id': hit.id}
            for hit in page_hits
        }
        elastic_ids = [hit.id for hit in page_hits]

        search = Image.search(using=self._elasticsearch.database,
                              index=config.elasticsearch_index)
        search = search.query('ids', values=elastic_ids)
        documents = search[0:pagination_size].execute()

        for doc in documents:
            # TODO: use defaultdict
            similar_results[doc.meta.id].update({
                'data': doc,
                'path': f'{config.short_path}/full/{doc.image_path}',
                'thumbnail_path':
                f'{config.short_path}/thumbs/verybig/{doc.image_path}',
            })
        return similar_results  # TODO: return as list
Пример #6
0
 def create_elastic_index(self):
     """Call ``Image.init`` using this instance's Elasticsearch connection."""
     connection = self._elasticsearch.database
     Image.init(using=connection)
Пример #7
0
 def count(self):
     """Return the count of an unfiltered search over the configured index."""
     search = Image.search(
         using=self._elasticsearch.database,
         index=config.elasticsearch_index,
     )
     return search.count()