def find_by_phash(self, phash: str, minimum_should_match='10%', pagination_from: int = 0,
                  pagination_size: int = 10) -> dict:
    """Search the Elasticsearch index for images whose stored hash resembles ``phash``.

    Every position of ``phash`` becomes a ``term`` clause on the field
    ``hash.hash_<position>``. The clauses form the ``should`` part of a
    ``bool`` query, and ``minimum_should_match`` is handed to that query
    unchanged.

    :param phash: hash to look for; it is iterated element by element.
    :param minimum_should_match: forwarded to the ``bool`` query as-is.
    :param pagination_from: offset of the first hit to return.
    :param pagination_size: maximum number of hits to return.
    :return: dict keyed by hit id; each value holds ``distance`` (the hit's
        score), ``data`` (the hit itself), ``path`` and ``thumbnail_path``.
    """
    # One clause per hash position, so a document can match on any subset of them.
    position_clauses = [
        Q('term', **{f'hash.hash_{position}': symbol})
        for position, symbol in enumerate(phash)
    ]
    hash_query = Q('bool', should=position_clauses, minimum_should_match=minimum_should_match)

    search = Image.search(using=self._elasticsearch.database, index=config.elasticsearch_index)
    search = search.query(hash_query)

    pagination_to = pagination_from + pagination_size
    hits = search[pagination_from:pagination_to].execute()

    return {
        hit.meta.id: {
            'distance': hit.meta.score,  # TODO: normalize distance
            'data': hit,
            'path': f'{config.short_path}/full/{hit.image_path}',
            'thumbnail_path': f'{config.short_path}/thumbs/verybig/{hit.image_path}',
        }
        for hit in hits
    }
def _was_already_scraped(self, source_id):
    """Tell whether an e621 item with this ``source_id`` is already indexed.

    Counts the documents whose ``source_website`` is ``'e621'`` and whose
    ``source_id`` equals the given value.

    :param source_id: identifier to look up in the ``source_id`` field.
    :return: ``True`` when at least one such document exists, else ``False``.
    """
    search = ImgMatchImage.search(using=self._elasticsearch.database, index=config.elasticsearch_index)
    search = search.query('term', source_website='e621')
    search = search.query('term', source_id=source_id)
    return search.count() > 0
def _was_already_scraped(self, source_id):
    """Tell whether a danbooru item with this ``source_id`` is already indexed.

    Counts the documents whose ``source_website`` is ``'danbooru'`` and whose
    ``source_id`` equals the given value.

    :param source_id: identifier to look up in the ``source_id`` field.
    :return: ``True`` when at least one such document exists, else ``False``.
    """
    # TODO: duplicate code, remove
    search = ImgMatchImage.search(using=self._elasticsearch.database, index=config.elasticsearch_index)
    search = search.query('term', source_website='danbooru')
    search = search.query('term', source_id=source_id)
    return search.count() > 0
def get_elastic_record(self, image_id: str) -> dict:
    """Fetch a single image document from Elasticsearch by its document id.

    :param image_id: id passed to the ``ids`` query.
    :return: dict with ``data`` (the first hit), ``path`` and
        ``thumbnail_path``, both built from the hit's ``image_path``.

    NOTE(review): the first hit is taken unconditionally, so an id without a
    matching document presumably raises ``IndexError`` - confirm with callers.
    """
    search = Image.search(using=self._elasticsearch.database, index=config.elasticsearch_index)
    by_id = search.query('ids', values=[image_id])

    # At most one document is requested; the first hit is the record.
    record = by_id[0:1].execute()[0]
    image_path = record.image_path

    return {
        'data': record,
        'path': f'{config.short_path}/full/{image_path}',
        'thumbnail_path': f'{config.short_path}/thumbs/verybig/{image_path}',
    }
def find(self, vectors: np.ndarray, pagination_from: int = 0, pagination_size: int = 10,
         partition_tags: list = None) -> dict:
    """Find images similar to ``vectors`` in Milvus and attach their Elasticsearch records.

    The Milvus hits of the first query record are collected first, then the
    matching Elasticsearch documents are fetched by id and merged into the
    same entries.

    :param vectors: query records handed to the Milvus search.
    :param pagination_from: number of leading Milvus hits to skip.
    :param pagination_size: number of hits wanted after the skipped ones.
    :param partition_tags: forwarded to the Milvus search unchanged.
    :return: dict keyed by the stringified Milvus id. Every entry has
        ``distance`` and ``id``; entries for which an Elasticsearch document
        was returned also have ``data``, ``path`` and ``thumbnail_path``.
    :raises RuntimeError: when Milvus reports a non-OK status for the search.
    """
    search_param = {
        "nprobe": 32  # TODO: make it as a param
    }

    # TODO: Currently Milvus does not support pagination. This is an inefficient "pagination"
    # TODO: hack: we fetch more results than needed and then discard the unneeded ones
    param = {
        'collection_name': config.milvus_collection_name,
        'query_records': vectors,
        'top_k': pagination_from + pagination_size,
        'params': search_param,
        'partition_tags': partition_tags
    }

    status, results = self._milvus.database.search(**param)
    if not status.OK():
        # Previously this path fell through to the return statement with the
        # result dict never assigned, failing with an UnboundLocalError that
        # hid the actual Milvus error.
        raise RuntimeError(f'Milvus search failed: {status}')

    elastic_ids = []
    similar_results = {}
    # Only the first query record's hits are used; the skipped prefix is the
    # "previous pages" part of the pagination hack described above.
    for res in results[0][pagination_from:]:
        similar_results[str(res.id)] = {
            'distance': res.distance,
            'id': res.id
        }
        elastic_ids.append(res.id)

    elastic_search = Image.search(using=self._elasticsearch.database, index=config.elasticsearch_index) \
        .query('ids', values=elastic_ids)
    response = elastic_search[0:pagination_size].execute()

    for img in response:
        entry = similar_results[img.meta.id]  # TODO: use defaultdict
        entry['data'] = img
        entry['path'] = f'{config.short_path}/full/{img.image_path}'
        entry['thumbnail_path'] = f'{config.short_path}/thumbs/verybig/{img.image_path}'

    return similar_results  # TODO: return as list
def create_elastic_index(self):
    """Run ``Image.init`` against this object's Elasticsearch connection."""
    connection = self._elasticsearch.database
    Image.init(using=connection)
def count(self):
    """Return the count reported by an unfiltered ``Image`` search on the configured index."""
    search = Image.search(using=self._elasticsearch.database, index=config.elasticsearch_index)
    return search.count()