def add():
    """Index a single reference image given by URL.

    Builds a default Elasticsearch client, wraps it in a SignatureES and
    hands it the URL of the image to add.

    Returns:
        The literal string ``'ok'`` once ``add_image`` has returned.
    """
    image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
    client = Elasticsearch()
    index = SignatureES(client)
    index.add_image(image_url)
    return 'ok'
def _record_failure(filePath, label, message):
    """Add *filePath* to the shared failure list and report it.

    The list update and both prints happen while holding the module-level
    ``lock``; the ``with`` block guarantees the lock is released even if
    printing raises.
    """
    with lock:
        failedList.append(filePath)
        print(label, message)
        print()


def reAndThum(filePath, outFolder, outSize=(200, 200)):
    """Rename a file, make its thumbnail and index the thumbnail.

    The three steps run in order and the first one that fails stops the
    pipeline; the original ``filePath`` is then appended to the module-level
    ``failedList``.

    Args:
        filePath: Path of the file to process.
        outFolder: Folder passed to ``mkThumb`` for the thumbnail.
        outSize: Thumbnail size passed to ``mkThumb``.
    """
    # Step 1: try to rename the file; a flag of -1 signals failure.
    flag1, outPath = reName(filePath)
    if flag1 == -1:
        _record_failure(filePath, os.path.basename(outPath), "failed.")
        return

    # Step 2: build the thumbnail from the renamed file.
    flag2, thumbPath = mkThumb(outPath, outFolder, outSize)
    if flag2 == -1:
        _record_failure(filePath, os.path.basename(outPath), "failed.")
        return

    # Step 3: add the image (its thumbnail) to the image-match index.
    # Deliberately best-effort: any error is recorded instead of propagated.
    try:
        es = Elasticsearch()
        ses = SignatureES(es)
        ses.add_image(thumbPath)
    except Exception:
        _record_failure(
            filePath,
            os.path.basename(outPath),
            "Failed to add to image-match database.",
        )
class WorkWithSignatures():
    """Holds image-signature settings plus a shared Elasticsearch/SignatureES pair.

    The settings, the client and the SignatureES are class attributes, so
    they are created once when the class body executes; the methods then
    rebind them on the instance they are called on.
    """

    # Signature-generation settings forwarded to SignatureES.
    n_grid = 9
    crop_percentile = (5, 95)
    P = None
    diagonal_neighbors = True
    identical_tolerance = 2 / 255
    n_levels = 2
    # Passed as ``all_orientations`` when searching.
    search_rotated = False

    es = Elasticsearch(
        ['elasticsearch'],
        port=9200,
    )
    ses = SignatureES(es,
                      n_grid=n_grid,
                      crop_percentile=crop_percentile,
                      diagonal_neighbors=diagonal_neighbors,
                      identical_tolerance=identical_tolerance,
                      n_levels=n_levels)

    def clear_db(self):
        """Delete the 'images' index and rebuild the client and SignatureES.

        400/404 responses from the delete call are ignored.
        """
        self.es.indices.delete(index='images', ignore=[400, 404])
        # NOTE(review): this reconnects with the default client arguments,
        # not the 'elasticsearch' host used at class level - confirm intended.
        self.es = Elasticsearch()
        self.ses = SignatureES(self.es,
                               n_grid=self.n_grid,
                               crop_percentile=self.crop_percentile,
                               diagonal_neighbors=self.diagonal_neighbors,
                               identical_tolerance=self.identical_tolerance,
                               n_levels=self.n_levels)

    def reload_params(self, params):
        """Store new settings and rebuild the SignatureES from them.

        Args:
            params: Mapping with the keys 'n_grid', 'crop_percentile', 'P',
                'diagonal_neighbors', 'identical_tolerance', 'n_levels' and
                'search_rotated'. It is not modified.

        Raises:
            KeyError: If one of the keys listed above is missing.
        """
        self.n_grid = params['n_grid']
        self.crop_percentile = params['crop_percentile']
        self.P = params['P']
        self.diagonal_neighbors = params['diagonal_neighbors']
        self.identical_tolerance = params['identical_tolerance']
        self.n_levels = params['n_levels']
        self.search_rotated = params['search_rotated']
        # 'search_rotated' is a search-time option, not a SignatureES
        # argument; filter it into a copy instead of popping it from the
        # caller's dict.
        signature_kwargs = {
            key: value
            for key, value in params.items()
            if key != "search_rotated"
        }
        self.ses = SignatureES(self.es, **signature_kwargs)

    def get_all_params(self):
        """Return the current settings as a dict (same keys reload_params reads)."""
        return {
            'n_grid': self.n_grid,
            'crop_percentile': self.crop_percentile,
            'P': self.P,
            'diagonal_neighbors': self.diagonal_neighbors,
            'identical_tolerance': self.identical_tolerance,
            'n_levels': self.n_levels,
            'search_rotated': self.search_rotated
        }

    def load_file(self, path):
        """Add the image at *path* to the index."""
        self.ses.add_image(path)

    def search_file(self, file_bytes):
        """Search the index for an image supplied as raw bytes.

        Returns:
            Whatever ``SignatureES.search_image`` returns.
        """
        return self.ses.search_image(file_bytes,
                                     bytestream=True,
                                     all_orientations=self.search_rotated)
def search():
    """Search the index for one sample image URL and print the result.

    Returns:
        The literal string ``'ok'``.
    """
    es = Elasticsearch()
    ses = SignatureES(es)
    image = ses.search_image(
        'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'
    )
    # Call form instead of the Python 2 print statement: with a single
    # argument it prints the same text on Python 2 and parses on Python 3.
    print(image)
    return 'ok'
def add_files():
    """Add every file yielded by ``get_files()`` to the index.

    Each file is logged with its zero-based position before being added.
    """
    index = SignatureES(Elasticsearch())
    for position, path in enumerate(get_files()):
        logger.info('{0} Adding file {1}'.format(position, path))
        index.add_image(path)
def main():
    """Index every file that ``walk`` yields for the hard-coded image directory.

    Connects to Elasticsearch at 127.0.0.1:9200 and writes to the 'images'
    index with doc type 'image'; each file is reported after it is added.
    """
    client = Elasticsearch(hosts=["127.0.0.1:9200"])
    index = SignatureES(client, index='images', doc_type='image')
    source_dir = '/home/key/图片/image_search_data'
    for image_file in walk(source_dir):
        index.add_image(image_file)
        print('index image: {}'.format(image_file))
def reload_params(self, params):
    """Store new signature settings and rebuild ``self.ses`` from them.

    Args:
        params: Mapping with the keys 'n_grid', 'crop_percentile', 'P',
            'diagonal_neighbors', 'identical_tolerance' and 'n_levels'.
            An optional 'search_rotated' entry is ignored. The mapping is
            not modified.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    self.n_grid = params['n_grid']
    self.crop_percentile = params['crop_percentile']
    self.P = params['P']
    self.diagonal_neighbors = params['diagonal_neighbors']
    self.identical_tolerance = params['identical_tolerance']
    self.n_levels = params['n_levels']
    # 'search_rotated' must not reach SignatureES; filter it into a copy
    # rather than popping it out of the caller's dict.
    signature_kwargs = {
        key: value
        for key, value in params.items()
        if key != "search_rotated"
    }
    self.ses = SignatureES(self.es, **signature_kwargs)
def imgStoreTest():
    """Add one hard-coded image to the index, tagged with its numeric id.

    The id is the part of the file name before the first dot, converted to
    an int, and is stored as ``{"imageID": <id>}`` metadata.
    """
    imagePath = r"D:\konachan\T22\311479.jpg"
    file_name = os.path.basename(imagePath)
    imageID = int(file_name.split(".")[0])
    index = SignatureES(Elasticsearch())
    index.add_image(path=imagePath, metadata={"imageID": imageID})
def clear_db(self):
    """Drop the 'images' index, then recreate the client and SignatureES.

    400/404 responses from the delete call are ignored. The new client is
    built with default arguments and the new SignatureES with the settings
    currently stored on ``self``.
    """
    self.es.indices.delete(index='images', ignore=[400, 404])
    signature_options = dict(
        n_grid=self.n_grid,
        crop_percentile=self.crop_percentile,
        diagonal_neighbors=self.diagonal_neighbors,
        identical_tolerance=self.identical_tolerance,
        n_levels=self.n_levels,
    )
    self.es = Elasticsearch()
    self.ses = SignatureES(self.es, **signature_options)
def add_to_es(self, img_dir=""):
    """Index every ``.png`` / ``.jpg`` file found directly in *img_dir*.

    Args:
        img_dir: Directory to scan (not recursive). A trailing path
            separator is no longer required.

    Raises:
        OSError: If *img_dir* cannot be listed.
    """
    es = Elasticsearch(hosts=[{"host": settings.ELASTICSEARCH_HOST}])
    ses = SignatureES(es, distance_cutoff=0.3)
    for file_name in os.listdir(img_dir):
        # splitext only yields an extension when there is a real ".ext"
        # suffix, so a file literally named "jpg" is not mistaken for an image.
        extension = os.path.splitext(file_name)[1][1:]
        if extension not in ('png', 'jpg'):
            continue
        # os.path.join inserts the separator when img_dir lacks one and
        # yields the same path as before when it already ends with one.
        img_path = os.path.join(img_dir, file_name)
        ses.add_image(img_path)
        # Report only after the add succeeded.
        print(img_path, 'added.')
def _map_in_pool(func, items):
    """Run ``func`` over ``items`` in a fresh Pool and always shut the pool down."""
    pool = Pool()
    try:
        pool.map(func, items)
    finally:
        # Without close/join every call would leave its worker processes
        # behind until the pool object happens to be collected.
        pool.close()
        pool.join()


def search():
    """Index the corpus frames and search the replay frames for every video.

    For each entry of ``REPLAY_DIR``: make sure its result directory under
    ``SEARCH_RESULT`` exists, add the matching slice of ``corpus_paths`` via
    ``add_image_to_corpus``, then run ``search_images`` on every file in the
    video's directory. Both steps run in a worker pool.
    """
    for vid_name in os.listdir(REPLAY_DIR):
        print("Processing video: {0}".format(vid_name))
        result_path = osp.join(SEARCH_RESULT, vid_name)
        if not osp.exists(result_path):
            os.makedirs(result_path)

        print("---Adding images to engine ...")
        # ``round_idx`` instead of ``round`` so the builtin is not shadowed.
        match, round_idx = frames_meta[vid_name]
        start, end = corpus_idx[match][round_idx]
        _map_in_pool(add_image_to_corpus, corpus_paths[start:end + 1])

        print("---Searching replay frames ...")
        vid_path = osp.join(REPLAY_DIR, vid_name)
        replay_lists = [osp.join(vid_path, img) for img in os.listdir(vid_path)]
        _map_in_pool(search_images, replay_lists)
        print("---Done!")


# NOTE(review): kept as module-level globals on the assumption that the worker
# functions (add_image_to_corpus / search_images) read ``ses`` - confirm.
es = Elasticsearch()
ses = SignatureES(es, distance_cutoff=0.4)
class ImageController(object):
    """Request handler that searches the image index for a posted image."""

    def __init__(self):
        """Create the Elasticsearch client and the SignatureES built on it."""
        self.es = Elasticsearch()
        self.ses = SignatureES(self.es)

    def on_post(self, req, resp):
        """Search for the image in the request body and reply with JSON.

        Args:
            req: Incoming request; its body is read from ``bounded_stream``.
            resp: Response object; on success gets status 200 and a JSON
                body of the form ``{"result": <search result>}``.

        Raises:
            falcon.HTTPBadRequest: If the request has no content, or if
                anything fails while searching or serialising (the
                traceback is logged first).
        """
        logging.info('received request')
        if not req.content_length:
            raise falcon.HTTPBadRequest(
                'Missing content',
                'Please provide jpg image is POST request body')

        payload = req.bounded_stream.read()
        try:
            matches = self.ses.search_image(payload,
                                            all_orientations=True,
                                            bytestream=True)
            result = {'result': matches}
            resp.status = falcon.HTTP_200
            resp.body = json.dumps(result)
        except Exception:
            logging.error(traceback.format_exc())
            raise falcon.HTTPBadRequest('Error',
                                        'Your request cannot be processed')
def storeImage(inDir):
    """Index every image file under *inDir*, tagging each with its numeric id.

    Files are collected recursively when their name ends with "jpg", "png",
    "jpeg" or "gif". The id is the part of the file name before the first
    dot converted to an int, stored as ``{"imageID": <id>}`` metadata.

    Args:
        inDir: Root directory to walk.

    Raises:
        ValueError: If a collected file name does not start with an integer.
    """
    es = Elasticsearch()
    ses = SignatureES(es)

    # Collect the file list.
    fileList = [
        os.path.join(folderName, fileName)
        for folderName, _subfolders, fileNames in os.walk(inDir)
        for fileName in fileNames
        if fileName.endswith(("jpg", "png", "jpeg", "gif"))
    ]

    # Process the files one by one, showing progress.
    pbar = tqdm(fileList, ncols=100)
    for imagePath in pbar:
        imageID = int(os.path.basename(imagePath).split(".")[0])
        pbar.set_description(f"Deal with {imageID}")
        ses.add_image(path=imagePath, metadata={"imageID": imageID})
def post(self, request, *args, **kwargs):
    """Save the uploaded image, search for it and report the matching card id.

    Returns:
        The re-rendered form template when the form is invalid; otherwise a
        ``JsonResponse`` ``{"success": True, "result": <id or ''>}``. The
        result is ``''`` when the search has no hits, when the last hit
        yields no image name, or when a lookup raises
        ``ObjectDoesNotExist``.
    """
    form = self.form_class(request.POST, request.FILES)
    # specifying host for docker container,
    # the host must be the name of the docker container
    es = Elasticsearch(hosts=[{"host": settings.ELASTICSEARCH_HOST}])
    ses = SignatureES(es, distance_cutoff=0.3)

    # Default for the "no hits" case, which previously left ``res`` unbound
    # and crashed with UnboundLocalError when building the response.
    res = ''
    try:
        if not form.is_valid():
            return render(request, self.template_name, {'form': form})

        form.save()
        image = Image.objects.latest('uploaded_at')
        # As before, every hit overwrites ``res``; the last one wins.
        for result in ses.search_image(image.image.path) or ():
            file_name = result['path'].split('/')[-1]
            # Second-to-last dot-separated part (the whole name if no dot).
            image_name = file_name.split('.')[-2:][0]
            if image_name:
                card = Card.objects.get(unique_id=image_name)
                res = card.unique_id
            else:
                res = ''
    except ObjectDoesNotExist:
        res = ''

    context = {
        "success": True,
        "result": res,
    }
    return JsonResponse(context)
def add_imgs():
    """Print the distance between two sample images and index one product image.

    Generates signatures for two image URLs, prints their normalized
    distance, then adds a single hard-coded file from the product catalog
    directory to the index.

    Returns:
        None.
    """
    gis = ImageSignature()
    a = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
    )
    b = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/9/99/Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg/800px-Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg'
    )
    res = gis.normalized_distance(a, b)
    print(res)

    es = Elasticsearch()
    ses = SignatureES(es)
    mypath = '/var/www/html/boots-market/image/catalog/product'
    ses.add_image(mypath + '/' + 'almcdnruimg389x562frfr030awdzpc579240581v1.jpg')
    # NOTE: a loop that added every regular file in ``mypath`` used to follow
    # a bare ``return`` here and could never run; the unreachable code was
    # removed, so behaviour is unchanged.
def search_handler():
    """Handle a search request and return the matches as a JSON string.

    Reads the image via ``get_image('url', 'image')`` and the form fields
    'all_orientations' (compared against the string 'true') and 'min_score'
    (converted with ``dist_from_percent`` into the distance cutoff).

    Returns:
        JSON text with 'status', 'error', 'method' and 'result'; each result
        entry carries 'score', 'filepath' and 'metadata'.
    """
    img, bs = get_image('url', 'image')
    ao = request.form.get('all_orientations', all_orientations) == 'true'
    min_score = request.form.get('min_score', default_min_score)

    cutoff = dist_from_percent(float(min_score))
    local_ses = SignatureES(es,
                            index=es_index,
                            doc_type=es_doc_type,
                            distance_cutoff=cutoff)
    matches = local_ses.search_image(path=img,
                                     all_orientations=ao,
                                     bytestream=bs)

    results = []
    for m in matches:
        results.append({
            'score': dist_to_percent(m['dist']),
            'filepath': m['path'],
            'metadata': m['metadata'],
        })

    payload = {
        'status': 'ok',
        'error': [],
        'method': 'search',
        'result': results,
    }
    return json.dumps(payload)
def create_app(config_class=Config):
    """Build the Flask application with its API namespaces and image searcher.

    Args:
        config_class: Object the Flask configuration is loaded from.

    Returns:
        The Flask app, with the Elasticsearch client on ``app.elastic`` and
        the SignatureES on ``app.searcher``.
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    api = Api(app, doc='/swagger-ui')
    for namespace in (image_api, errors_api):
        api.add_namespace(namespace)

    # Use the configured URL when there is one, client defaults otherwise.
    elastic_url = app.config.get('ELASTIC_URL')
    app.elastic = Elasticsearch([elastic_url]) if elastic_url else Elasticsearch()
    app.searcher = SignatureES(es=app.elastic)
    return app
def new_process(thread_range, offset):
    """Start the worker threads for one process, then report progress forever.

    Rebinds the module-level ``es`` / ``ses``, creates an ``AvatarIterator``
    for this process's range and starts ``args.threadCount`` threads running
    ``new_thread`` on it. Afterwards it prints a status line once per
    second and never returns.

    Args:
        thread_range: Range size given to ``AvatarIterator``; also the
            threshold at which the status line gets " (finished)".
        offset: Start offset of this process; the process with offset 0 is
            the one that clears the screen.
    """
    global es, ses
    es = elasticsearch.Elasticsearch(timeout=60)
    ses = SignatureES(es)

    avatars = AvatarIterator(args.fileName,
                             thread_range,
                             offset=offset,
                             progress=args.progress)
    print(f"started process {offset} -> {avatars.end}")

    started = 0
    while started < args.threadCount:
        worker = threading.Thread(target=new_thread, args=(avatars,))
        worker.start()
        started += 1

    # NOTE(review): assumes only the screen clear is conditional on
    # ``offset == 0`` and every process prints its own status - confirm.
    while True:
        if offset == 0:
            os.system('clear')
        msg = f"offset {avatars.offset} | {avatars.count - avatars.offset}"
        if avatars.count - avatars.offset >= thread_range:
            msg = msg + " (finished)"
        print(msg)
        time.sleep(1)
def search(name, all_orientations=False, image_match=0.6):
    """Search the index for *name*, retrying once with an explicit cutoff.

    The first search uses SignatureES's default distance cutoff. If it finds
    nothing and *image_match* is not None, a second search runs with
    ``distance_cutoff=float(image_match)``.

    Args:
        name: Image passed to ``search_image``.
        all_orientations: Forwarded to ``search_image``.
        image_match: Distance cutoff for the retry; ``None`` disables it.

    Returns:
        The result of the last search performed.
    """
    es = Elasticsearch()
    ses = SignatureES(es, size=15, timeout='30s')
    logger.info("Search image %s...", name)
    result = ses.search_image(name, all_orientations)
    size = len(result)

    if size or image_match is None:
        logger.info("%s images found for %s", size, name)
        return result

    logger.info("No image found for %s", name)
    ses = SignatureES(es, size=15, timeout='30s',
                      distance_cutoff=float(image_match))
    result = ses.search_image(name, all_orientations)
    # Count the retry's hits; the count taken before the retry is always 0.
    logger.info("Second distance_cutoff %s found %s image for %s",
                image_match, len(result), name)
    return result
def deduplicate_images(
    es_host,
    es_port,
    es_index,
    new_path,
    staging_path,
    existing_path,
    example_count,
    quiet,
):
    """Interactively move the new images that pass a distance threshold.

    Optionally indexes the ``*.jpg`` files in *existing_path*, preprocesses
    the files in *new_path* into a temporary directory, queries the index
    for each, shows examples, asks for a distance threshold and moves the
    images selected by ``find_to_keep`` into *staging_path*.

    An invalid threshold now aborts before anything is moved, and the
    temporary directory is removed even if a step raises.

    Args:
        es_host: Elasticsearch host.
        es_port: Elasticsearch port.
        es_index: Index name used by SignatureES.
        new_path: Directory with the candidate images.
        staging_path: Directory the kept images are moved to.
        existing_path: Directory of images to index first; falsy to skip.
        example_count: Passed to ``show_images``.
        quiet: When true, log errors only.
    """
    logger.setLevel(logging.ERROR if quiet else logging.INFO)

    es = Elasticsearch([{"host": es_host, "port": es_port}])
    ses = SignatureES(es, index=es_index)
    temp_dir = tempfile.mkdtemp("salsa-valentina")
    try:
        logger.info(f"Connected to Elasticsearch at {es_host}:{es_port}")

        if existing_path:
            existing_path = Path(existing_path)
            image_paths = list(existing_path.glob("*.jpg"))
            logger.info(f"Will index {len(image_paths)} images")
            index_existing_images(ses, attach_tqdm(image_paths, quiet))
            logger.info(f"Done indexing existing images")

        staging_path = Path(staging_path)
        new_path = Path(new_path)
        image_paths = list(new_path.glob("*.*"))
        logger.info(f"Will process {len(image_paths)}")
        new_paths = preprocess_images(attach_tqdm(image_paths, quiet), temp_dir)
        logger.info(f"Ended with {len(new_paths)}")
        logger.info(f"Will try to add {len(new_paths)} new images")
        similarity_results = query_images(ses, attach_tqdm(new_paths, quiet))
        logger.info(f"Done performing queries existing images")

        click.echo(
            "View the images and if you are ok with the results, press Q to close the window and continue"
        )
        show_images(similarity_results, example_count)

        threshold = click.prompt(
            "Please input a distance threshold between (0-1], Any other value will abort the program",
            confirmation_prompt=True,
            default=-1,
            type=float,
        )
        threshold = float(threshold)
        # Chained comparison also rejects NaN. Returning here is the abort
        # the prompt promises; before, the error was logged and the invalid
        # value was still used to select and move images.
        if not 0.0 <= threshold <= 1.0:
            logger.error("Error with the selected threshold")
            logger.error(
                f"The selected threshold ({threshold}) is not between 0.0 and 1.0"
            )
            return

        images_to_keep = find_to_keep(similarity_results, threshold)
        logger.info(f"Moving images {len(images_to_keep)}")
        move_images(staging_path, images_to_keep)
    finally:
        logger.info("Cleanup")
        shutil.rmtree(temp_dir, ignore_errors=True)
# coding: utf-8 from pixiv.collector import getUrlsFromPixivRanking from pixiv.collector import urlToIllustId from pixiv.collector import getYmdStringListUntilLastYear import os import glob import urllib import requests from elasticsearch import Elasticsearch from image_match.elasticsearch_driver import SignatureES es = Elasticsearch(hosts="127.0.0.1", ports=9200) ses = SignatureES(es) # reset reset = True if reset: es.indices.delete(index='images', ignore=[400, 404]) es.indices.create(index='images', ignore=[400]) urls = [] for path in glob.glob('*.txt'): with open(path, "r") as f: urls.extend(f.readlines()) for i, url in enumerate(urls): print(f"{i}/{len(urls)}", url) illust_id = urlToIllustId(url) meta = {} if illust_id is not False:
def ses(es, index_name):
    """Return a SignatureES for *index_name* on client *es*.

    Uses ``el_version=7`` and the module-level ``DOC_TYPE``.
    """
    options = {
        "es": es,
        "el_version": 7,
        "index": index_name,
        "doc_type": DOC_TYPE,
    }
    return SignatureES(**options)
from elasticsearch import Elasticsearch from flask import Flask, request from image_match.elasticsearch_driver import SignatureES from image_match.goldberg import ImageSignature import json import os # ============================================================================= # Globals app = Flask(__name__) es = Elasticsearch([os.environ['ELASTICSEARCH_URL']]) es_index = os.environ.get('ELASTICSEARCH_INDEX', 'images') es_doc_type = os.environ.get('ELASTICSEARCH_DOC_TYPE', 'images') ses = SignatureES(es, index=es_index, doc_type=es_doc_type) gis = ImageSignature() # ============================================================================= # Helpers def ids_with_path(path): matches = es.search(index=es_index, _source='_id', q='path:' + json.dumps(path)) return [m['_id'] for m in matches['hits']['hits']] def paths_at_location(offset, limit): search = es.search(index=es_index, from_=offset,
def ses(es, index_name):
    """Return a SignatureES bound to client *es* and index *index_name*."""
    signature_index = SignatureES(index=index_name, es=es)
    return signature_index
if is_one_img_search: return dic_to_list(one_img_search(imgs[0])) else: return dic_to_list(mul_img_search(imgs)) if __name__ == '__main__': parse_img_db() if len(sys.argv) == 2: prod_id = sys.argv[1] else: if len(img_db_products) > 0: prod_id = img_db_products[next(iter(img_db_products))] else: prod_id = '219720bed2MP002XW1GZVD' #print('prod_id', prod_id) es = Elasticsearch() ses = SignatureES(es, distance_cutoff=5.0) print(json.dumps(search_products_for(prod_id))) quit()
import os
from glob import glob
from image_match.goldberg import ImageSignature
from elasticsearch import Elasticsearch
from image_match.elasticsearch_driver import SignatureES

# Need to start elastic search $elasticsearch on osx, $sudo service elasticsearch start on ubuntu
"""Originally wanted to remove duplicate images to speed up training with this script but due to a lack of time it was unfinished"""

psychic_learners_dir = os.path.split(os.getcwd())[0]
image_directory = os.path.join(psychic_learners_dir, 'data', 'image', 'train_v1')
category_directories = glob(os.path.join(image_directory, '*'))

# The client and the SignatureES do not depend on the category, so they are
# built once instead of once per directory.
es = Elasticsearch()
ses = SignatureES(es)

for category_directory in category_directories:
    image_filenames = glob(os.path.join(category_directory, '*.jpg'))
    # First index every image of the category ...
    for image_filename in image_filenames:
        ses.add_image(image_filename)
    # ... then query each one back. The results are discarded; the
    # duplicate-removal step was never written.
    for image_filename in image_filenames:
        ses.search_image(image_filename)
import hashlib import os from PIL import ImageFile from elasticsearch import Elasticsearch from gevent.pool import Pool, joinall from redis import Redis from image_match.elasticsearch_driver import SignatureES from local_config import IMAGE_PATHS ImageFile.LOAD_TRUNCATED_IMAGES = True es = Elasticsearch() print(es.cluster) ses = SignatureES(es) redis_client = Redis() pool = Pool(100) print('初始化完毕') def insert_es(img_path): if not validate_format(img_path): return file_hash = get_md5(img_path) if redis_client.get(file_hash): # print("已入库:%s" % img_path) return try: ses.add_image(img_path)
def search_file_with_threshold_and_rotated(self, file_bytes, threshold, search_rotated):
    """Search for an image given as bytes, optionally with a custom cutoff.

    Args:
        file_bytes: Raw image bytes (searched with ``bytestream=True``).
        threshold: ``0.0`` means "use ``self.ses`` as configured"; any other
            value builds a temporary SignatureES with that
            ``distance_cutoff``.
        search_rotated: Forwarded as ``all_orientations``.

    Returns:
        Whatever ``search_image`` returns.
    """
    if threshold == 0.0:
        searcher = self.ses
    else:
        searcher = SignatureES(self.es, distance_cutoff=threshold)
    return searcher.search_image(file_bytes,
                                 bytestream=True,
                                 all_orientations=search_rotated)
class WorkWithSignatures():
    """Image-signature settings plus index maintenance and search helpers.

    The settings, the Elasticsearch client and the SignatureES are class
    attributes created when the class body executes; methods rebind some of
    them on the instance they are called on.
    """

    # Signature-generation settings forwarded to SignatureES.
    n_grid = 9
    crop_percentile = (5, 95)
    P = None
    diagonal_neighbors = True
    identical_tolerance = 2 / 255
    n_levels = 2
    # Passed as ``all_orientations`` by the search methods.
    search_rotated = False

    es = Elasticsearch(
        ['elasticsearch_img'],
        port=9200,
    )
    ses = SignatureES(es,
                      n_grid=n_grid,
                      crop_percentile=crop_percentile,
                      diagonal_neighbors=diagonal_neighbors,
                      identical_tolerance=identical_tolerance,
                      n_levels=n_levels,
                      distance_cutoff=0.9999)

    def clear_db(self):
        """Delete the 'images' index, ignoring 400/404 responses."""
        self.es.indices.delete(index='images', ignore=[400, 404])

    def reload_params(self, params):
        """Store new signature settings and rebuild ``self.ses`` from them.

        Args:
            params: Mapping with the keys 'n_grid', 'crop_percentile', 'P',
                'diagonal_neighbors', 'identical_tolerance' and 'n_levels'.
                An optional 'search_rotated' entry is ignored. The mapping
                is not modified.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self.n_grid = params['n_grid']
        self.crop_percentile = params['crop_percentile']
        self.P = params['P']
        self.diagonal_neighbors = params['diagonal_neighbors']
        self.identical_tolerance = params['identical_tolerance']
        self.n_levels = params['n_levels']
        # Filter 'search_rotated' into a copy instead of popping it out of
        # the caller's dict.
        signature_kwargs = {
            key: value
            for key, value in params.items()
            if key != "search_rotated"
        }
        self.ses = SignatureES(self.es, **signature_kwargs)

    def get_all_params(self):
        """Return the current signature settings as a dict."""
        return {'n_grid': self.n_grid,
                'crop_percentile': self.crop_percentile,
                'P': self.P,
                'diagonal_neighbors': self.diagonal_neighbors,
                'identical_tolerance': self.identical_tolerance,
                'n_levels': self.n_levels}

    def set_rotate_param(self, rotate):
        """Set whether searches should pass ``all_orientations``."""
        self.search_rotated = rotate

    def get_rotate_param(self):
        """Return the current ``search_rotated`` setting."""
        return self.search_rotated

    def load_file(self, path):
        """Add the image at *path* to the index."""
        self.ses.add_image(path)

    def search_file(self, file_bytes):
        """Search for an image given as bytes using the stored settings."""
        return self.ses.search_image(file_bytes,
                                     bytestream=True,
                                     all_orientations=self.search_rotated)

    def search_file_with_threshold(self, file_bytes, threshold):
        """Search with an optional custom cutoff and the stored rotation flag.

        Delegates to ``search_file_with_threshold_and_rotated`` so the
        threshold handling lives in one place.
        """
        return self.search_file_with_threshold_and_rotated(
            file_bytes, threshold, self.search_rotated)

    def search_file_with_threshold_and_rotated(self, file_bytes, threshold, search_rotated):
        """Search for an image given as bytes, optionally with a custom cutoff.

        Args:
            file_bytes: Raw image bytes (searched with ``bytestream=True``).
            threshold: ``0.0`` means "use ``self.ses`` as configured"; any
                other value builds a temporary SignatureES with that
                ``distance_cutoff``.
            search_rotated: Forwarded as ``all_orientations``.
        """
        if threshold == 0.0:
            searcher = self.ses
        else:
            searcher = SignatureES(self.es, distance_cutoff=threshold)
        return searcher.search_image(file_bytes,
                                     bytestream=True,
                                     all_orientations=search_rotated)

    def get_summary_count(self):
        """Return ``hits.total`` of a zero-size search over ``images*``."""
        return self.es.search(index="images*", size=0)['hits']['total']

    def delete_file_from_es(self, path):
        """Delete every 'images' document whose stored path equals *path*.

        Raises:
            Exception: If no document with exactly that path is found.
        """
        response = self.es.search(body={'query': {'match': {'path': path}}},
                                  index='images')
        # 'match' may also return other paths, so keep exact matches only.
        matching_ids = [item['_id']
                        for item in response['hits']['hits']
                        if item['_source']['path'] == path]
        if not matching_ids:
            raise Exception("File does not exists")
        for id_tag in matching_ids:
            self.es.delete(index='images', doc_type='image', id=id_tag)

    def delete_duplicate_signature(self):
        """Delete documents whose signature equals that of a later hit.

        Afterwards ``delete_duplicates`` is called on ``self.ses`` for every
        path that was returned by the search.
        """
        # NOTE(review): no ``size`` is passed, so presumably only the
        # server's default page of hits is examined - confirm.
        all_data = self.es.search(index="images",
                                  body={"query": {"match_all": {}}})
        hits = all_data['hits']['hits']
        ids_and_sings = [(d['_id'], d['_source']['signature']) for d in hits]

        # Mark an id when any later hit has an equal signature, so the last
        # document of each group of equal signatures is kept.
        to_delete = set()
        for index, (doc_id, signature) in enumerate(ids_and_sings):
            for _other_id, other_signature in ids_and_sings[index + 1:]:
                if numpy.array_equal(signature, other_signature):
                    to_delete.add(doc_id)
        for id_tag in to_delete:
            self.es.delete(index='images', doc_type='image', id=id_tag)

        for path in [d['_source']['path'] for d in hits]:
            self.ses.delete_duplicates(path)
def delete():
    """Delete the 'images' index using a default Elasticsearch client.

    Errors from the client (for example a missing index) are not caught and
    propagate to the caller.
    """
    es = Elasticsearch()
    # The index name is passed by keyword: newer elasticsearch-py clients
    # reject positional arguments, older ones accept both forms. The
    # SignatureES that used to be built here was never used.
    es.indices.delete(index="images")