Пример #1
0
def add():
    """Index the Wikimedia Mona Lisa image and return ``'ok'``."""
    image_url = (
        'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
    )
    # Default client, default searcher settings.
    searcher = SignatureES(Elasticsearch())
    searcher.add_image(image_url)
    return 'ok'
Пример #2
0
def reAndThum(filePath, outFolder, outSize=(200, 200)):
    """Rename a file, build its thumbnail and index the thumbnail.

    The steps run in order: ``reName(filePath)``,
    ``mkThumb(outPath, outFolder, outSize)`` and ``SignatureES.add_image``
    on the thumbnail. A step that reports ``-1`` (or, for indexing, raises)
    stops the pipeline; ``filePath`` is then appended to the global
    ``failedList`` and a message is printed, both while holding ``lock``.

    Args:
        filePath: Path of the file to process.
        outFolder: Folder handed to ``mkThumb`` for the thumbnail.
        outSize: Thumbnail size handed to ``mkThumb``.
    """
    global failedList

    def record_failure(message):
        # `with` releases the lock even when the body raises; a bare
        # acquire()/release() pair would leave it held in that case.
        with lock:
            # Update the shared error list.
            failedList.append(filePath)
            print(os.path.basename(outPath), message)
            print()

    # Try to process (rename) the file.
    flag1, outPath = reName(filePath)
    if flag1 == -1:
        record_failure("failed.")
        return

    flag2, thumbPath = mkThumb(outPath, outFolder, outSize)
    if flag2 == -1:
        record_failure("failed.")
        return

    # Add the picture (its thumbnail) to the image-match index.
    try:
        ses = SignatureES(Elasticsearch())
        ses.add_image(thumbPath)
    except Exception:
        record_failure("Failed to add to image-match database.")
Пример #3
0
class WorkWithSignatures():
    """Bundle an Elasticsearch client with a configurable ``SignatureES``.

    The signature parameters are kept as attributes so that
    ``get_all_params`` can report them and ``reload_params`` can replace
    them together with the searcher.
    """

    # Signature parameters. All but P are forwarded to SignatureES below;
    # P is only forwarded through reload_params.
    n_grid = 9
    crop_percentile = (5, 95)
    P = None
    diagonal_neighbors = True
    identical_tolerance = 2 / 255
    n_levels = 2
    # Passed as `all_orientations` when searching (see search_file).
    search_rotated = False

    # Client and searcher are created at class-definition time and are
    # shared by all instances until an instance rebinds them.
    es = Elasticsearch(
        ['elasticsearch'],
        port=9200,
    )

    ses = SignatureES(es,
                      n_grid=n_grid,
                      crop_percentile=crop_percentile,
                      diagonal_neighbors=diagonal_neighbors,
                      identical_tolerance=identical_tolerance,
                      n_levels=n_levels)

    def clear_db(self):
        """Delete the 'images' index and rebuild the client and searcher."""
        # 400/404 responses from the delete call are ignored.
        self.es.indices.delete(index='images', ignore=[400, 404])
        # NOTE(review): the new client uses default arguments, not the
        # ['elasticsearch'] host / port 9200 configured on the class —
        # confirm this is intended.
        self.es = Elasticsearch()
        self.ses = SignatureES(self.es,
                               n_grid=self.n_grid,
                               crop_percentile=self.crop_percentile,
                               diagonal_neighbors=self.diagonal_neighbors,
                               identical_tolerance=self.identical_tolerance,
                               n_levels=self.n_levels)

    def reload_params(self, params):
        """Store new parameters and rebuild the searcher from them.

        Args:
            params: Mapping with the keys ``n_grid``, ``crop_percentile``,
                ``P``, ``diagonal_neighbors``, ``identical_tolerance``,
                ``n_levels`` and ``search_rotated``. The mapping itself is
                left unmodified.

        Raises:
            KeyError: If one of those keys is missing.
        """
        self.n_grid = params['n_grid']
        self.crop_percentile = params['crop_percentile']
        self.P = params['P']
        self.diagonal_neighbors = params['diagonal_neighbors']
        self.identical_tolerance = params['identical_tolerance']
        self.n_levels = params['n_levels']
        self.search_rotated = params['search_rotated']
        # search_rotated is used at search time and is not passed to
        # SignatureES; filter it out of a copy rather than popping it from
        # the caller's dict.
        ses_kwargs = {key: value for key, value in params.items()
                      if key != 'search_rotated'}
        self.ses = SignatureES(self.es, **ses_kwargs)

    def get_all_params(self):
        """Return the current parameters as a dict keyed by parameter name."""
        return {
            'n_grid': self.n_grid,
            'crop_percentile': self.crop_percentile,
            'P': self.P,
            'diagonal_neighbors': self.diagonal_neighbors,
            'identical_tolerance': self.identical_tolerance,
            'n_levels': self.n_levels,
            'search_rotated': self.search_rotated
        }

    def load_file(self, path):
        """Add the image at ``path`` to the index."""
        self.ses.add_image(path)

    def search_file(self, file_bytes):
        """Search with raw image bytes and return the searcher's result."""
        return self.ses.search_image(file_bytes,
                                     bytestream=True,
                                     all_orientations=self.search_rotated)
Пример #4
0
def search():
    """Search the index for a Mona Lisa photo, print the result, return 'ok'."""
    es = Elasticsearch()
    ses = SignatureES(es)
    image = ses.search_image(
        'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'
    )
    # Call form of print: valid on Python 3 and, for a single argument,
    # prints the same text on Python 2 as the old `print image` statement.
    print(image)
    return 'ok'
Пример #5
0
def add_files():
    """Index every file yielded by ``get_files()``, logging a running count."""
    searcher = SignatureES(Elasticsearch())
    # enumerate supplies the zero-based counter shown in the log line.
    for index, path in enumerate(get_files()):
        logger.info('{0} Adding file {1}'.format(index, path))
        searcher.add_image(path)
Пример #6
0
def main():
    """Index every file that ``walk`` yields for the image directory."""
    client = Elasticsearch(hosts=["127.0.0.1:9200"])
    searcher = SignatureES(client, index='images', doc_type='image')

    for path in walk('/home/key/图片/image_search_data'):
        searcher.add_image(path)
        print('index image: {}'.format(path))
Пример #7
0
 def reload_params(self, params):
     """Store new signature parameters and rebuild ``self.ses`` from them.

     Args:
         params: Mapping with the keys ``n_grid``, ``crop_percentile``,
             ``P``, ``diagonal_neighbors``, ``identical_tolerance`` and
             ``n_levels``; an optional ``search_rotated`` key is ignored.
             The mapping itself is left unmodified.

     Raises:
         KeyError: If one of the required keys is missing.
     """
     self.n_grid = params['n_grid']
     self.crop_percentile = params['crop_percentile']
     self.P = params['P']
     self.diagonal_neighbors = params['diagonal_neighbors']
     self.identical_tolerance = params['identical_tolerance']
     self.n_levels = params['n_levels']
     # Filter search_rotated out of a copy instead of popping it from the
     # caller's dict, so the argument is not changed behind their back.
     ses_kwargs = {key: value for key, value in params.items()
                   if key != "search_rotated"}
     self.ses = SignatureES(self.es, **ses_kwargs)
Пример #8
0
def imgStoreTest():
    """Index one hard-coded image with its numeric file name as metadata."""
    searcher = SignatureES(Elasticsearch())
    imagePath = r"D:\konachan\T22\311479.jpg"
    # The text before the first dot of the base name is the numeric ID.
    stem = os.path.basename(imagePath).split(".")[0]
    searcher.add_image(path=imagePath, metadata={"imageID": int(stem)})
Пример #9
0
 def clear_db(self):
     """Delete the 'images' index, then rebuild the client and the searcher."""
     # 400/404 responses from the delete call are ignored.
     self.es.indices.delete(index='images', ignore=[400, 404])
     self.es = Elasticsearch()
     # Carry the stored signature parameters over to the new searcher.
     signature_options = dict(
         n_grid=self.n_grid,
         crop_percentile=self.crop_percentile,
         diagonal_neighbors=self.diagonal_neighbors,
         identical_tolerance=self.identical_tolerance,
         n_levels=self.n_levels,
     )
     self.ses = SignatureES(self.es, **signature_options)
Пример #10
0
    def add_to_es(self, img_dir=""):
        """Index every ``png``/``jpg`` file found directly in ``img_dir``.

        Args:
            img_dir: Directory to scan (not recursive). It may be given
                with or without a trailing path separator.
        """
        es = Elasticsearch(hosts=[{"host": settings.ELASTICSEARCH_HOST}])
        ses = SignatureES(es, distance_cutoff=0.3)

        for file_name in os.listdir(img_dir):
            # The text after the last dot must be exactly 'png' or 'jpg'.
            file_ext = file_name.split('.')[-1]
            if file_ext in ('png', 'jpg'):
                # os.path.join adds the separator that plain string
                # concatenation omits when img_dir has no trailing slash.
                img_path = os.path.join(img_dir, file_name)
                print(img_path, 'added.')
                ses.add_image(img_path)
Пример #11
0
def _map_in_pool(func, items):
    """Map ``func`` over ``items`` in a fresh worker pool, then shut it down."""
    pool = Pool()
    try:
        return pool.map(func, items)
    finally:
        # close() + join() release the workers; without them every call
        # would leave its pool open.
        pool.close()
        pool.join()


def search():
    """Run the corpus-indexing and replay-search passes for every video.

    For each entry of ``REPLAY_DIR`` the matching slice of ``corpus_paths``
    is mapped through ``add_image_to_corpus``, then every file of the video
    directory is mapped through ``search_images``. Each pass runs in its
    own worker pool, which is closed once the pass is done.
    """
    for vid_name in os.listdir(REPLAY_DIR):
        print("Processing video: {0}".format(vid_name))

        result_path = osp.join(SEARCH_RESULT, vid_name)
        if not osp.exists(result_path):
            os.makedirs(result_path)

        print("---Adding images to engine ...")
        # `round_id` rather than `round`, which would shadow the builtin.
        [match, round_id] = frames_meta[vid_name]
        [start, end] = corpus_idx[match][round_id]
        # `end` is inclusive, hence the + 1.
        _map_in_pool(add_image_to_corpus, corpus_paths[start:end + 1])

        print("---Searching replay frames ...")
        vid_path = osp.join(REPLAY_DIR, vid_name)
        replay_lists = [osp.join(vid_path, img) for img in os.listdir(vid_path)]
        _map_in_pool(search_images, replay_lists)

        print("---Done!")
        # NOTE(review): these two objects are not used afterwards in this
        # function; kept in case construction matters to the caller.
        es = Elasticsearch()
        ses = SignatureES(es, distance_cutoff=0.4)
Пример #12
0
class ImageController(object):
    """Falcon resource that looks up a posted image with image-match."""

    def __init__(self):
        self.es = Elasticsearch()
        self.ses = SignatureES(self.es)

    def on_post(self, req, resp):
        """Search for the image in the request body and answer with JSON.

        Raises:
            falcon.HTTPBadRequest: When the request reports no content
                length, or when any exception occurs while searching or
                serialising the result.
        """
        logging.info('received request')
        if not req.content_length:
            raise falcon.HTTPBadRequest(
                'Missing content',
                'Please provide jpg image is POST request body')

        payload = req.bounded_stream.read()
        try:
            matches = self.ses.search_image(payload,
                                            all_orientations=True,
                                            bytestream=True)
            resp.status = falcon.HTTP_200
            resp.body = json.dumps({'result': matches})
        except Exception:
            # Log the full traceback, but answer with a generic 400.
            logging.error(traceback.format_exc())
            raise falcon.HTTPBadRequest('Error',
                                        'Your request cannot be processed')
Пример #13
0
def storeImage(inDir):
    """Index every image under ``inDir`` with its numeric name as metadata.

    Files are collected recursively; a file is taken when its name ends
    with "jpg", "png", "jpeg" or "gif". The text before the first dot of
    each base name is converted with ``int`` and stored as ``imageID``.
    """
    searcher = SignatureES(Elasticsearch())
    # Collect the file list.
    image_suffixes = ("jpg", "png", "jpeg", "gif")
    fileList = [
        os.path.join(folderName, fileName)
        for folderName, _subfolders, fileNames in os.walk(inDir)
        for fileName in fileNames
        if fileName.endswith(image_suffixes)
    ]
    # Process the files one by one behind a progress bar.
    pbar = tqdm(fileList, ncols=100)
    for imagePath in pbar:
        imageID = int(os.path.basename(imagePath).split(".")[0])
        pbar.set_description(f"Deal with {imageID}")
        searcher.add_image(path=imagePath, metadata={"imageID": imageID})
Пример #14
0
    def post(self, request, *args, **kwargs):
        """Save the uploaded image, search for it and report the matching card.

        Returns:
            The form page re-rendered when the form is invalid; otherwise a
            ``JsonResponse`` with ``success`` and ``result``, where
            ``result`` is the matching card's ``unique_id`` or ``''`` when
            nothing matched.
        """
        form = self.form_class(request.POST, request.FILES)

        # specifying host for docker container,
        # the host must be the name of the docker container
        es = Elasticsearch(hosts=[{"host": settings.ELASTICSEARCH_HOST}])
        ses = SignatureES(es, distance_cutoff=0.3)

        # Start from "no match" so `res` is always bound; without this a
        # match that yields an empty image name raises UnboundLocalError
        # when the context is built.
        res = ''

        try:
            if not form.is_valid():
                return render(request, self.template_name, {'form': form})

            form.save()

            image = Image.objects.latest('uploaded_at')

            search = ses.search_image(image.image.path)

            if search:
                image_name = ''
                # The name derived from the LAST result is the one used.
                # NOTE(review): confirm the last (not the first) hit is wanted.
                for result in search:
                    file_name = result['path'].split('/')[-1]
                    # Second-to-last dot-separated part, i.e. the name
                    # without its final extension ('abc.jpg' -> 'abc').
                    image_name = file_name.split('.')[-2:][0]
                if image_name:
                    card = Card.objects.get(unique_id=image_name)
                    res = card.unique_id

        except ObjectDoesNotExist:
            res = ''

        context = {
            "success": True,
            "result": res,
        }

        return (JsonResponse(context))
Пример #15
0
def add_imgs(add_all=False):
    """Compare two reference images, then index product images.

    Prints the normalized distance between the signatures of two Mona Lisa
    variants and adds one sample product image to the index.

    Args:
        add_all: When true, additionally index every regular file in the
            product directory. The default (False) indexes only the sample
            image; without this flag the bulk loop would sit behind an
            unconditional ``return`` and could never run.
    """
    gis = ImageSignature()
    a = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'
    )
    b = gis.generate_signature(
        'https://upload.wikimedia.org/wikipedia/commons/thumb/9/99/Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg/800px-Gioconda_%28copia_del_Museo_del_Prado_restaurada%29.jpg'
    )
    res = gis.normalized_distance(a, b)
    print(res)

    es = Elasticsearch()
    ses = SignatureES(es)

    mypath = '/var/www/html/boots-market/image/catalog/product'

    ses.add_image(mypath + '/' +
                  'almcdnruimg389x562frfr030awdzpc579240581v1.jpg')

    if not add_all:
        return

    # Bulk import: every regular file directly inside the directory.
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    for file in onlyfiles:
        filedir = mypath + '/' + str(file)
        print('add: ' + filedir)
        ses.add_image(filedir)
Пример #16
0
def search_handler():
    """Search for the submitted image and return the matches as JSON text."""
    img, bs = get_image('url', 'image')
    # Only the literal string 'true' enables the all-orientations search.
    ao = request.form.get('all_orientations', all_orientations) == 'true'
    min_score = request.form.get('min_score', default_min_score)

    cutoff = dist_from_percent(float(min_score))
    local_ses = SignatureES(es,
                            index=es_index,
                            doc_type=es_doc_type,
                            distance_cutoff=cutoff)
    matches = local_ses.search_image(path=img, all_orientations=ao, bytestream=bs)

    results = []
    for m in matches:
        results.append({
            'score': dist_to_percent(m['dist']),
            'filepath': m['path'],
            'metadata': m['metadata']
        })

    payload = {
        'status': 'ok',
        'error': [],
        'method': 'search',
        'result': results
    }
    return json.dumps(payload)
Пример #17
0
def create_app(config_class=Config):
    """Build the Flask app, register the API namespaces, attach the searcher.

    The Elasticsearch client uses the ``ELASTIC_URL`` config value when it
    is set and the client defaults otherwise. Client and searcher are
    stored on the app as ``app.elastic`` and ``app.searcher``.
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    api = Api(app, doc='/swagger-ui')
    for namespace in (image_api, errors_api):
        api.add_namespace(namespace)

    elastic_url = app.config.get('ELASTIC_URL')
    app.elastic = Elasticsearch([elastic_url]) if elastic_url else Elasticsearch()

    app.searcher = SignatureES(es=app.elastic)
    return app
Пример #18
0
def new_process(thread_range, offset):
    """Start the worker threads for one slice and print progress forever.

    Sets the module-level ``es``/``ses``, builds an ``AvatarIterator`` for
    the slice, starts ``args.threadCount`` threads running ``new_thread``
    and then prints a status line once per second. The loop never exits.
    """
    global es, ses
    es = elasticsearch.Elasticsearch(timeout=60)
    ses = SignatureES(es)

    avatars = AvatarIterator(args.fileName,
                             thread_range,
                             offset=offset,
                             progress=args.progress)
    print(f"started process {offset} -> {avatars.end}")

    for _ in range(args.threadCount):
        worker = threading.Thread(target=new_thread, args=(avatars,))
        worker.start()

    # Only the process started with offset 0 clears the terminal.
    clears_screen = offset == 0
    while True:
        if clears_screen:
            os.system('clear')
        msg = f"offset {avatars.offset} | {avatars.count - avatars.offset}"
        finished = avatars.count - avatars.offset >= thread_range
        print(msg + " (finished)" if finished else msg)
        time.sleep(1)
Пример #19
0
def search(name, all_orientations=False, image_match=0.6):
    """Search for an image, retrying once with a custom distance cutoff.

    Args:
        name: Image handed to ``SignatureES.search_image``.
        all_orientations: Forwarded to ``search_image``.
        image_match: ``distance_cutoff`` for the retry that runs when the
            first search finds nothing; ``None`` disables the retry.

    Returns:
        The result of the last search that was performed.
    """
    es = Elasticsearch()
    ses = SignatureES(es, size=15, timeout='30s')
    logger.info("Search image %s...", name)
    result = ses.search_image(name, all_orientations)
    size = len(result)
    if size == 0 and image_match is not None:
        logger.info("No image found for %s", name)
        ses = SignatureES(es, size=15, timeout='30s', distance_cutoff=float(image_match))
        result = ses.search_image(name, all_orientations)
        # Recount after the retry so the log reports the retry's hits
        # rather than the stale 0 from the first search.
        size = len(result)
        logger.info("Second distance_cutoff %s found %s image for %s", image_match, size, name)
    else:
        logger.info("%s images found for %s", size, name)
    return result
Пример #20
0
def deduplicate_images(
    es_host,
    es_port,
    es_index,
    new_path,
    staging_path,
    existing_path,
    example_count,
    quiet,
):
    """Compare new images against the index and move the distinct ones.

    Steps: optionally index ``existing_path/*.jpg``; preprocess every file
    of ``new_path`` into a temporary directory; query the index for each
    preprocessed image; show example results; prompt for a distance
    threshold; move the images chosen by ``find_to_keep`` into
    ``staging_path``. The temporary directory is removed on every exit
    path, including errors.

    Args:
        es_host: Elasticsearch host.
        es_port: Elasticsearch port.
        es_index: Index used by the ``SignatureES`` searcher.
        new_path: Directory holding the candidate images.
        staging_path: Destination handed to ``move_images``.
        existing_path: Directory of images to index first; falsy to skip.
        example_count: Forwarded to ``show_images``.
        quiet: When true, log only errors; also forwarded to ``attach_tqdm``.

    Returns:
        None. An invalid threshold aborts before any image is moved.
    """
    logger.setLevel(logging.ERROR if quiet else logging.INFO)

    es = Elasticsearch([{"host": es_host, "port": es_port}])
    ses = SignatureES(es, index=es_index)
    temp_dir = tempfile.mkdtemp("salsa-valentina")
    try:
        logger.info(f"Connected to Elasticsearch at {es_host}:{es_port}")

        if existing_path:
            existing_path = Path(existing_path)
            image_paths = list(existing_path.glob("*.jpg"))
            logger.info(f"Will index {len(image_paths)} images")
            index_existing_images(ses, attach_tqdm(image_paths, quiet))
            logger.info(f"Done indexing existing images")

        staging_path = Path(staging_path)
        new_path = Path(new_path)
        image_paths = list(new_path.glob("*.*"))

        logger.info(f"Will process {len(image_paths)}")
        new_paths = preprocess_images(attach_tqdm(image_paths, quiet), temp_dir)
        logger.info(f"Ended with {len(new_paths)}")

        logger.info(f"Will try to add {len(new_paths)} new images")
        similarity_results = query_images(ses, attach_tqdm(new_paths, quiet))
        logger.info(f"Done performing queries existing images")

        click.echo(
            "View the images and if you are ok with the results, press Q to close the window and continue"
        )
        show_images(similarity_results, example_count)

        threshold = click.prompt(
            "Please input a distance threshold between (0-1], Any other value will abort the program",
            confirmation_prompt=True,
            default=-1,
            type=float,
        )
        try:
            threshold = float(threshold)
            # The chained comparison also rejects NaN.
            if not 0.0 <= threshold <= 1.0:
                raise ValueError(
                    f"The selected threshold ({threshold}) is not between 0.0 and 1.0"
                )
        except ValueError:
            logger.error("Error with the selected threshold")
            # Abort, as the prompt promises; continuing would move images
            # using the invalid threshold.
            return

        images_to_keep = find_to_keep(similarity_results, threshold)
        logger.info(f"Moving images {len(images_to_keep)}")
        move_images(staging_path, images_to_keep)
    finally:
        # Runs on success, abort and error alike, so the temp dir never leaks.
        logger.info("Cleanup")
        shutil.rmtree(temp_dir, ignore_errors=True)
Пример #21
0
# coding: utf-8
from pixiv.collector import getUrlsFromPixivRanking
from pixiv.collector import urlToIllustId
from pixiv.collector import getYmdStringListUntilLastYear
import os
import glob
import urllib
import requests
from elasticsearch import Elasticsearch
from image_match.elasticsearch_driver import SignatureES

# The client keyword for the port number is `port`; `ports` is not a
# recognised option.
es = Elasticsearch(hosts="127.0.0.1", port=9200)
ses = SignatureES(es)

# reset: when enabled, drop and recreate the 'images' index
# (400/404 responses are ignored).
reset = True
if reset:
    es.indices.delete(index='images', ignore=[400, 404])
    es.indices.create(index='images', ignore=[400])

# Gather the lines of every *.txt file in the current directory.
# readlines() keeps each line's trailing newline.
urls = []
for path in glob.glob('*.txt'):
    with open(path, "r") as f:
        urls.extend(f.readlines())

for i, url in enumerate(urls):
    print(f"{i}/{len(urls)}", url)
    illust_id = urlToIllustId(url)

    meta = {}
    if illust_id is not False:
def ses(es, index_name):
    """Return a ``SignatureES`` for ``index_name`` (el_version 7, DOC_TYPE)."""
    options = {
        'es': es,
        'el_version': 7,
        'index': index_name,
        'doc_type': DOC_TYPE,
    }
    return SignatureES(**options)
Пример #23
0
from elasticsearch import Elasticsearch
from flask import Flask, request
from image_match.elasticsearch_driver import SignatureES
from image_match.goldberg import ImageSignature
import json
import os

# =============================================================================
# Globals

app = Flask(__name__)
# ELASTICSEARCH_URL is required: a missing variable raises KeyError here,
# at import time.
es = Elasticsearch([os.environ['ELASTICSEARCH_URL']])
# Index name and doc type are optional; both default to 'images'.
es_index = os.environ.get('ELASTICSEARCH_INDEX', 'images')
es_doc_type = os.environ.get('ELASTICSEARCH_DOC_TYPE', 'images')
# Module-wide searcher bound to the configured index and doc type.
ses = SignatureES(es, index=es_index, doc_type=es_doc_type)
# Module-wide ImageSignature instance built with its default arguments.
gis = ImageSignature()

# =============================================================================
# Helpers


def ids_with_path(path):
    """Return the ``_id`` of every hit of a ``path:<path>`` query."""
    # json.dumps renders the path as a double-quoted, escaped string.
    query = 'path:' + json.dumps(path)
    response = es.search(index=es_index, _source='_id', q=query)
    hits = response['hits']['hits']
    return [hit['_id'] for hit in hits]


def paths_at_location(offset, limit):
    search = es.search(index=es_index,
                       from_=offset,
def ses(es, index_name):
    """Return a ``SignatureES`` that uses ``es`` and the index ``index_name``."""
    searcher = SignatureES(es=es, index=index_name)
    return searcher
Пример #25
0
    if is_one_img_search:

        return dic_to_list(one_img_search(imgs[0]))

    else:

        return dic_to_list(mul_img_search(imgs))


if __name__ == '__main__':

    parse_img_db()

    # Product id: the single command-line argument if given, else the value
    # stored under the first key of img_db_products, else a fixed fallback.
    if len(sys.argv) == 2:
        prod_id = sys.argv[1]
    elif len(img_db_products) > 0:
        prod_id = img_db_products[next(iter(img_db_products))]
    else:
        prod_id = '219720bed2MP002XW1GZVD'

    # Module-level names; presumably read by the search helpers — verify.
    es = Elasticsearch()
    ses = SignatureES(es, distance_cutoff=5.0)

    print(json.dumps(search_products_for(prod_id)))

    # sys.exit() rather than quit(): quit() is added by the `site` module
    # for interactive use and is not meant for programs.
    sys.exit()
Пример #26
0
import os
from glob import glob

from image_match.goldberg import ImageSignature
from elasticsearch import Elasticsearch
from image_match.elasticsearch_driver import SignatureES
# Elasticsearch must be running first: run `elasticsearch` on macOS, or `sudo service elasticsearch start` on Ubuntu.
"""Originally wanted to remove duplicate images to speed up training with this script but due to a lack of time it was unfinished"""

# The data directory lives next to the current working directory's parent.
psychic_learners_dir = os.path.split(os.getcwd())[0]
image_directory = os.path.join(psychic_learners_dir, 'data', 'image',
                               'train_v1')
category_directories = glob(os.path.join(image_directory, '*'))

# One client and one searcher for the whole run instead of a new pair for
# every category directory.
es = Elasticsearch()
ses = SignatureES(es)
for category_directory in category_directories:
    image_filenames = glob(os.path.join(category_directory, '*.jpg'))
    # Index the whole category first, then query each image against it.
    for image_filename in image_filenames:
        ses.add_image(image_filename)
    for image_filename in image_filenames:
        # NOTE(review): the search result is discarded.
        ses.search_image(image_filename)
Пример #27
0
import hashlib
import os

from PIL import ImageFile
from elasticsearch import Elasticsearch
from gevent.pool import Pool, joinall
from redis import Redis

from image_match.elasticsearch_driver import SignatureES
from local_config import IMAGE_PATHS

# Let PIL load truncated image files instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Elasticsearch client built with default arguments.
es = Elasticsearch()

print(es.cluster)
ses = SignatureES(es)
# Redis client built with default arguments.
redis_client = Redis()
# gevent pool limited to 100 greenlets.
pool = Pool(100)
# Prints "initialization complete" (in Chinese).
print('初始化完毕')


def insert_es(img_path):
    if not validate_format(img_path):
        return

    file_hash = get_md5(img_path)
    if redis_client.get(file_hash):
        # print("已入库:%s" % img_path)
        return
    try:
        ses.add_image(img_path)
Пример #28
0
 def search_file_with_threshold_and_rotated(self, file_bytes, threshold, search_rotated):
     """Search with raw image bytes, optionally with a custom cutoff.

     A ``threshold`` equal to 0.0 uses the shared ``self.ses``; any other
     value builds a temporary ``SignatureES`` with that ``distance_cutoff``.
     """
     if threshold == 0.0:
         searcher = self.ses
     else:
         searcher = SignatureES(self.es, distance_cutoff=threshold)
     return searcher.search_image(file_bytes, bytestream=True, all_orientations=search_rotated)
Пример #29
0
class WorkWithSignatures():
    """Bundle an Elasticsearch client with a configurable ``SignatureES``.

    Besides adding and searching images, the class exposes helpers to
    count, delete and de-duplicate documents of the 'images' index.
    """

    # Signature parameters. All but P are forwarded to SignatureES below;
    # P is only forwarded through reload_params.
    n_grid = 9
    crop_percentile = (5, 95)
    P = None
    diagonal_neighbors = True
    identical_tolerance = 2 / 255
    n_levels = 2
    # Passed as `all_orientations` by the search methods that do not take
    # an explicit rotation argument.
    search_rotated = False

    # Client and searcher are created at class-definition time and are
    # shared by all instances until an instance rebinds them.
    es = Elasticsearch(
        ['elasticsearch_img'],
        port=9200,
    )
    # es = Elasticsearch()

    ses = SignatureES(es, n_grid=n_grid, crop_percentile=crop_percentile, diagonal_neighbors=diagonal_neighbors,
                      identical_tolerance=identical_tolerance, n_levels=n_levels, distance_cutoff=0.9999)

    def clear_db(self):
        """Delete the 'images' index; 400/404 responses are ignored."""
        self.es.indices.delete(index='images', ignore=[400, 404])

    def reload_params(self, params):
        """Store new signature parameters and rebuild ``self.ses``.

        Args:
            params: Mapping with the keys ``n_grid``, ``crop_percentile``,
                ``P``, ``diagonal_neighbors``, ``identical_tolerance`` and
                ``n_levels``; an optional ``search_rotated`` key is
                ignored. The mapping itself is left unmodified.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self.n_grid = params['n_grid']
        self.crop_percentile = params['crop_percentile']
        self.P = params['P']
        self.diagonal_neighbors = params['diagonal_neighbors']
        self.identical_tolerance = params['identical_tolerance']
        self.n_levels = params['n_levels']
        # Filter search_rotated out of a copy instead of popping it from
        # the caller's dict.
        ses_kwargs = {key: value for key, value in params.items()
                      if key != "search_rotated"}
        self.ses = SignatureES(self.es, **ses_kwargs)

    def get_all_params(self):
        """Return the signature parameters as a dict keyed by name."""
        return {'n_grid': self.n_grid,
                'crop_percentile': self.crop_percentile,
                'P': self.P,
                'diagonal_neighbors': self.diagonal_neighbors,
                'identical_tolerance': self.identical_tolerance,
                'n_levels': self.n_levels}

    def set_rotate_param(self, rotate):
        """Set the default ``all_orientations`` value used when searching."""
        self.search_rotated = rotate

    def get_rotate_param(self):
        """Return the default ``all_orientations`` value."""
        return self.search_rotated

    def load_file(self, path):
        """Add the image at ``path`` to the index."""
        self.ses.add_image(path)

    def search_file(self, file_bytes):
        """Search with raw image bytes using the shared searcher."""
        return self.ses.search_image(file_bytes, bytestream=True, all_orientations=self.search_rotated)

    def search_file_with_threshold(self, file_bytes, threshold):
        """Search with an optional cutoff and the stored rotation setting."""
        # Same logic as the explicit-rotation variant, so delegate to it
        # rather than keeping a second copy.
        return self.search_file_with_threshold_and_rotated(
            file_bytes, threshold, self.search_rotated)

    def search_file_with_threshold_and_rotated(self, file_bytes, threshold, search_rotated):
        """Search with raw image bytes, optionally with a custom cutoff.

        A ``threshold`` equal to 0.0 uses the shared ``self.ses``; any
        other value builds a temporary ``SignatureES`` with that
        ``distance_cutoff``.
        """
        if threshold == 0.0:
            searcher = self.ses
        else:
            # NOTE(review): the temporary searcher is built with default
            # signature parameters, not the ones stored on this object —
            # confirm that is intended.
            searcher = SignatureES(self.es, distance_cutoff=threshold)
        return searcher.search_image(file_bytes, bytestream=True, all_orientations=search_rotated)

    def get_summary_count(self):
        """Return ``hits.total`` of an empty search over ``images*``."""
        return self.es.search(index="images*", size=0)['hits']['total']

    def delete_file_from_es(self, path):
        """Delete every 'images' document whose stored path equals ``path``.

        Raises:
            Exception: If no document with exactly that path is found.
        """
        query = {'query': {'match': {'path': path}}}
        hits = self.es.search(body=query, index='images')['hits']['hits']
        # `match` may return near matches; keep exact path matches only.
        matching_ids = [item['_id'] for item in hits
                        if item['_source']['path'] == path]
        if not matching_ids:
            raise Exception("File does not exists")
        for id_tag in matching_ids:
            self.es.delete(index='images', doc_type='image', id=id_tag)

    def delete_duplicate_signature(self):
        """Delete documents whose signature equals that of a later hit.

        Afterwards ``delete_duplicates`` is called on the searcher for
        every path that was returned by the initial search.
        """
        # NOTE(review): no `size` is given, so only the search's default
        # number of hits is examined — confirm this is enough.
        all_data = self.es.search(index="images", body={"query": {"match_all": {}}})
        hits = all_data['hits']['hits']
        ids_and_sings = [(d['_id'], d['_source']['signature']) for d in hits]
        # An id is collected once per later hit with an equal signature, so
        # of each group of equal signatures only the last hit survives.
        to_delete = [
            elem[0]
            for index, elem in enumerate(ids_and_sings)
            for later in ids_and_sings[index + 1:]
            if numpy.array_equal(elem[1], later[1])
        ]
        for id_tag in set(to_delete):
            self.es.delete(index='images', doc_type='image', id=id_tag)
        paths = [d['_source']['path'] for d in hits]
        for path in paths:
            self.ses.delete_duplicates(path)
Пример #30
0
def delete():
    """Delete the 'images' index through a default Elasticsearch client."""
    client = Elasticsearch()
    # A SignatureES is constructed for the client; it is not used afterwards.
    SignatureES(client)
    client.indices.delete("images")