Exemplo n.º 1
0
def compare_imgs(img, truth_filename, do_assert=True):
    """
    Compare an image array against a stored reference image using dHash.

    PROTIP: run the following to re-generate the test images:

        REGENERATE_TEST_IMAGES=1 pytest mujoco_py/tests/test_modder.py

    Note: do this in Docker so that images will work for testing.

    Returns the number of differing hash bits, or 0 when the reference
    image was regenerated. With ``do_assert`` set, more than one differing
    bit fails an assertion.
    """
    assert isinstance(truth_filename, str)
    truth_filename = join(TEST_ASSET_DIR_PATH, truth_filename)

    if os.getenv('REGENERATE_TEST_IMAGES'):
        # Keep the previous reference next to the new one as "<name>_old<ext>".
        if exists(truth_filename):
            stem, suffix = splitext(truth_filename)
            move(truth_filename, "%s_old%s" % (stem, suffix))
        save_test_image(truth_filename, img)
        return 0

    reference = np.asarray(Image.open(truth_filename))
    assert img.shape == reference.shape

    actual_hash = imagehash.dhash(Image.fromarray(img))
    reference_hash = imagehash.dhash(Image.fromarray(reference))
    diff = np.sum(actual_hash.hash != reference_hash.hash)

    if diff != 0:
        # If the assert fails, the best way to investigate is to run
        # pytest for the particular test. For example,
        #
        #       pytest -k test_something_something path/to/test.py
        #
        # The images below are dumped to help with that investigation.
        save_test_image("/tmp/img.png", img)
        save_test_image("/tmp/true_img.png", reference)
        save_test_image("/tmp/diff_img.png", img - reference)

    if do_assert:
        # A single differing bit is tolerated.
        assert diff <= 1
    return diff
Exemplo n.º 2
0
def image_descriptor(image_path, prior=None):
    """Build, or refresh in place, a size/timestamp/hash descriptor for an image.

    Args:
        image_path: Path of the image file.
        prior: Optional descriptor dict returned by an earlier call. When it
            is missing or has no truthy ``'modified'`` entry, a fresh
            descriptor is built. Otherwise it is updated in place: every
            field is recomputed if the file's mtime is newer than
            ``prior['modified']``, and falsy fields are filled in regardless.

    Returns:
        A dict with keys ``width``, ``height``, ``created``, ``modified``,
        ``aHash``, ``pHash`` and ``dHash`` (hashes as strings). When ``prior``
        is refreshed, the same dict object is returned.
    """
    mtime = os.path.getmtime(image_path)
    ctime = os.path.getctime(image_path)

    if not prior or (not prior.get('modified')):
        with Image.open(image_path) as img:
            return {
                'width': img.size[0],
                'height': img.size[1],
                # 'modified' must hold the mtime: the refresh path below
                # compares it against os.path.getmtime().
                'created': ctime,
                'modified': mtime,
                # TODO: if results too bad, change hash sizes for more precision?
                'aHash': str(imagehash.average_hash(img)),
                'pHash': str(imagehash.phash(img)),
                'dHash': str(imagehash.dhash(img)),
            }

    changed = prior['modified'] < mtime

    with Image.open(image_path) as img:
        if changed or not prior["width"]:
            prior["width"] = img.size[0]
        if changed or not prior["height"]:
            prior["height"] = img.size[1]

        if changed or not prior["aHash"]:
            prior["aHash"] = str(imagehash.average_hash(img))
        if changed or not prior["pHash"]:
            prior["pHash"] = str(imagehash.phash(img))
        if changed or not prior["dHash"]:
            prior["dHash"] = str(imagehash.dhash(img))

    if changed:
        # Record the new mtime so an unchanged file is not re-hashed on
        # every subsequent call.
        prior['modified'] = mtime
    return prior
Exemplo n.º 3
0
def color_dhash(imagepath):
    """Return a hex string built from the dHash of the first three channels.

    Each channel's hash is rendered as a bit string left-padded with zeros
    to 64 characters; the three bit strings are concatenated in channel
    order 0, 1, 2 and converted to hexadecimal. Leading zeros of the
    combined value are not kept by the final ``hex()`` conversion.
    """
    source = Image.open(imagepath)
    pixels = np.asarray(source)

    planes = [Image.fromarray(pixels[:, :, channel]) for channel in range(3)]

    padded_bits = []
    for plane in planes:
        bits = bin(int(str(imagehash.dhash(plane)), 16))[2:]
        padded_bits.append(bits.zfill(64))

    source.close()

    combined = ''.join(padded_bits)
    # The trailing split('L') strips the long-integer suffix Python 2 adds.
    return str(hex(int(combined, 2)).split('0x')[1].split('L')[0])
Exemplo n.º 4
0
 def __init__(self,res1,res2):
    print "Inside"
    print res1,res2
    urllib.urlretrieve(res1, "data1")
    print "Image 1 downloaded"
    urllib.urlretrieve(res2, "data2")
    print "Image 2 downloaded"
    self.hash = imagehash.dhash(Image.open("data1"))
    print "Hashing first image"
    self.otherhash = imagehash.dhash(Image.open("data2"))
    print "Hashing second image"
Exemplo n.º 5
0
def keep_dissimilar(image1, image2, threshold):
  """Return image2 when its dHash is far enough from image1's, else False.

  Both arguments are PIL Image objects. The distance is computed by
  ``spatial.distance.hamming`` over the characters of the two hex hash
  strings; image2 is returned when that distance is >= threshold.

  NOTE(review): if ``spatial`` is scipy.spatial, hamming() yields the
  fraction of differing positions (hex characters here), not a bit count --
  confirm the intended scale of ``threshold``.
  """
  hash_chars = []
  for picture in (image1, image2):
    hash_chars.append(list(str(imagehash.dhash(picture))))
  image1_hash, image2_hash = hash_chars

  print(image1_hash)
  print(image2_hash)

  dist = spatial.distance.hamming(image1_hash, image2_hash)
  print(dist)

  return image2 if dist >= threshold else False
Exemplo n.º 6
0
 def run(self):
     """Group near-duplicate media files and write the groups as JSON.

     Every file under ``data/<date_path>/media`` is hashed with average,
     difference and perceptual hashes. Two files are linked when the sum of
     their three hash distances is <= 40. The connected components of the
     resulting graph are dumped to ``self.output()`` as a list of lists;
     files with no match are not part of the graph and so are not written.
     """
     files = sorted(os.listdir('data/%s/media' % self.date_path))
     hashes = {}
     g = nx.Graph()
     for i, f in enumerate(files):
         fn = 'data/%s/media/%s' % (self.date_path, f)
         # Open each file once and release the handle as soon as it has
         # been hashed, instead of leaking three handles per file.
         with Image.open(fn) as img:
             ahash = imagehash.average_hash(img)
             dhash = imagehash.dhash(img)
             phash = imagehash.phash(img)
         hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
         # Compare against every file hashed before this one.
         for f2name in files[:i]:
             f2 = hashes[f2name]
             sumhash = sum([ahash - f2['ahash'],
                            dhash - f2['dhash'],
                            phash - f2['phash']])
             if sumhash <= 40:
                 g.add_edge(f, f2name)
     with self.output().open('w') as fp_graph:
         # Note: sets are not JSON serializable
         d = [list(s) for s in nx.connected_components(g)]
         logging.debug(' - = - = - = GRAPH HERE - = - = - = -')
         logging.debug(d)
         json.dump(d, fp_graph, indent=2)
Exemplo n.º 7
0
    def scrape(self):
        """
        Turn every captured item image into a value stored in ``self.items``.

        OCR results are cached per item in its "hashmap", keyed by the
        image's dHash, so Tesseract only runs for images not seen before.
        Items whose type is not "String" are passed on to
        ``self.scenario.parse``.
        """
        for entry in self.capture():
            key, image = entry[0], entry[1]
            _type = self.allItems[key]["type"]
            _hash = str(imagehash.dhash(image))
            _map = self.allItems[key]["hashmap"]

            # Only unseen hashes are sent through Tesseract OCR for a first guess.
            if _hash not in _map:
                _map[_hash] = pytesseract.image_to_string(image)
            self.items[key] = _map[_hash]

            ###DEBUG
            debug_label = (self.sanitize(self.items[key]) + " " + str(_hash)).replace("\\", "")
            image.save(self.config_home + "DEBUG\\" + str(key) + " " + debug_label + ".png")
            ###END DEBUG

            # Plain strings are final; every other type needs further parsing.
            if _type != "String":
                self.items[key] = self.scenario.parse(self.items[key], _type)
            print(key + "\t" + str(self.items[key]))
Exemplo n.º 8
0
def hash_cards(db_session):
	my_images_path = "MTGimglib/set/M15" #put your image path here if you want to override current directory
	extension = "*.jpg"

	if not my_images_path:
	    path = os.getcwd() #get the current directory
	else:
	    path = my_images_path

	imgs = list() #load up an image list
	directory = os.path.join(path, extension)
	files = glob.glob(directory)

	for file in files:
		img = Image.open(file)
		imageG = img.convert('L')
		small = imageG.resize((9, 8), Image.ANTIALIAS)
		#img hash
		hashimg = imagehash.dhash(small)
		#change to number
		hstr = str(hashimg)
		num_of_bits = 64
		hashbin = bin(int(hstr, 16))[2:].zfill(num_of_bits) # backfills 0s 
		#get file name
		name_split = file.split('/')
		img_name = name_split[-1]
		name_ex = img_name.split('.')
		card_name = name_ex[0]
		print len(hashbin), img_name , hashbin

		# need to handle exceptions
		card_from_table = db_session.query(model.Card).filter(model.Card.imageName==card_name).first() # returning an object which is a row from the Card tabe
		card_from_table.hashId = hashbin # Make class attribute of card row hashbin 
		db_session.add(card_from_table)
	db_session.commit()
Exemplo n.º 9
0
def getImageHash(imagename):
    """Return the size-12 dHash of an image file as a hex string.

    When that hash is the all-zero string, the pHash prefixed with
    "phash_" is returned instead.
    """
    imageF = Image.open(imagename)
    digest = str(imagehash.dhash(imageF, 12))
    if digest != '000000000000000000000000000000000000':
        return digest
    return 'phash_'+str(imagehash.phash(imageF))
Exemplo n.º 10
0
def imageSearch(dataset, shlv, query):
    """Return the stored file names whose hash is close to the query image.

    Args:
        dataset: Unused by the current code (only referenced by the old,
            removed display loop).
        shlv: Path of the shelve database mapping hash strings to file names.
        query: Path of the query image.

    Returns:
        A list with one entry per database key whose ``ham_dist`` to the
        query's dHash string is below 12. Each entry is the string form of
        the stored value with surrounding "[", "]" and "'" stripped.
    """
    db = shelve.open(shlv)
    try:
        q = Image.open(query)
        h = str(imagehash.dhash(q))
        filenames = [
            str(db[ihash]).strip("[]'")
            for ihash in db
            if ham_dist(h, ihash) < 12
        ]
    finally:
        # The original close() came after the return statement and never
        # ran; close the shelve on every exit path.
        db.close()

    print ("Found %d images" % (len(filenames)))
    return filenames

# Main test
# img = str("images")
# ds = str("patterns")
# q = str("sample2.jpg")
# imageSearch(img,ds,q)
Exemplo n.º 11
0
def get_image_metadata(config, request):
    ''' Fetch the image named by the "url" argument and describe it as JSON.

    The image is downloaded through the SOCKS proxy configured under
    config['Tor']. The JSON response carries content type, format, size,
    resolution, perceptual and cryptographic hashes and, for JPEG/TIFF,
    selected EXIF tags. Responds with HTTP 400 when "url" is missing or the
    resource is not an image.
    '''

    try:
        url = request.GET['url']
    except KeyError:
        raise aiohttp.web.HTTPBadRequest(reason='"url" argument is required.')

    tor_config = config['Tor']
    socks_proxy = SOCKSConnector(tor_config['ip'], int(tor_config['port']))
    response = yield from aiohttp.get(url, connector=socks_proxy)
    content_type = response.headers['Content-type']

    if not content_type.startswith('image/'):
        reason = 'Requested a non-image resource ({}).'.format(content_type)
        raise aiohttp.web.HTTPBadRequest(reason=reason)

    image_data = yield from response.read()
    image = Image.open(io.BytesIO(image_data))

    extra = dict()
    if content_type in ('image/jpeg', 'image/tiff'):
        exif_tags = exifread.process_file(io.BytesIO(image_data))
        for name, tag in exif_tags.items():
            # Only the "Image" and "MakerNote" tag groups are reported.
            if not name.startswith(('Image', 'MakerNote')):
                continue
            values = tag.values
            if isinstance(values, (int, str)):
                extra[name] = values
            elif isinstance(values, list):
                # Lists led by an int are kept as lists; others are joined.
                if len(values) > 0 and isinstance(values[0], int):
                    extra[name] = values
                else:
                    extra[name] = ','.join(map(str, values))
            else:
                extra[name] = str(tag)

    hashes = {
        'ahash': str(imagehash.average_hash(image)),
        'dhash': str(imagehash.dhash(image)),
        'md5': hashlib.md5(image_data).hexdigest(),
        'phash': str(imagehash.phash(image)),
        'sha1': hashlib.sha1(image_data).hexdigest(),
        'sha256': hashlib.sha256(image_data).hexdigest(),
    }
    resolution = {
        'width': image.width,
        'height': image.height,
    }
    metadata = {
        'content_type': content_type,
        'extra': extra,
        'format': image.format,
        'hashes': hashes,
        'last_modified': response.headers.get('Last-modified', None),
        'resolution': resolution,
        'size': len(image_data),
    }

    payload = json.dumps(metadata).encode('utf8')
    return aiohttp.web.Response(
        headers={'Content-type': 'application/json; charset=utf8'},
        body=payload,
    )
Exemplo n.º 12
0
    def run(self, task):
        """Store the average, perceptual and difference hash of the task's image.

        The hashes are written as strings under ``self.results["imghash"]``
        and ``self.results`` is returned.
        """
        image = str2image(task.get_file_data)

        algorithms = (
            ("a_hash", imagehash.average_hash),
            ("p_hash", imagehash.phash),
            ("d_hash", imagehash.dhash),
        )
        for result_key, hash_func in algorithms:
            self.results["imghash"][result_key] = str(hash_func(image))

        return self.results
Exemplo n.º 13
0
 def create_from_image(cls, img, url, facebook=None, okcupid=None):
     """Create a record holding the three perceptual hashes of ``img``.

     The average, perceptual and difference hashes are stored together with
     ``url`` and the optional ``facebook`` / ``okcupid`` values. Nothing is
     returned.
     """
     fields = {
         'ahash': imagehash.average_hash(img),
         'phash': imagehash.phash(img),
         'dhash': imagehash.dhash(img),
         'url': url,
         'facebook': facebook,
         'okcupid': okcupid,
     }
     cls.objects.create(**fields)
Exemplo n.º 14
0
def hroom(autoquit=False):
    """Play one round of the game through a browser driver.

    Takes a picture of the board, looks up the value of every cell in the
    ``equations.dat`` shelve by the dHash of the cell's cropped image, asks
    ``robjects.r.get_right_indexes`` which cells to click, then clicks them
    and submits through injected JavaScript.

    ``autoquit`` is only referenced by the commented-out code at the end, so
    it currently has no effect.

    Returns the tuple ``(cells, game, driver)``.
    """
    run_id = get_id()
    # NOTE(review): LOG is created but not used anywhere in this function.
    LOG = logging.getLogger('RUN_{}'.format(run_id))
    # NOTE(review): the shelve is never closed in this function.
    db = shelve.open('equations.dat')
    workdir = make_workdir(run_id)
    driver = start_driver()
    # cookies = driver.get_cookies()
    # s = requests.Session()
    # for cookie in cookies:
    #     s.cookies.set(cookie['name'], cookie['value'])

    GameCell.set_driver(driver)
    GameCell.set_run_id(run_id)
    game = start_game(driver)
    column, row = get_column_row_sums(game)
    cells = separate_cells(game)
    picture = Image.open(take_picture(driver, workdir))
    numbers = []
    for cell in cells.values():
        img = crop_buttons(cell, picture, workdir)
        dha = imagehash.dhash(img)
        # The hash string must already be a key of the shelve; the stored
        # entry's 'value' field is the number shown in the cell.
        numbers.append(db[dha.__str__()]['value'])

    column = [int(x) for x in column]
    row = [int(x) for x in row]
    numbers = [int(x) for x in numbers]

    click_on_me = robjects.r.get_right_indexes(column, row, numbers)
    # Render the indexes as quoted strings ('1', '2', ...) for the JS array
    # literal below, which compares them against each tile's data-index.
    forj = ', '.join(map(lambda x: "'{}'".format(int(x)), click_on_me))
    # Doubled braces are literal braces for str.format; the single {} pair
    # receives the index list.
    tutu = '''
    game = document.getElementById('game');
    cells = game.getElementsByClassName('tile-clickable');
    results = [{}]
    for (i = 0; i < cells.length; i++) {{
        if(results.includes(cells[i].getAttribute('data-index'))){{
            cells[i].click();
        }}
    }}

    subm = document.getElementById('game-submit');
    subm.click()
    '''.format(forj)
    driver.execute_script(tutu)
    # print(tutu)

    # result = set(click_on_me)
    # for key, value in cells.items():
    #     if key in result:
    #         value.click()
    #     else:
    #         value.unclick()
    #
    # subm_button = driver.find_element_by_id('game-submit')
    # subm_button.click()
    # if autoquit:
    #     driver.quit()
    return cells, game, driver
def getHash(img):
    """Return six perceptual hashes of a fixed crop of an image file.

    The image at path ``img`` is converted to greyscale and cropped to the
    box (25, 37, 195, 150) before hashing.

    Returns:
        Tuple of hex strings ``(ahash, phash, psimplehash, dhash,
        vertdhash, whash)``.
    """
    # Indentation is normalised to spaces: the original mixed tabs and
    # spaces, which Python 3 rejects with a TabError.
    normal = Image.open(img).convert('L')
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
Exemplo n.º 16
0
    def get_image_hash(self,file):
        """Return the dHash of the image at ``file`` as a hex string.

        A warning is printed when ``file`` is not a regular file, but the
        open is still attempted, so the resulting error reaches the caller.
        Any exception from opening or hashing propagates unchanged.
        """
        # Rewritten with consistent space indentation and without the
        # Python-2-only "except Exception, e: raise" wrapper, which only
        # re-raised.
        if not os.path.isfile(file):
            print(file + " is not a file")
        img = Image.open(file)
        return str(imagehash.dhash(img))
Exemplo n.º 17
0
def hash_scan():
	"""Print and return the 64-character binary dHash of the test scan image.

	The image is converted to greyscale and resized to 9x8 before hashing.
	"""
	scan = Image.open("test_scan/test_inferno_fistcropped.png")
	small = scan.convert('L').resize((9, 8), Image.ANTIALIAS)
	hex_digest = str(imagehash.dhash(small))
	# Hex digest -> bit string, left-padded with zeros to 64 characters.
	hashbin = bin(int(hex_digest, 16))[2:].zfill(64)
	print(hashbin)
	return hashbin
Exemplo n.º 18
0
def resize(imageG):
    """Show, shrink and hash an image; return the hash as a 64-char bit string.

    The image is displayed, resized to 9x8, displayed again and hashed with
    dHash. The bit string is also printed.
    """
    imageG.show()  # shows cropped image
    small = imageG.resize((9, 8), Image.ANTIALIAS)
    small.show()  # shows resized image

    hex_digest = str(imagehash.dhash(small))
    # Hex digest -> bit string, left-padded with zeros to 64 characters.
    hashbin = bin(int(hex_digest, 16))[2:].zfill(64)
    print("this is hashbin " + hashbin)
    return hashbin
Exemplo n.º 19
0
def index(folder, type="jpg", reindex=False):
    """Index every ``*.<type>`` image in ``folder`` by its dHash.

    For each image the file name is appended to the list stored in the
    module-level ``db`` under the image's hash string. An image whose hash
    is already present is saved into the ``DUPLICATE`` directory; otherwise,
    unless ``reindex`` is true, it is saved into ``UNIQUE``.

    Args:
        folder: Directory to scan (joined with "/").
        type: File extension without the dot.
        reindex: When true, previously unseen images are recorded in ``db``
            but not copied to ``UNIQUE``.
    """
    # Uses consistent space indentation (the original mixed tabs and spaces)
    # and "in" instead of dict.has_key(), which Python 3 removed.
    for imagePath in glob.glob(folder + "/*." + type):
        filename = imagePath[imagePath.rfind("/") + 1:]
        image = Image.open(imagePath)
        h = str(imagehash.dhash(image))
        if h in db:
            print("Already there, copying to duplicates")
            image.save(DUPLICATE + "/" + filename)
        elif not reindex:
            image.save(UNIQUE + "/" + filename)
        db[h] = db.get(h, []) + [filename]
Exemplo n.º 20
0
def take_photo():
    """Capture frames forever and report large changes between frames.

    Every 0.1 s a frame is read from the module-level ``cam``. Once more
    than one earlier photo exists, the new frame's dHash string is compared
    with that of the most recent photo; a Hamming distance above
    ``MIN_SUSPICIOUS_DIFF`` prints an alert. Each captured frame is appended
    to ``pastPhotos``. The function does not return.
    """
    # The original re-invoked itself after each frame, which exhausts the
    # recursion limit after roughly a thousand frames; loop instead.
    while True:
        s, img = cam.read()
        if s:    # frame captured without any errors
            print("Took a photo")
            pil_im = Image.fromarray(img)

            if len(pastPhotos) > 1:
                image1_hash = list(str(imagehash.dhash(pil_im)))
                image2_hash = list(str(imagehash.dhash(pastPhotos[-1])))

                dist = spatial.distance.hamming(image1_hash, image2_hash)

                if dist > MIN_SUSPICIOUS_DIFF:
                    print("Cockroach detected!")

            pastPhotos.append(pil_im)

        sleep(0.1)
Exemplo n.º 21
0
def index_video(filename):
    """Return the dHash of frames sampled at regular intervals from a video.

    Frames are taken every ``steps_every_seconds`` seconds, starting at
    ``skip_first_seconds`` and stopping before the video's duration. The
    result is a list of ``{'t': second, 'hash': hex_string}`` dicts.
    """
    video = ffvideo.VideoStream(filename)
    sample_times = xrange(skip_first_seconds, int(video.duration), steps_every_seconds)
    return [
        {'t': t, 'hash': str(imagehash.dhash(video.get_frame_at_sec(t).image()))}
        for t in sample_times
    ]
Exemplo n.º 22
0
def getHash(img):
        """Return six perceptual hashes of a fixed crop of a resized image.

        The image at path ``img`` is converted to greyscale, resized to
        223x310 and cropped to the box (25, 37, 195, 150) before hashing.

        Returns:
            Tuple of hex strings ``(ahash, phash, psimplehash, dhash,
            vertdhash, whash)``.
        """
        size = 223,310
        normal = Image.open(img).convert('L')
        normal = normal.resize(size, Image.ANTIALIAS)
        crop = normal.crop((25,37,195,150))
        ahash = str(imagehash.average_hash(crop))
        phash = str(imagehash.phash(crop))
        psimplehash = str(imagehash.phash_simple(crop))
        dhash = str(imagehash.dhash(crop))
        vertdhash = str(imagehash.dhash_vertical(crop))
        whash = str(imagehash.whash(crop))
        # The fourth slot is the difference hash; the original returned
        # phash a second time and dropped the computed dhash.
        return ahash,phash,psimplehash,dhash,vertdhash,whash
Exemplo n.º 23
0
def compute_avg_hash(image):
	"""
	Return the difference hash (dHash) of `image` as a hex string.

	The name is historical: this used to compute an average hash, but the
	implementation now delegates to imagehash.dhash.
	"""
	signature = imagehash.dhash(image)
	return str(signature)
Exemplo n.º 24
0
def hash_value(img_fn, htype):
    """Hash the image file ``img_fn`` with the algorithm selected by ``htype``.

    ``'p'`` selects phash, ``'d'`` dhash and ``'w'`` whash. ``'a'`` and any
    other value select average_hash. The hash object itself is returned.
    """
    img = Image.open(img_fn)

    if htype == 'p':
        hash_func = imagehash.phash
    elif htype == 'd':
        hash_func = imagehash.dhash
    elif htype == 'w':
        hash_func = imagehash.whash
    else:
        # Covers 'a' as well as every unrecognised selector.
        hash_func = imagehash.average_hash

    return hash_func(img)
Exemplo n.º 25
0
def get_imagehashes(fp: Fileish,
                    size=FINGERPRINT_SIZE) -> Dict[str, imagehash.ImageHash]:
    """Return the ahash/phash/whash/dhash of a greyscale thumbnail of ``fp``.

    The image is resized to ``size`` x ``size`` with bilinear filtering and
    converted to mode 'L' before hashing. An empty dict is returned when an
    OSError occurs (probably a corrupt image file).
    """
    algorithms = {
        'ahash': imagehash.average_hash,
        'phash': imagehash.phash,
        'whash': imagehash.whash,
        'dhash': imagehash.dhash,
    }
    try:
        source = pil_image(fp)
        thumb = source.resize((size, size), PIL.Image.BILINEAR).convert('L')
        return {name: hash_func(thumb) for name, hash_func in algorithms.items()}
    except OSError:  # corrupt image file probably
        return {}
def findnearduplicates(dataset_path):
    """Find images in ``dataset_path`` whose dHash has been seen before.

    ``*.jpg`` files are scanned first, then ``*.png`` files. Each file is
    identified by its extension-less path split on ``os.sep``. The first
    file with a given hash is recorded in ``hashdict``, which is not defined
    in this function and is read from the enclosing scope; later files with
    the same hash are reported as duplicates.

    Returns:
        ``(duplicatelist, hashdict)``: the identifiers of the duplicate
        files and the hash -> identifier mapping.
    """
    duplicatelist = []
    for pattern in ("*.jpg", "*.png"):
        for imgfile in glob.glob(dataset_path + os.sep + pattern):
            filen, _ext = os.path.splitext(imgfile)
            fileid = filen.split(os.sep)
            # Release the file handle as soon as the hash is computed.
            with Image.open(imgfile) as img:
                filehash = str(imagehash.dhash(img))
            # "in" replaces dict.has_key(), which Python 3 removed.
            if filehash in hashdict:
                duplicatelist.append(fileid)
            else:
                hashdict[filehash] = fileid
    return duplicatelist, hashdict
Exemplo n.º 27
0
    def run(self, task):
        """Hash the task's image and look up similar images for each hash.

        Average, perceptual and difference hashes (with ``self.HASH_SIZE``)
        are stored as strings under ``self.results["imghash"]``. The result
        of ``self.get_similar_images`` for each hash is then stored under
        ``self.results["similar"]``. Returns ``self.results``.
        """
        self.task = task
        image = str2image(task.get_file_data)

        algorithms = (
            ("a_hash", imagehash.average_hash),
            ("p_hash", imagehash.phash),
            ("d_hash", imagehash.dhash),
        )

        # Calculate every hash first ...
        for result_key, hash_func in algorithms:
            digest = hash_func(image, hash_size=self.HASH_SIZE)
            self.results["imghash"][result_key] = str(digest)

        # ... then fetch the similar images for each of them.
        for result_key, hash_func in algorithms:
            self.results["similar"][result_key] = self.get_similar_images(
                self.results["imghash"][result_key], hash_func)

        return self.results
def run():
    """Hash every JPG in ``../input/Images_0.zip`` .. ``Images_9.zip`` into a CSV.

    For each image one row is written to ``image_hashes.csv`` with the image
    id (file name without its last four characters), the value of
    ``extract_dhash``, the average/difference/perceptual hashes and the
    pickled signature for that id. Empty entries are reported and skipped.
    An image that fails to process is reported and skipped so that a single
    bad file does not abort the run.
    """
    signatures = get_pickled_signatures()

    import csv
    columns = ['image_id', 'script_dhash', 'ahash', 'dhash', 'phash', 'signature']

    t0 = time()

    # Context managers ensure the CSV and every archive are closed even when
    # the run is interrupted.
    with open('image_hashes.csv', 'w') as hashes_file:
        csv_writer = csv.DictWriter(hashes_file, fieldnames=columns)
        csv_writer.writeheader()

        for zip_counter in range(0, 10):
            zip_path = '../input/Images_%d.zip' % zip_counter
            print('processing %s...' % zip_path)

            with zipfile.ZipFile(zip_path) as imgzipfile:
                for name in tqdm(imgzipfile.namelist()):
                    if not name.endswith('.jpg'):
                        continue
                    img_id = name.split('/')[-1][:-4]
                    try:
                        imgdata = imgzipfile.read(name)

                        if len(imgdata) == 0:
                            print('%s is empty' % img_id)
                            continue

                        img = Image.open(io.BytesIO(imgdata))

                        ahash = imagehash.average_hash(img)
                        dhash = imagehash.dhash(img)
                        phash = imagehash.phash(img)
                        script_dhash = extract_dhash(img)

                        csv_writer.writerow({'image_id': img_id, 'script_dhash': script_dhash,
                                             'ahash': str(ahash), 'dhash': str(dhash), 'phash': str(phash),
                                             'signature': signatures[int(img_id)]})
                    except Exception:
                        # Per-image best effort; unlike a bare "except:",
                        # this lets KeyboardInterrupt/SystemExit through.
                        print('error with ' + img_id)

    print('took %0.5fm' % ((time() - t0) / 60))
Exemplo n.º 29
0
def get_image_hash(url):
    """Download the imgur image referenced by ``url`` and return its dHash.

    The path component of ``url`` (without its leading "/") is resolved to
    an image URL through ``ImgurFetcher.get_image_url``. The image is
    downloaded to a temporary file under /tmp and hashed.

    Returns:
        The dHash as a hex string, or None when downloading, opening or
        hashing fails.
    """
    file_name = '/tmp/' + get_random_file_name()
    imgur = ImgurFetcher()
    myopener = MyOpener()

    if not url.startswith('http'):
        # Make urlparse treat the leading part as the host, not the path.
        url = '//' + url
    parsed = urlparse(url)
    imgur_url = imgur.get_image_url(parsed.path[1:])

    try:
        myopener.retrieve(imgur_url, file_name)
        with Image.open(file_name) as img:
            return str(imagehash.dhash(img))
    except Exception:
        return None
    finally:
        # The file does not exist when the download itself failed; an error
        # raised here would replace the None result with an exception.
        try:
            os.remove(file_name)
        except OSError:
            pass
Exemplo n.º 30
0
 def _insert_meta(self, data_store, comic_id):
     """Store a comic's JSON metadata together with hashes of its image.

     Nothing is stored when ``self._get_xkcd_json`` returns a falsy value.
     When the metadata has an ``img`` URL, the image is downloaded to a
     temporary file and hashed (average, difference, perceptual). Hashing is
     best effort: hashes that could not be computed are stored as ''.
     """
     j = self._get_xkcd_json(comic_id)
     if not j:
         return

     hash_avg = ''
     hash_d = ''
     hash_p = ''
     if j.get('img'):
         file_name = '/tmp/' + get_random_file_name()
         try:
             self.myopener.retrieve(j.get('img'), file_name)
             # Open the download once and release the handle before the
             # file is removed.
             with Image.open(file_name) as img:
                 hash_avg = imagehash.average_hash(img)
                 hash_d = imagehash.dhash(img)
                 phash_value = imagehash.phash(img)
                 hash_p = phash_value
         except Exception:
             # Deliberate best effort: the metadata is still stored with
             # whatever hashes were computed before the failure.
             pass
         finally:
             # The file is missing when the download failed; do not let the
             # cleanup raise and prevent the insert below.
             try:
                 os.remove(file_name)
             except OSError:
                 pass
     data_store.insert_xkcd_meta(comic_id, json.dumps(j), str(hash_avg), str(hash_d), str(hash_p))
Exemplo n.º 31
0
#                 if (compare_images(original, possible_duplicate)==True):
#                     uni= uni+1
#
# print("COUNT: ",uni)
# print(UNI)

#############3 Many counter
# Work list of images to compare against, seeded with the first tile's path.
Uni = []
# dHash strings recorded for the seed image.
Uni_hash = []
# NOTE(review): no and no1 are not used in the visible code.
no = 0
no1 = 0
Uni.append("out/0_0.png")

image_file = Image.open("out/0_0.png")
# print (str(imagehash.dhash(image_file)))
Uni_hash.append(str(imagehash.dhash(image_file)))

# NOTE(review): Uni is appended to while it is being iterated, so entries
# added inside the loop are visited as well.
for u in Uni:
    for i in range(37):
        print("i: ", i)
        for j in range(37):
            # print ("j: ",j)
            original = Image.open(u)
            # Tile files are named out/<j>_<i>.png.
            pd = "out/" + str(j) + "_" + str(i) + ".png"
            possible_duplicate = Image.open(pd)
            if (compare_images(original, possible_duplicate) == True):
                pd_hash = (str(imagehash.dhash(possible_duplicate)))
                if pd_hash not in Uni_hash:
                    # NOTE(review): this appends the Image object rather than
                    # the path `pd`, and pd_hash is never added to Uni_hash --
                    # confirm intent.
                    Uni.append(possible_duplicate)
                    print(Uni)
                    print("============= appended")
Exemplo n.º 32
0
def image_dhash(filepath):
    """Return the dHash (hash_size=16) of the image file as a hex string."""
    return str(imagehash.dhash(Image.open(filepath), hash_size=16))
import MySQLdb
from PIL import Image
import imagehash
import numpy
import sys
import json
option = sys.argv[1]

# db = MySQLdb.connect("host", "username", "password", "dbname")
db = MySQLdb.connect("localhost", "root", "biappanwar", "dejavu")
cursor = db.cursor()
hash = imagehash.dhash(Image.open(sys.argv[2]))

if option == "--recognize":
    getAllHashesQuery = "SELECT * FROM images"
    try:
        minDiff = 20
        similarFound = False
        similarImage = {}
        cursor.execute(getAllHashesQuery)
        result = cursor.fetchall()
        for row in result:
            hashDiff = imagehash.hex_to_hash(row[2]) - hash
            if (hashDiff <= minDiff):
                minDiff = hashDiff
                similarFound = True
                similarImage = row
        if (similarFound):
            res = {
                'image_id': int(similarImage[0]),
                'image_name': similarImage[1],
Exemplo n.º 34
0
def calc_image_hash(img_path):
    """Return the dHash object of the image at ``img_path``.

    The image file is closed again before returning.
    """
    with Image.open(img_path) as opened:
        digest = imagehash.dhash(opened)
    return digest
Exemplo n.º 35
0
 def hash_image(cls, filename, size=64):
     """Return the dHash object of the image file, computed with hash size ``size``."""
     return imagehash.dhash(Image.open(filename), size)
Exemplo n.º 36
0
def img_hash(fn):
    """Return the dHash and pHash hex strings of image file ``fn``, concatenated."""
    img = Image.open(fn)
    difference_part = str(imagehash.dhash(img))
    perceptual_part = str(imagehash.phash(img))
    return difference_part + perceptual_part
Exemplo n.º 37
0
def frame2dhash(frame):
    """Return the dHash object of ``frame`` after converting it to uint8."""
    pixels = np.uint8(frame)
    return imagehash.dhash(Image.fromarray(pixels))
Exemplo n.º 38
0
)
# Print every EXIF entry of `img` (defined above this fragment) as
# "<tag name> <value>"; tags without a known name print as None.
ex = img._getexif()
if ex != None:
    for (k, v) in img._getexif().items():
        print(ExifTags.TAGS.get(k), v)

# duplicate images

images = glob.glob('./data/images_sample/6812098/**.jpg'
                   )  # just comparing two folders for demo
images += glob.glob('./data/images_sample/6812035/**.jpg')

# Try up to 100 random pairs and stop at the first pair of different files
# whose dHash distance is below 7, showing both images side by side.
for im in range(100):
    im1 = random.choice(images)
    im2 = random.choice(images)
    h1 = imagehash.dhash(Image.open(im1))
    h2 = imagehash.dhash(Image.open(im2))
    # Subtracting two hashes gives their distance.
    feature = h1 - h2
    if feature < 7 and im1 != im2:
        print(feature, im1, im2)
        # Both images are resized to 400x400 and joined along axis 1.
        imgx = np.concatenate((Image.open(im1).resize(
            (400, 400), Image.ANTIALIAS), Image.open(im2).resize(
                (400, 400), Image.ANTIALIAS)),
                              axis=1)
        plt.imshow(imgx)
        plt.axis('off')
        break

# feature engineering

start_time = time.time()
Exemplo n.º 39
0
def remove_duplicates(
    images_dir: str,
    annotations_dir: str = None,
    duplicates_dir: str = None,
) -> List[str]:
    """
    Remove (or move aside) JPG images whose dHash matches an earlier image.

    Files in ``images_dir`` ending in ".jpg" are hashed with
    ``imagehash.dhash`` in ``os.listdir`` order. The first file seen for a
    hash is kept; every later file with the same hash is a duplicate.

    :param images_dir: directory containing the images
    :param annotations_dir: optional directory of annotation files; files
        whose name without extension equals a duplicate's image ID are moved
        or removed as well
    :param duplicates_dir: directory duplicates are moved into (created if
        missing); when None, duplicates are deleted instead
    :return: the original paths of the duplicate image files
    """

    # create the duplicates directory in case it doesn't yet exist
    if duplicates_dir is not None:
        os.makedirs(duplicates_dir, exist_ok=True)

    # build a set of image hashes and a list of IDs that are duplicates
    _logger.info("Building image hashes and identifying duplicates...")
    image_hashes = set()
    duplicate_ids = []
    for image_file_name in tqdm(os.listdir(images_dir)):

        # only process JPG images
        if not image_file_name.endswith(".jpg"):
            continue

        # hash the image, closing the file right away so that it can be
        # moved or deleted below without an open handle on it
        image_id = os.path.splitext(image_file_name)[0]
        with Image.open(os.path.join(images_dir, image_file_name)) as image:
            image_hash = imagehash.dhash(image)
        if image_hash in image_hashes:
            duplicate_ids.append(image_id)
        else:
            image_hashes.add(image_hash)
    _logger.info("Done")

    # move or remove the duplicates
    _logger.info("Moving/removing duplicate images...")
    duplicate_files = []
    for duplicate_id in tqdm(duplicate_ids):

        image_file_name = duplicate_id + ".jpg"
        image_path = os.path.join(images_dir, image_file_name)
        duplicate_files.append(image_path)

        # either move or delete the image file
        if duplicates_dir is None:
            os.remove(image_path)
        else:
            shutil.move(image_path,
                        os.path.join(duplicates_dir, image_file_name))
    _logger.info("Done")

    # move/remove corresponding annotations, if specified
    if annotations_dir is not None:
        _logger.info("Moving/removing corresponding duplicate annotations...")
        # set membership keeps the per-annotation lookup constant time
        duplicate_id_set = set(duplicate_ids)
        for annotation_file_name in tqdm(os.listdir(annotations_dir)):
            if os.path.splitext(annotation_file_name)[0] not in duplicate_id_set:
                continue
            annotation_path = os.path.join(annotations_dir,
                                           annotation_file_name)
            if duplicates_dir is None:
                os.remove(annotation_path)
            else:
                shutil.move(
                    annotation_path,
                    os.path.join(duplicates_dir, annotation_file_name),
                )
        _logger.info("Done")

    return duplicate_files
Exemplo n.º 40
0
def get_image_hash(image):
    """Return the dHash hex string of an array interpreted as an 'RGB' image."""
    rgb_image = Image.fromarray(image, 'RGB')
    return str(imagehash.dhash(rgb_image))
Exemplo n.º 41
0
# skapa filen
try:
    file = open("dHash.txt", 'w')
    fileFel = open("fel2.txt", 'w')
    nollVarden = open("nollVarden2.txt", 'w')
    hashVarde = open("hashVarde2.txt", 'w')
    dubblett = open("dHash_dubbletter.txt", 'w')
except IOError:
    print("Kunde inte skapa filen {} eller fel filen {}".format(file, fileFel))
start = time.time()
i = 0

for p in pathTillBilderna:
    image = Image.open(p)
    h = str(imagehash.dhash(image))
    hashen = imagehash.dhash(image)

    filename = p[p.rfind("/") + 1:]
    file.write("{};{}\n".format(p, h))

    l = allaBilder.get(hashen, [])
    l.append(p)
    allaBilder[hashen] = l

    # skriver alla identiska kopior till filen.
    if len(l) > 1:
        dubblett.write("{};{}\n".format(l, hashen))
    else:
        # vi sätter ett has-värde på alla bilder som inte är dubletter
        # det kan ju komma in dubletter i framtiden.
Exemplo n.º 42
0
def getImageHash(img):
    """Return the (average, perceptual, difference) hash objects of image file ``img``."""
    opened = Image.open(img)
    return (
        imagehash.average_hash(opened),
        imagehash.phash(opened),
        imagehash.dhash(opened),
    )
Exemplo n.º 43
0
def dhash(imageA, imageB):
    """Return the difference between the dHashes of two images."""
    return imagehash.dhash(imageA) - imagehash.dhash(imageB)
Exemplo n.º 44
0
def dcm_hash(patient_id):
    """Return the dHash and pHash hex strings of a patient's DICOM image, concatenated.

    The file ``<TRAIN_DIR>/<patient_id>.dcm`` is read and its pixel array
    is hashed.
    """
    dcm_data = pydicom.read_file(f'{TRAIN_DIR}/{patient_id}.dcm')
    img = Image.fromarray(dcm_data.pixel_array)
    difference_part = str(imagehash.dhash(img))
    perceptual_part = str(imagehash.phash(img))
    return difference_part + perceptual_part
Exemplo n.º 45
0
def dhash(file1, file2):
    """Print the dHash of two image files and the difference between them.

    Each file is opened, converted to grayscale and hashed with
    ``imagehash.dhash``. Nothing is returned; the results are only printed.
    """
    first_hash, second_hash = [
        imagehash.dhash(ImageOps.grayscale(Image.open(path)))
        for path in (file1, file2)
    ]
    print("fake_hash1", first_hash)
    print("fake_hash2", second_hash)
    print("dhash diff text", first_hash - second_hash)
Exemplo n.º 46
0
# time the hashing operation
start = time.time()

# Number of files processed so far (only used for the progress message).
counter = 0
for f in haystackPaths:

    # NOTE(review): image_orig is opened but not used again in this snippet.
    image_orig = Image.open(f)
    # presumably returns an array with the foreground of the image at path f;
    # the helper is not visible here -- verify.
    image = extract.foregroundExtract(f)  # check
    image = Image.fromarray(image)  # check
    filename = os.path.basename(f)
    # Save the extracted image under the same base name.
    image.save("./imagebooks2/" + filename)
    #     imageHash = imagehash.phash(image)
    # Compute four different hashes of the extracted image, all with
    # hash_size=32.
    p = imagehash.phash(image, hash_size=32)
    a = imagehash.average_hash(image, hash_size=32)
    d = imagehash.dhash(image, hash_size=32)
    w = imagehash.whash(image, hash_size=32)

    # Add one row per file. NOTE(review): looks like `haystack` is a pandas
    # DataFrame (append with ignore_index returns a new object) -- confirm.
    haystack = haystack.append(
        {
            'file': f,
            'phash': p,
            'ahash': a,
            'dhash': d,
            'whash': w
        },
        ignore_index=True)

    counter += 1
    print("Completed", counter, f)
Exemplo n.º 47
0
import PIL.ImageOps
import imagehash
import subprocess
import os
import hashlib

# List the JPEG files under data/6 by running `ls` in a shell and splitting
# its output on newlines.
# NOTE(review): splitting the output with a str separator and indexing
# dict.values() below suggest this was written for Python 2 -- confirm.
images3 = subprocess.Popen('ls data/6/*.jpg',
                           stdout=subprocess.PIPE,
                           shell=True).communicate()[0].strip().split('\n')
#images6 = subprocess.Popen('ls data/6/*.jpg', stdout=subprocess.PIPE, shell=True).communicate()[0].strip().split('\n')
# Maps each image path to its dHash, stored as an integer.
hash3 = {}
#hash6 = []

for i in images3:
    # NOTE(review): `Image` is not imported in the visible import lines
    # (only PIL.ImageOps is) -- confirm where it comes from.
    im = Image.open(i)
    h = imagehash.dhash(im)
    im.close()
    # The hash prints as a hex string; parse it as a base-16 integer.
    hash3[i] = int(str(h), 16)

# Mean of all integer hash values.
v = hash3.values()
s = 0.0
for i in v:
    s += i

s = s / len(v)

# Track the largest absolute deviation from the mean, ignoring one specific
# hard-coded hash value.
a = v[0]
m = abs(v[0] - s)
for i in v:
    if abs(i - s) > m and i != 17632265907812790788:
        m = abs(i - s)
Exemplo n.º 48
0
def hash_image(image, algorithm=None):
    """
    Hashes a given image

    image: Can be an URL, a path, a base64 encoded string or a PIL.Image.Image instance
    algorithm: Name of the hashing algorithm to use (case and spaces are
        ignored). When None, ``config.Hash.algorithm`` is used.

    Returns a HashObject built from the hash and the PIL image (plus the URL
    when the image had to be downloaded). On failure a HashingError instance
    is *returned*, not raised: "INVALID_IMAGE_TYPE" when the input cannot be
    turned into a PIL image, "INVALID_ALGORITHM" when the algorithm name is
    not recognised.

    Erina Project — 2020\n
    © Anime no Sekai
    """
    has_url = False
    url = None

    log("ErinaHash", "Hashing an image...")
    # Needs to be a PIL instance
    if isfile(str(image)):
        image = Image.open(image)
    elif not isinstance(image, Image.Image):
        try:
            # Decode once and reuse the bytes; an empty payload is treated
            # like invalid base64 so that the URL fallback is attempted.
            decoded = base64.b64decode(str(image), validate=True)
            if not decoded:
                raise ValueError("b64decode returned an empty string")
            image = Image.open(BytesIO(decoded))
        except Exception:
            # Not valid base64 image data: last resort, treat it as a URL.
            # `Exception` (rather than a bare except) keeps the best-effort
            # fallback without swallowing KeyboardInterrupt/SystemExit.
            try:
                url = image
                image = Image.open(
                    BytesIO(requests.get(str(image)).content)
                )  # Open the downloaded image as a PIL Image instance
                has_url = True
            except Exception:
                return HashingError(
                    "INVALID_IMAGE_TYPE",
                    "We couldn't convert the given image to a PIL.Image.Image instance"
                )

    if algorithm is None:
        algorithm = str(config.Hash.algorithm)

    # Aliases are compared after lower-casing, so every alias listed below
    # must itself be lower case.
    algorithm = str(algorithm).lower().replace(" ", "")
    if algorithm in ['ahash', 'a', 'averagehash', 'average']:
        result = imagehash.average_hash(image)
    elif algorithm in ['chash', 'c']:
        result = imagehash.colorhash(image)
    elif algorithm in ['dhash', 'd']:
        result = imagehash.dhash(image)
    elif algorithm in ['phash', 'p', 'perceptual', 'perceptualhash']:
        result = imagehash.phash(image)
    elif algorithm in ['whash', 'w', 'wavelet', 'wavelethash']:
        # The original alias was spelled 'wHash', which could never match the
        # lower-cased name, so "whash" was rejected as an invalid algorithm.
        result = imagehash.whash(image)
    else:
        # Underscores are only ignored for the multi-word algorithm names.
        algorithm = algorithm.replace("_", "")
        if algorithm in [
                'dhashvertical', 'dvertical', 'dvert', 'verticald',
                'verticaldhash'
        ]:
            result = imagehash.dhash_vertical(image)
        elif algorithm in [
                'phashsimple', 'psimple', 'perceptualsimple',
                'simpleperceptual', 'simplep', 'simplephash',
                # 'simpleperceptualhas' is the historical (truncated)
                # spelling; it stays accepted for backward compatibility.
                'simpleperceptualhas', 'simpleperceptualhash'
        ]:
            result = imagehash.phash_simple(image)
        else:
            return HashingError(
                "INVALID_ALGORITHM",
                "We couldn't determine the hashing algorithm you wanted to use."
            )

    if has_url:
        return HashObject(result, image, url)
    return HashObject(result, image)
Exemplo n.º 49
0
    def validate(self, data):
        """Validate an uploaded image (file or URL) and populate ``data``.

        Reads optional settings from ``self.context`` (``request``,
        ``verify_unique``, ``check_hamming``, ``file_object``, ``folder``,
        ``file_name``) and fills in ``object_uuid`` (when supplied), ``width``,
        ``height``, ``url``, ``image_hash``, ``file_format`` and ``file_size``.

        Raises ValidationError when the supplied ID already exists, when both
        a file and a URL are given, when the URL or file is invalid, when the
        image is too small (unless ``editor`` is 'true'), too large, of a
        disallowed format, or (with ``verify_unique``) already uploaded.

        Returns the populated ``data`` dict.
        """
        # This is abnormally long since we're not verifying actual user input
        # we're analyzing the image or url provided and then having to do
        # the validation on the populated parameters
        # Please note this is run after all other field validators
        # http://stackoverflow.com/questions/27591574/
        # order-of-serializer-validation-in-django-rest-framework
        request = self.context.get('request')
        if request is not None:
            # The client-chosen ID arrives in the 'random' query parameter.
            object_uuid = request.query_params.get('random', None)
            editor = request.query_params.get('editor', 'false').lower()
        else:
            object_uuid = None
            editor = 'false'
        verify_unique = self.context.get('verify_unique', False)
        check_hamming = self.context.get('check_hamming', False)
        file_object = data.get('file_object')

        if object_uuid is not None:
            serializers.UUIDField().run_validators(object_uuid)
            # NOTE(review): the query is built by string interpolation from a
            # request parameter; confirm run_validators really rejects
            # non-UUID input, or switch to a parameterized query.
            query = 'MATCH (a:SBObject {object_uuid: "%s"}) ' \
                    'RETURN a' % object_uuid
            res, _ = db.cypher_query(query)
            if res.one:
                raise ValidationError("ID must be unique.")
            data['object_uuid'] = object_uuid
        if file_object is None:
            # For cropping unless we want to move the processing into the
            # validator
            file_object = self.context.get('file_object', None)

        folder = self.context.get('folder',
                                  settings.AWS_PROFILE_PICTURE_FOLDER_NAME)
        url = data.get('url')

        if file_object and url:
            raise ValidationError("Cannot process both a URL and a "
                                  "File at the same time")
        try:
            file_size, file_format, file_object = get_file_info(
                file_object, url)
        except (ValueError, urllib2.HTTPError, urllib2.URLError):
            raise ValidationError("Invalid URL")
        image_uuid = str(uuid1())
        try:
            data['width'], data['height'], file_name, image = get_image_data(
                image_uuid, file_object)
        except IOError:
            raise ValidationError("You've uploaded an invalid file type. "
                                  "Valid types are jpeg, jpg, and png")
        # A file name supplied through the context overrides the one returned
        # by get_image_data.
        if self.context.get('file_name', None) is not None:
            file_name = self.context.get('file_name')
        # The minimum-dimension checks are skipped when editor == 'true'.
        if editor != 'true':
            if data['width'] < 100:
                raise ValidationError("Must be at least 100 pixels wide")
            if data['height'] < 100:
                raise ValidationError("Must be at least 100 pixels tall")
        # NOTE(review): the message hard-codes "20mb" while the actual limit
        # comes from settings.ALLOWED_IMAGE_SIZE -- confirm they agree.
        if file_size > settings.ALLOWED_IMAGE_SIZE:
            raise ValidationError(
                "Your file cannot be larger than 20mb. Please select "
                "a smaller file.")
        if file_format not in settings.ALLOWED_IMAGE_FORMATS:
            raise serializers.ValidationError(
                'You have provided an invalid file type. '
                'The valid file types are gif, jpeg, jpg, and png')

        data['url'] = check_sagebrew_url(url, folder, file_name, file_object)
        # The dHash is stored as a string and used for the duplicate checks
        # below.
        data['image_hash'] = str(imagehash.dhash(image))
        if verify_unique:
            # Reject the upload when an UploadedObject with the exact same
            # hash already exists.
            query = 'MATCH (upload:UploadedObject) ' \
                    'WHERE upload.image_hash="%s" ' \
                    'RETURN true' % data['image_hash']
            res, _ = db.cypher_query(query)
            if res.one:
                raise ValidationError("Image must be unique")
        if check_hamming:
            # When truthy, check_hamming is used as a mapping with optional
            # 'distance' (default 11) and 'time_frame' entries.
            verify_hamming_distance(data['image_hash'],
                                    check_hamming.get('distance', 11),
                                    check_hamming.get('time_frame'))
        data['file_format'] = file_format
        data['file_size'] = file_size
        return data
Exemplo n.º 50
0
import sys
'''
Requirements:
    pip install pillow
    pip install ImageHash
Hash mode:
    average hashing (aHash)
    perception hashing (pHash)
    difference hashing (dHash)
    wavelet hashing (wHash)
'''
from PIL import Image
import imagehash

# Command line: <image path> <mode number>
image = sys.argv[1]
mode = int(sys.argv[2])

# Mode number -> name of the imagehash function to use. Any other mode falls
# back to average hashing. The function is looked up by name so that only the
# selected one is ever resolved.
hash_function_names = {
    1: 'average_hash',
    2: 'phash',
    3: 'dhash',
    4: 'whash',
}
hash_function = getattr(imagehash,
                        hash_function_names.get(mode, 'average_hash'))
hash = hash_function(Image.open(image))
print(hash)
Exemplo n.º 51
0
    def query_image(self, image):
        """Find stored images similar to ``image``.

        The query is hashed with dHash and pHash. dHash differences to every
        entry of ``self.hashes`` are split into two clusters with KMeans; the
        mean of the two cluster centres is the cutoff. Entries below the
        cutoff are then re-checked against ``self.phashes`` and kept when the
        pHash difference is below 10.

        Parameters:
        -image: query image (anything ``np.array`` accepts)

        Output:
        -final_matches: list of dicts with keys "name" (path from
         ``self.paths``), "similarity" (pHash difference / MAX_DIFF) and
         "response" (always 1); empty when nothing matches
        """
        pixels = np.array(image)
        query_h = imagehash.dhash(Image.fromarray(pixels))
        query_ph = imagehash.phash(Image.fromarray(pixels))

        # dHash difference between the query and every stored hash
        hash_differences = [
            query_h - self.hashes[i] for i in range(len(self.hashes))
        ]

        # use k means to find threshold for similarity cutoff
        column = np.array(hash_differences).reshape(-1, 1)
        kmeans = KMeans(n_clusters=2).fit(column)
        centers = sorted(kmeans.cluster_centers_.flatten())
        threshold = np.mean(centers)

        # indices of the stored images that pass the dHash cutoff
        matches = [
            index for index, difference in enumerate(hash_differences)
            if difference < threshold
        ]

        final_matches = []
        phash_differences = []
        # second stage: confirm each candidate with its pHash difference
        for j in matches:
            diff = query_ph - self.phashes[j]
            phash_differences.append(diff)
            if diff >= 10:
                continue
            final_matches.append({
                "name": self.paths[j],
                "similarity": diff / MAX_DIFF,
                "response": 1,
            })

        # final output: paths to the images
        return final_matches
Exemplo n.º 52
0
from PIL import Image
import imagehash
import argparse
import shelve
import glob


# Command-line options: the dataset directory and the shelve file to write.
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
                help="path to input datasets")
ap.add_argument("-s", "--shelve", required=True,
                help="output shelve database")
args = vars(ap.parse_args())

db = shelve.open(args["shelve"], writeback=True)

# Index every matching image of the dataset directory by its dHash string.
for imagePath in glob.glob(args["dataset"] + "/*.jp*g"):
    print(imagePath)
    image = Image.open(imagePath)
    h = str(imagehash.dhash(image))
    # Text after the last "/" (the whole path when it contains none).
    filename = imagePath.rsplit("/", 1)[-1]
    already_seen = db.get(h, [])
    db[h] = already_seen + [filename]

# Report every hash shared by more than one file.
for key in db.keys():
    names = db[key]
    if len(names) > 1:
        print(names)
db.close()
Exemplo n.º 53
0
def process_image(image_id, full_url, thumb_url, source_url):
    """Download an image, hash it, upload it with a thumbnail, record the hashes.

    The image is fetched from ``source_url``. On a non-200 response the
    ``orm.Image`` row with ``image_id`` gets ``fetched = -1`` and the function
    returns. Otherwise the full image and a WebP thumbnail (at most 640x640)
    are uploaded to the buckets named in ``conf``, under the last path segment
    of ``full_url`` / ``thumb_url``; aHash, pHash and dHash (plus, on a
    best-effort basis, ``hash_image``'s "bmbhash") are stored as ``orm.Hash``
    rows and the image row is marked ``fetched = 1`` with its size.

    NOTE(review): uses Python 2 print statements.
    """
    import requests
    import orm
    import boto
    import gcs_oauth2_boto_plugin
    import tempfile
    import mimetypes
    import conf
    from PIL import Image as pimage
    from PIL import ImageFile
    # Let PIL load images whose data is truncated instead of raising.
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    import imagehash
    from hashtest import hash_image

    session = orm.Session()

    gcs_oauth2_boto_plugin.SetFallbackClientIdAndSecret(
        conf.client_id, conf.client_secret)

    fullbucket = boto.storage_uri(conf.fullbucket, 'gs').get_bucket()
    thumbbucket = boto.storage_uri(conf.thumbbucket, 'gs').get_bucket()

    # Fetch images
    print "%d: Starting" % image_id
    response = requests.get(source_url, stream=True)
    if not response.status_code == 200:
        # Mark the image as failed and stop.
        session.query(orm.Image).filter(orm.Image.id == image_id).update(
            {'fetched': -1})
        session.commit()
        return

    fulltemp = tempfile.NamedTemporaryFile()
    thumbtemp = tempfile.NamedTemporaryFile()

    # Stream the download into the temp file in 4096-byte blocks, then rewind
    # so PIL can read it from the start.
    for block in response.iter_content(4096):
        fulltemp.write(block)
    fulltemp.seek(0)

    himg = pimage.open(fulltemp)
    ahash, phash, dhash = imagehash.average_hash(himg), imagehash.phash(
        himg), imagehash.dhash(himg)
    # The hashes print as hex strings; parse them as integers.
    ahash, phash, dhash = int(str(ahash),
                              base=16), int(str(phash),
                                            base=16), int(str(dhash), base=16)

    # Save images, make thumb
    himg.thumbnail((640, 640))
    himg.convert("RGB").save(thumbtemp, format='WebP')

    del himg

    # Values of 2**63 and above are shifted down by 2**64, i.e. the unsigned
    # value is reinterpreted as a signed 64-bit integer (presumably to fit the
    # database column -- verify against the schema).
    if ahash >= 2**63:
        ahash -= 2**64

    if phash >= 2**63:
        phash -= 2**64

    if dhash >= 2**63:
        dhash -= 2**64

    # Upload
    fulltemp.seek(0)
    thumbtemp.seek(0)

    # Object names are the last path segment of the target URLs.
    fullkey = fullbucket.new_key(full_url.split('/')[-1])
    thumbkey = thumbbucket.new_key(thumb_url.split('/')[-1])

    meta = {
        'Cache-Control': 'public, max-age=3600',
        'Content-Type': response.headers['content-type'],
    }

    fullkey.set_contents_from_file(fulltemp, headers=meta)
    print "%d: Uploaded full" % image_id

    # Same headers for the thumbnail, except that it is always WebP.
    meta['Content-Type'] = 'image/webp'
    thumbkey.set_contents_from_file(thumbtemp, headers=meta)
    print "%d: Uploaded thumb" % image_id

    # Best effort: any failure while computing or adding the bmbhash is
    # silently ignored.
    # NOTE(review): the bare except also hides programming errors -- consider
    # narrowing it and logging.
    try:
        bmbhash = hash_image(fulltemp.name)
        session.add(orm.Hash(name=u'bmbhash', value=bmbhash,
                             image_id=image_id))
    except:
        pass

    session.add(orm.Hash(name=u'ahash', value=ahash, image_id=image_id))
    session.add(orm.Hash(name=u'phash', value=phash, image_id=image_id))
    session.add(orm.Hash(name=u'dhash', value=dhash, image_id=image_id))
    # Mark the image as fetched and record the size reported by the server.
    session.query(orm.Image).filter(orm.Image.id == image_id).update({
        'fetched':
        1,
        'size':
        int(response.headers['content-length'])
    })
    session.commit()
    fulltemp.close()
    thumbtemp.close()
Exemplo n.º 54
0
def hash_image(image_file: str) -> str:
    """Return the dHash of the image stored at ``image_file`` as a string."""
    return str(imagehash.dhash(Image.open(image_file)))
def ImageHash(path):
    """Open the image at ``path`` and return its dHash as a string."""
    opened = Image.open(path)
    difference_hash = imagehash.dhash(opened)
    return str(difference_hash)
Exemplo n.º 56
0
from PIL import Image,ImageFile
import imagehash,os
from glob import glob
# Do not skip large images (let PIL load image files with truncated data).
ImageFile.LOAD_TRUNCATED_IMAGES = True

hash_a = imagehash.dhash(Image.open('./a_resize_cut.png'))
hash_b = imagehash.dhash(Image.open('./b_resize_cut.png'))

# Print each label on its own line, followed by the hash.
for label, value in (('hash_a=', hash_a), ('hash_b=', hash_b)):
    print(label)
    print(value)

Exemplo n.º 57
0
 def get_image_hash(self, file):
     """Open ``file`` as an image and return its dHash as a string."""
     return str(imagehash.dhash(Image.open(file)))
Exemplo n.º 58
0
        self.fontType = fontType
        self.fontFilePath = fontFilePath
        img = Image.open(fontFilePath)
        self.aHash = str(imagehash.average_hash(img))
        self.dHash = str(imagehash.dhash(img))
        self.pHash = str(imagehash.phash(img))
        self.wHash = str(imagehash.whash(img))


# One entry per font type; each entry is the list of hash records for the
# images found in that font type's directory.
# NOTE: this module-level name shadows the builtin `list`; it is kept as-is
# in case code outside this snippet refers to it.
list = []

for fontType in os.listdir(rootPah):
    fontTypeList = []
    fontDir = os.path.join(rootPah, fontType)
    for item in os.listdir(fontDir):
        fontFilePath = os.path.join(fontDir, item)
        # Close each image as soon as it has been hashed so that file handles
        # do not accumulate while walking the directories.
        with Image.open(fontFilePath) as img:
            font = {
                'fontType': fontType,
                'fontFilePath': fontFilePath,
                'aHash': str(imagehash.average_hash(img)),
                'dHash': str(imagehash.dhash(img)),
                'pHash': str(imagehash.phash(img)),
                'wHash': str(imagehash.whash(img)),
            }
        fontTypeList.append(font)

    list.append(fontTypeList)

# The context manager closes obj.json even if json.dump raises.
with open('obj.json', 'w') as fp:
    json.dump(list, fp)
Exemplo n.º 59
0
	def test_dhash(self):
		"""The dHash of ``self.image`` must equal the known reference value."""
		known_hash = "0026273b2b19550e"
		computed = str(imagehash.dhash(self.image))
		self.assertEqual(computed, known_hash)
Exemplo n.º 60
0
 def dh(self):
     """
     Compute the difference hash (dHash) of ``self.pil``.

     Reference:
     http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html

     :return: the result of ``imagehash.dhash(self.pil)``
     """
     difference_hash = imagehash.dhash(self.pil)
     return difference_hash