def compare_imgs(img, truth_filename, do_assert=True):
    """
    PROTIP: run the following to re-generate the test images:

        REGENERATE_TEST_IMAGES=1 pytest mujoco_py/tests/test_modder.py

    Note: do this in Docker so that images will work for testing.
    """
    assert isinstance(truth_filename, str)
    truth_filename = join(TEST_ASSET_DIR_PATH, truth_filename)
    if os.getenv('REGENERATE_TEST_IMAGES'):
        if exists(truth_filename):
            pre_path, ext = splitext(truth_filename)
            backup_path = "%s_old%s" % (pre_path, ext)
            move(truth_filename, backup_path)
        save_test_image(truth_filename, img)
        return 0
    true_img = np.asarray(Image.open(truth_filename))
    assert img.shape == true_img.shape
    hash0 = imagehash.dhash(Image.fromarray(img))
    hash1 = imagehash.dhash(Image.fromarray(true_img))
    diff = np.sum(hash0.hash != hash1.hash)
    if diff != 0:
        # If the assert fails, the best way to investigate is to run
        # pytest for the particular test. For example,
        #
        #     pytest -k test_something_something path/to/test.py
        save_test_image("/tmp/img.png", img)
        save_test_image("/tmp/true_img.png", true_img)
        save_test_image("/tmp/diff_img.png", img - true_img)
    if do_assert:
        assert diff <= 1
    return diff
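As a rough illustration of how a test might call compare_imgs, here is a minimal sketch; the render call, camera name, and reference filename are hypothetical placeholders, and the function itself only assumes a uint8 array with the same shape as the stored reference.

# Hypothetical usage sketch (names below are placeholders, not from the original test suite).
img = sim.render(255, 255, camera_name="topview")        # any HxWx3 uint8 array
diff = compare_imgs(img, "topview_reference.png", do_assert=False)
print("dHash bit difference:", diff)                     # 0 means perceptually identical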
def image_descriptor(image_path, prior=None):
    mtime = os.path.getmtime(image_path)
    ctime = os.path.getctime(image_path)
    if not prior or (not prior.get('modified')):
        img = Image.open(image_path)
        result = {'width': img.size[0],
                  'height': img.size[1],
                  'created': ctime,
                  'modified': mtime,
                  # TODO: if results are too bad, change hash sizes for more precision?
                  'aHash': str(imagehash.average_hash(img)),
                  'pHash': str(imagehash.phash(img)),
                  'dHash': str(imagehash.dhash(img)),
                  }
        return result

    changed = prior['modified'] < mtime
    img = Image.open(image_path)
    if changed or not prior["width"]:
        prior["width"] = img.size[0]
    if changed or not prior["height"]:
        prior["height"] = img.size[1]
    if changed or not prior["aHash"]:
        prior["aHash"] = str(imagehash.average_hash(img))
    if changed or not prior["pHash"]:
        prior["pHash"] = str(imagehash.phash(img))
    if changed or not prior["dHash"]:
        prior["dHash"] = str(imagehash.dhash(img))
    return prior
def color_dhash(imagepath):
    im = Image.open(imagepath)
    npim = np.asarray(im)
    imr = Image.fromarray(npim[:, :, 0])
    img = Image.fromarray(npim[:, :, 1])
    imb = Image.fromarray(npim[:, :, 2])
    # dhash of each colour channel as a binary string (without the '0b' prefix)
    hashr = bin(int(str(imagehash.dhash(imr)), 16))[2:]
    hashg = bin(int(str(imagehash.dhash(img)), 16))[2:]
    hashb = bin(int(str(imagehash.dhash(imb)), 16))[2:]
    # left-pad each channel hash to 64 bits
    gapr = 64 - len(hashr)
    gapg = 64 - len(hashg)
    gapb = 64 - len(hashb)
    hashrf = ''.join(['0' for i in range(gapr)])
    hashgf = ''.join(['0' for i in range(gapg)])
    hashbf = ''.join(['0' for i in range(gapb)])
    hashrf += hashr
    hashgf += hashg
    hashbf += hashb
    im.close()
    # concatenate the three 64-bit channel hashes into one 192-bit hex string
    return str(hex(int(hashrf + hashgf + hashbf, 2)).split('0x')[1].split('L')[0])
def __init__(self, res1, res2):
    print "Inside"
    print res1, res2
    urllib.urlretrieve(res1, "data1")
    print "Image 1 downloaded"
    urllib.urlretrieve(res2, "data2")
    print "Image 2 downloaded"
    self.hash = imagehash.dhash(Image.open("data1"))
    print "Hashing first image"
    self.otherhash = imagehash.dhash(Image.open("data2"))
    print "Hashing second image"
def keep_dissimilar(image1, image2, threshold):
    # takes 2 PIL Image objects
    # threshold is the minimum Hamming distance (fraction of differing hex
    # digits, 0..1) required before image2 is considered dissimilar
    # returns image2 only if dissimilar enough
    image1_hash = list(str(imagehash.dhash(image1)))
    image2_hash = list(str(imagehash.dhash(image2)))
    print image1_hash
    print image2_hash
    dist = spatial.distance.hamming(image1_hash, image2_hash)
    print dist
    if dist >= threshold:
        return image2
    else:
        return False
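Note that scipy's hamming distance here operates on the hexadecimal string of the hash, not on raw bits, and returns a fraction between 0 and 1. A minimal sketch of what that looks like on its own (the file paths are hypothetical):

# Minimal sketch, assuming two local image files; the paths are placeholders.
from PIL import Image
import imagehash
from scipy import spatial

h1 = list(str(imagehash.dhash(Image.open("frame_a.png"))))
h2 = list(str(imagehash.dhash(Image.open("frame_b.png"))))
# Fraction of the 16 hex digits that differ, between 0.0 and 1.0.
print(spatial.distance.hamming(h1, h2))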
def run(self):
    files = sorted(os.listdir('data/%s/media' % self.date_path))
    hashes = {}
    matches = []
    g = nx.Graph()
    for i in range(len(files)):
        f = files[i]
        fn = 'data/%s/media/%s' % (self.date_path, f)
        ahash = imagehash.average_hash(Image.open(fn))
        dhash = imagehash.dhash(Image.open(fn))
        phash = imagehash.phash(Image.open(fn))
        hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
        for j in range(0, i):
            f2name = files[j]
            f2 = hashes[f2name]
            sumhash = sum([ahash - f2['ahash'],
                           dhash - f2['dhash'],
                           phash - f2['phash']])
            if sumhash <= 40:
                matches.append([f, files[j],
                                ahash - f2['ahash'],
                                dhash - f2['dhash'],
                                phash - f2['phash'],
                                sumhash])
                g.add_edge(f, f2name)
    with self.output().open('w') as fp_graph:
        components = list(nx.connected_components(g))
        # Note: sets are not JSON serializable
        d = []
        for s in components:
            d.append(list(s))
        logging.debug(' - = - = - = GRAPH HERE - = - = - = -')
        logging.debug(d)
        json.dump(d, fp_graph, indent=2)
def scrape(self):
    """ This function converts captured images into text """
    item_images = self.capture()
    for entry in item_images:
        key = entry[0]
        image = entry[1]
        _type = self.allItems[key]["type"]
        # _hash = hashlib.md5(image.tobytes()).hexdigest()
        _hash = str(imagehash.dhash(image))
        _map = self.allItems[key]["hashmap"]

        # If the hash of this image does not already exist, use Tesseract OCR to give us a first guess
        if _hash not in _map:
            # Run each image through tesseract OCR to get a string representation of that image
            _map[_hash] = pytesseract.image_to_string(image)

        self.items[key] = _map[_hash]

        ### DEBUG
        image.save(self.config_home + "DEBUG\\" + str(key) + " " +
                   (self.sanitize(self.items[key]) + " " + str(_hash)).replace("\\", "") + ".png")
        ### END DEBUG

        # Stop there if the type of the item is string, but otherwise we have more parsing to do
        if _type != "String":
            self.items[key] = self.scenario.parse(self.items[key], _type)

        print key + "\t" + str(self.items[key])
def hash_cards(db_session):
    my_images_path = "MTGimglib/set/M15"  # put your image path here if you want to override the current directory
    extension = "*.jpg"
    if not my_images_path:
        path = os.getcwd()  # get the current directory
    else:
        path = my_images_path

    imgs = list()  # load up an image list
    directory = os.path.join(path, extension)
    files = glob.glob(directory)

    for file in files:
        img = Image.open(file)
        imageG = img.convert('L')
        small = imageG.resize((9, 8), Image.ANTIALIAS)
        # hash the image
        hashimg = imagehash.dhash(small)
        # change to a binary string
        hstr = str(hashimg)
        num_of_bits = 64
        hashbin = bin(int(hstr, 16))[2:].zfill(num_of_bits)  # backfills 0s
        # get the file name
        name_split = file.split('/')
        img_name = name_split[-1]
        name_ex = img_name.split('.')
        card_name = name_ex[0]
        print len(hashbin), img_name, hashbin
        # need to handle exceptions
        # returns an object which is a row from the Card table
        card_from_table = db_session.query(model.Card).filter(model.Card.imageName == card_name).first()
        card_from_table.hashId = hashbin  # store hashbin on the card row's hashId attribute
        db_session.add(card_from_table)
        db_session.commit()
def getImageHash(imagename):
    imageF = Image.open(imagename)
    h = str(imagehash.dhash(imageF, 12))
    if h == '000000000000000000000000000000000000':
        h = 'phash_' + str(imagehash.phash(imageF))
    return h
def imageSearch(dataset, shlv, query):
    # open the shelve database
    db = shelve.open(shlv)

    # load the query image, compute the difference image hash, and
    # grab the images from the database with a similar hash value
    filenames = []
    q = Image.open(query)
    h = str(imagehash.dhash(q))
    for ihash in db:
        hd = ham_dist(h, ihash)
        # print(hd)
        if hd < 12:
            filenames.append(str(db[ihash]).strip("[]'"))
    print("Found %d images" % (len(filenames)))

    # loop over the images
    # for filename in filenames:
    #     image = Image.open(dataset + "/" + str(filename))
    #     image.show()

    # close the shelve database before returning
    db.close()
    return filenames

# Main test
# img = str("images")
# ds = str("patterns")
# q = str("sample2.jpg")
# imageSearch(img, ds, q)
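The helper ham_dist is referenced but not shown above; purely as an assumption, one way it might be implemented is to compare the two hex-encoded hashes bit by bit:

# Hypothetical helper: Hamming distance between two hex-encoded hashes, counted in bits.
# This is an assumed implementation, not the original project's code.
def ham_dist(hex_a, hex_b):
    bits_a = bin(int(hex_a, 16))[2:].zfill(len(hex_a) * 4)
    bits_b = bin(int(hex_b, 16))[2:].zfill(len(hex_b) * 4)
    return sum(a != b for a, b in zip(bits_a, bits_b))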
def get_image_metadata(config, request):
    ''' Handle request for an image. '''

    try:
        url = request.GET['url']
    except KeyError:
        raise aiohttp.web.HTTPBadRequest(reason='"url" argument is required.')

    tor_config = config['Tor']
    socks_proxy = SOCKSConnector(tor_config['ip'], int(tor_config['port']))
    response = yield from aiohttp.get(url, connector=socks_proxy)
    content_type = response.headers['Content-type']

    if not content_type.startswith('image/'):
        reason = 'Requested a non-image resource ({}).'.format(content_type)
        raise aiohttp.web.HTTPBadRequest(reason=reason)

    image_data = yield from response.read()
    image_file = io.BytesIO(image_data)
    image = Image.open(image_file)

    extra = dict()
    if content_type in ('image/jpeg', 'image/tiff'):
        for name, tag in exifread.process_file(io.BytesIO(image_data)).items():
            if name.startswith('Image') or name.startswith('MakerNote'):
                if isinstance(tag.values, (int, str)):
                    extra[name] = tag.values
                elif isinstance(tag.values, list):
                    if len(tag.values) > 0 and isinstance(tag.values[0], int):
                        extra[name] = tag.values
                    else:
                        extra[name] = ','.join(map(str, tag.values))
                else:
                    extra[name] = str(tag)

    metadata = {
        'content_type': content_type,
        'extra': extra,
        'format': image.format,
        'hashes': {
            'ahash': str(imagehash.average_hash(image)),
            'dhash': str(imagehash.dhash(image)),
            'md5': hashlib.md5(image_data).hexdigest(),
            'phash': str(imagehash.phash(image)),
            'sha1': hashlib.sha1(image_data).hexdigest(),
            'sha256': hashlib.sha256(image_data).hexdigest(),
        },
        'last_modified': response.headers.get('Last-modified', None),
        'resolution': {
            'width': image.width,
            'height': image.height,
        },
        'size': len(image_data),
    }

    return aiohttp.web.Response(
        headers={'Content-type': 'application/json; charset=utf8'},
        body=json.dumps(metadata).encode('utf8'),
    )
def run(self, task):
    image = str2image(task.get_file_data)
    self.results["imghash"]["a_hash"] = str(imagehash.average_hash(image))
    self.results["imghash"]["p_hash"] = str(imagehash.phash(image))
    self.results["imghash"]["d_hash"] = str(imagehash.dhash(image))
    return self.results
def create_from_image(cls, img, url, facebook=None, okcupid=None):
    cls.objects.create(
        ahash=imagehash.average_hash(img),
        phash=imagehash.phash(img),
        dhash=imagehash.dhash(img),
        url=url,
        facebook=facebook,
        okcupid=okcupid)
def hroom(autoquit=False):
    run_id = get_id()
    LOG = logging.getLogger('RUN_{}'.format(run_id))
    db = shelve.open('equations.dat')
    workdir = make_workdir(run_id)
    driver = start_driver()
    # cookies = driver.get_cookies()
    # s = requests.Session()
    # for cookie in cookies:
    #     s.cookies.set(cookie['name'], cookie['value'])
    GameCell.set_driver(driver)
    GameCell.set_run_id(run_id)
    game = start_game(driver)
    column, row = get_column_row_sums(game)
    cells = separate_cells(game)
    picture = Image.open(take_picture(driver, workdir))
    numbers = []
    for cell in cells.values():
        img = crop_buttons(cell, picture, workdir)
        dha = imagehash.dhash(img)
        numbers.append(db[dha.__str__()]['value'])
    column = [int(x) for x in column]
    row = [int(x) for x in row]
    numbers = [int(x) for x in numbers]
    click_on_me = robjects.r.get_right_indexes(column, row, numbers)
    forj = ', '.join(map(lambda x: "'{}'".format(int(x)), click_on_me))
    tutu = '''
    game = document.getElementById('game');
    cells = game.getElementsByClassName('tile-clickable');
    results = [{}]
    for (i = 0; i < cells.length; i++) {{
        if(results.includes(cells[i].getAttribute('data-index'))){{
            cells[i].click();
        }}
    }}
    subm = document.getElementById('game-submit');
    subm.click()
    '''.format(forj)
    driver.execute_script(tutu)
    # print(tutu)
    # result = set(click_on_me)
    # for key, value in cells.items():
    #     if key in result:
    #         value.click()
    #     else:
    #         value.unclick()
    #
    # subm_button = driver.find_element_by_id('game-submit')
    # subm_button.click()
    # if autoquit:
    #     driver.quit()
    return cells, game, driver
def getHash(img):
    normal = Image.open(img).convert('L')
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
def get_image_hash(self, file):
    if not os.path.isfile(file):
        print file + " is not a file"
        # sys.exit(0)
    try:
        img = Image.open(file)
        h = str(imagehash.dhash(img))
        return h
    except Exception, e:
        raise
def hash_scan():
    img = Image.open("test_scan/test_inferno_fistcropped.png")
    imageG = img.convert('L')
    small = imageG.resize((9, 8), Image.ANTIALIAS)
    hashimg = imagehash.dhash(small)  # hash img
    hstr = str(hashimg)  # change to number
    num_of_bits = 64
    hashbin = bin(int(hstr, 16))[2:].zfill(num_of_bits)
    print hashbin
    return hashbin
def resize(imageG):
    # Resize for comparison
    imageG.show()  # shows cropped image
    small = imageG.resize((9, 8), Image.ANTIALIAS)
    small.show()  # shows resized image
    hashimg = imagehash.dhash(small)  # hash img
    h = str(hashimg)  # change to number
    num_of_bits = 64
    hashbin = bin(int(h, 16))[2:].zfill(num_of_bits)  # convert to a bit string and left-fill with 0s
    print "this is hashbin", hashbin
    return hashbin
def index(folder, type="jpg", reindex=False): for imagePath in glob.glob(folder + "/*."+type): filename = imagePath[imagePath.rfind("/") + 1:] image = Image.open(imagePath) h = str(imagehash.dhash(image)) if db.has_key(h): print "Already there, copying to duplicates" image.save(DUPLICATE+"/"+filename) elif not reindex: image.save(UNIQUE+"/"+filename) db[h] = db.get(h, []) + [filename]
def take_photo():
    # initialize the camera
    s, img = cam.read()
    if s:  # frame captured without any errors
        print "Took a photo"
        pil_im = Image.fromarray(img)
        if len(pastPhotos) > 1:
            image1_hash = list(str(imagehash.dhash(pil_im)))
            image2_hash = list(str(imagehash.dhash(pastPhotos[len(pastPhotos) - 1])))
            dist = spatial.distance.hamming(image1_hash, image2_hash)
            # print dist
            if dist > MIN_SUSPICIOUS_DIFF:
                print "Cockroach detected!"
        pastPhotos.append(pil_im)
    sleep(0.1)
    take_photo()
def index_video(filename):
    video = ffvideo.VideoStream(filename)
    hashes = []
    for t in xrange(skip_first_seconds, int(video.duration), steps_every_seconds):
        # try:
        frame = video.get_frame_at_sec(t).image()
        frame_hash = str(imagehash.dhash(frame))
        hashes.append({'t': t, 'hash': frame_hash})
        # except:
        #     print "Error processing file."
    return hashes
def getHash(img):
    size = 223, 310
    normal = Image.open(img).convert('L')
    normal = normal.resize(size, Image.ANTIALIAS)
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
def compute_avg_hash(image):
    """
    This is a 'difference perceptual hash' - it gives us an image's unique
    signature in hex format.
    The function is erroneously named - it used to be an 'average perceptual
    hash' computation, but we've changed that.
    """
    # small_image_bw = image.resize((8, 8), Image.ANTIALIAS).convert("L")
    # pixels = list(small_image_bw.getdata())
    # avg = sum(pixels) / len(pixels)
    # bits = "".join(map(lambda pixel: '1' if pixel > avg else '0', pixels))  # turn the image into a string of 0s and 1s
    # photo_hash = int(bits, 2).__format__('16x').upper()
    # return photo_hash
    return str(imagehash.dhash(image))
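For contrast with the commented-out average-hash logic above, here is a minimal sketch of the dHash idea (comparing each pixel to its right-hand neighbour rather than to the mean). This is only an illustration of the algorithm, not the imagehash library's internal code:

# Illustrative sketch of the dHash idea, not imagehash's actual implementation:
# resize to 9x8 grayscale, then compare each pixel to its right-hand neighbour.
def manual_dhash_bits(image):
    small = image.convert("L").resize((9, 8))
    pixels = list(small.getdata())
    bits = []
    for row in range(8):
        for col in range(8):
            left = pixels[row * 9 + col]
            right = pixels[row * 9 + col + 1]
            bits.append('1' if left > right else '0')
    return ''.join(bits)  # 64-character bit string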
def hash_value(img_fn, htype):
    img = Image.open(img_fn)
    if htype == 'a':
        hval = imagehash.average_hash(img)
    elif htype == 'p':
        hval = imagehash.phash(img)
    elif htype == 'd':
        hval = imagehash.dhash(img)
    elif htype == 'w':
        hval = imagehash.whash(img)
    else:
        hval = imagehash.average_hash(img)
    return hval
def get_imagehashes(fp: Fileish,
                    size=FINGERPRINT_SIZE) -> Dict[str, imagehash.ImageHash]:
    """Calculate perceptual hashes for comparison of identical images"""
    try:
        img = pil_image(fp)
        thumb = img.resize((size, size), PIL.Image.BILINEAR).convert('L')
        return dict(
            ahash=imagehash.average_hash(thumb),
            phash=imagehash.phash(thumb),
            whash=imagehash.whash(thumb),
            dhash=imagehash.dhash(thumb),
        )
    except OSError:  # corrupt image file probably
        return {}
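A hedged sketch of how the returned mapping might be used to compare two files. Fileish and pil_image are project-specific, so this only assumes the function above accepts an open binary file; the paths are placeholders:

# Hypothetical comparison of two files using the dict returned above.
hashes_a = get_imagehashes(open("cover_a.jpg", "rb"))
hashes_b = get_imagehashes(open("cover_b.jpg", "rb"))
if hashes_a and hashes_b:
    # Per-algorithm Hamming distances; 0 everywhere means the images match.
    distances = {name: hashes_a[name] - hashes_b[name] for name in hashes_a}
    print(distances)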
def findnearduplicates(dataset_path):
    # function to find near duplicates - uses perceptual hashes from the imagehash
    # module; returns both the duplicates list and the hashdict
    duplicatelist = []
    for imgfile in glob.glob(dataset_path + os.sep + "*.jpg"):
        filen, ext = os.path.splitext(imgfile)
        fileid = filen.split(os.sep)
        img = Image.open(imgfile)
        filehash = str(imagehash.dhash(img))
        if hashdict.has_key(filehash):
            duplicatelist.append(fileid)
        else:
            hashdict[filehash] = fileid
    for imgfile in glob.glob(dataset_path + os.sep + "*.png"):
        filen, ext = os.path.splitext(imgfile)
        fileid = filen.split(os.sep)
        img = Image.open(imgfile)
        filehash = str(imagehash.dhash(img))
        if hashdict.has_key(filehash):
            duplicatelist.append(fileid)
        else:
            hashdict[filehash] = fileid
    return duplicatelist, hashdict
def run(self, task):
    self.task = task
    image = str2image(task.get_file_data)

    # Calculate hash.
    self.results["imghash"]["a_hash"] = str(imagehash.average_hash(image, hash_size=self.HASH_SIZE))
    self.results["imghash"]["p_hash"] = str(imagehash.phash(image, hash_size=self.HASH_SIZE))
    self.results["imghash"]["d_hash"] = str(imagehash.dhash(image, hash_size=self.HASH_SIZE))

    # Get similar images.
    self.results["similar"]["a_hash"] = self.get_similar_images(self.results["imghash"]["a_hash"], imagehash.average_hash)
    self.results["similar"]["p_hash"] = self.get_similar_images(self.results["imghash"]["p_hash"], imagehash.phash)
    self.results["similar"]["d_hash"] = self.get_similar_images(self.results["imghash"]["d_hash"], imagehash.dhash)

    return self.results
def run():
    signatures = get_pickled_signatures()

    import csv
    hashes_file = open('image_hashes.csv', 'w')
    columns = ['image_id', 'script_dhash', 'ahash', 'dhash', 'phash', 'signature']
    csv_writer = csv.DictWriter(hashes_file, fieldnames=columns)
    csv_writer.writeheader()

    t0 = time()
    for zip_counter in range(0, 10):
        filename = '../input/Images_%d.zip' % zip_counter
        print 'processing %s...' % filename
        imgzipfile = zipfile.ZipFile(filename)
        namelist = imgzipfile.namelist()
        for name in tqdm(namelist):
            if not name.endswith('.jpg'):
                continue
            filename = name.split('/')[-1]
            img_id = filename[:-4]
            try:
                imgdata = imgzipfile.read(name)
                if len(imgdata) == 0:
                    print '%s is empty' % img_id
                    continue
                stream = io.BytesIO(imgdata)
                img = Image.open(stream)
                ahash = imagehash.average_hash(img)
                dhash = imagehash.dhash(img)
                phash = imagehash.phash(img)
                script_dhash = extract_dhash(img)
                csv_writer.writerow({'image_id': img_id,
                                     'script_dhash': script_dhash,
                                     'ahash': str(ahash),
                                     'dhash': str(dhash),
                                     'phash': str(phash),
                                     'signature': signatures[int(img_id)]})
            except:
                print 'error with ' + img_id
        hashes_file.flush()

    hashes_file.close()
    print 'took %0.5fm' % ((time() - t0) / 60)
def get_image_hash(url):
    file_name = '/tmp/' + get_random_file_name()
    imgur = ImgurFetcher()
    myopener = MyOpener()
    if not url.startswith('http'):
        url = '//' + url
    parsed = urlparse(url)
    imgur_url = imgur.get_image_url(parsed.path[1:])
    try:
        myopener.retrieve(imgur_url, file_name)
        return str(imagehash.dhash(Image.open(file_name)))
    except:
        return None
    finally:
        os.remove(file_name)
def _insert_meta(self, data_store, comic_id):
    j = self._get_xkcd_json(comic_id)
    hash_avg = ''
    hash_d = ''
    hash_p = ''
    if not j:
        return
    if j.get('img'):
        file_name = '/tmp/' + get_random_file_name()
        try:
            self.myopener.retrieve(j.get('img'), file_name)
            hash_avg = imagehash.average_hash(Image.open(file_name))
            hash_d = imagehash.dhash(Image.open(file_name))
            hash_p = imagehash.phash(Image.open(file_name))
        except:
            pass
        finally:
            os.remove(file_name)
    data_store.insert_xkcd_meta(comic_id, json.dumps(j), str(hash_avg),
                                str(hash_d), str(hash_p))
# if compare_images(original, possible_duplicate) == True:
#     uni = uni + 1
#     # print("COUNT: ", uni)
# print(UNI)

############# Many counter
Uni = []
Uni_hash = []
no = 0
no1 = 0
Uni.append("out/0_0.png")
image_file = Image.open("out/0_0.png")
# print(str(imagehash.dhash(image_file)))
Uni_hash.append(str(imagehash.dhash(image_file)))

for u in Uni:
    for i in range(37):
        print("i: ", i)
        for j in range(37):
            # print("j: ", j)
            original = Image.open(u)
            pd = "out/" + str(j) + "_" + str(i) + ".png"
            possible_duplicate = Image.open(pd)
            if compare_images(original, possible_duplicate) == True:
                pd_hash = str(imagehash.dhash(possible_duplicate))
                if pd_hash not in Uni_hash:
                    Uni.append(pd)  # append the path, not the Image object
                    Uni_hash.append(pd_hash)
                    print(Uni)
                    print("============= appended")
def image_dhash(filepath):
    img = Image.open(filepath)
    hash_value = imagehash.dhash(img, hash_size=16)
    return str(hash_value)
import MySQLdb
from PIL import Image
import imagehash
import numpy
import sys
import json

option = sys.argv[1]

# db = MySQLdb.connect("host", "username", "password", "dbname")
db = MySQLdb.connect("localhost", "root", "biappanwar", "dejavu")
cursor = db.cursor()

hash = imagehash.dhash(Image.open(sys.argv[2]))

if option == "--recognize":
    getAllHashesQuery = "SELECT * FROM images"
    try:
        minDiff = 20
        similarFound = False
        similarImage = {}
        cursor.execute(getAllHashesQuery)
        result = cursor.fetchall()
        for row in result:
            hashDiff = imagehash.hex_to_hash(row[2]) - hash
            if hashDiff <= minDiff:
                minDiff = hashDiff
                similarFound = True
                similarImage = row
        if similarFound:
            res = {
                'image_id': int(similarImage[0]),
                'image_name': similarImage[1],
def calc_image_hash(img_path):
    with Image.open(img_path) as img:
        img_hash = imagehash.dhash(img)
    return img_hash
def hash_image(cls, filename, size=64):
    image = Image.open(filename)
    return imagehash.dhash(image, size)
def img_hash(fn):
    img = Image.open(fn)
    return str(imagehash.dhash(img)) + str(imagehash.phash(img))
def frame2dhash(frame):
    img = Image.fromarray(np.uint8(frame))
    return imagehash.dhash(img)
)
ex = img._getexif()
if ex != None:
    for (k, v) in img._getexif().items():
        print(ExifTags.TAGS.get(k), v)

# duplicate images
images = glob.glob('./data/images_sample/6812098/**.jpg')  # just comparing two folders for demo
images += glob.glob('./data/images_sample/6812035/**.jpg')

for im in range(100):
    im1 = random.choice(images)
    im2 = random.choice(images)
    h1 = imagehash.dhash(Image.open(im1))
    h2 = imagehash.dhash(Image.open(im2))
    feature = h1 - h2
    if feature < 7 and im1 != im2:
        print(feature, im1, im2)
        imgx = np.concatenate((Image.open(im1).resize((400, 400), Image.ANTIALIAS),
                               Image.open(im2).resize((400, 400), Image.ANTIALIAS)),
                              axis=1)
        plt.imshow(imgx)
        plt.axis('off')
        break

# feature engineering
start_time = time.time()
def remove_duplicates(
        images_dir: str,
        annotations_dir: str = None,
        duplicates_dir: str = None,
) -> List[str]:
    """
    TODO
    """

    # create the duplicates directory in case it doesn't yet exist
    if duplicates_dir is not None:
        os.makedirs(duplicates_dir, exist_ok=True)

    # build a set of image hashes and a list of IDs that are duplicates
    _logger.info("Building image hashes and identifying duplicates...")
    image_hashes = set()
    duplicate_ids = []
    for image_file_name in tqdm(os.listdir(images_dir)):

        # only process JPG images
        if not image_file_name.endswith(".jpg"):
            continue

        # get a hash of the image and add the image ID to the list of duplicates
        # if it's already been added, otherwise add it to the set of hashes
        image = Image.open(os.path.join(images_dir, image_file_name))
        image_id = os.path.splitext(image_file_name)[0]
        image_hash = imagehash.dhash(image)
        if image_hash in image_hashes:
            duplicate_ids.append(image_id)
        else:
            image_hashes.add(image_hash)
    _logger.info("Done")

    # move or remove the duplicates
    _logger.info("Moving/removing duplicate images...")
    duplicate_files = []
    for duplicate_id in tqdm(duplicate_ids):
        image_file_name = duplicate_id + ".jpg"
        image_path = os.path.join(images_dir, image_file_name)
        duplicate_files.append(image_path)

        # either move or delete the image file
        if duplicates_dir is None:
            os.remove(image_path)
        else:
            shutil.move(image_path, os.path.join(duplicates_dir, image_file_name))
    _logger.info("Done")

    # move/remove corresponding annotations, if specified
    if annotations_dir is not None:
        _logger.info("Moving/removing corresponding duplicate annotations...")
        for annotation_file_name in tqdm(os.listdir(annotations_dir)):
            if os.path.splitext(annotation_file_name)[0] in duplicate_ids:
                annotation_path = os.path.join(annotations_dir, annotation_file_name)
                if duplicates_dir is None:
                    os.remove(annotation_path)
                else:
                    shutil.move(
                        annotation_path,
                        os.path.join(duplicates_dir, annotation_file_name),
                    )
        _logger.info("Done")

    return duplicate_files
def get_image_hash(image):
    image = Image.fromarray(image, 'RGB')
    return str(imagehash.dhash(image))
# create the output files
try:
    file = open("dHash.txt", 'w')
    fileFel = open("fel2.txt", 'w')
    nollVarden = open("nollVarden2.txt", 'w')
    hashVarde = open("hashVarde2.txt", 'w')
    dubblett = open("dHash_dubbletter.txt", 'w')
except IOError:
    print("Could not create the file {} or the error file {}".format(file, fileFel))

start = time.time()
i = 0
for p in pathTillBilderna:
    image = Image.open(p)
    h = str(imagehash.dhash(image))
    hashen = imagehash.dhash(image)
    filename = p[p.rfind("/") + 1:]
    file.write("{};{}\n".format(p, h))
    l = allaBilder.get(hashen, [])
    l.append(p)
    allaBilder[hashen] = l
    # write all identical copies to the file
    if len(l) > 1:
        dubblett.write("{};{}\n".format(l, hashen))
    else:
        # we store a hash value for every image that is not a duplicate,
        # since duplicates may still turn up later.
def getImageHash(img):
    io = Image.open(img)
    hash1 = imagehash.average_hash(io)
    hash2 = imagehash.phash(io)
    hash3 = imagehash.dhash(io)
    return hash1, hash2, hash3
def dhash(imageA, imageB):
    hashA = imagehash.dhash(imageA)
    hashB = imagehash.dhash(imageB)
    return hashA - hashB
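Subtracting two ImageHash objects yields the Hamming distance in bits, so the function above returns an integer between 0 and 64 for the default hash size. A minimal usage sketch (the file names are placeholders):

# Minimal usage sketch; the file names are hypothetical.
from PIL import Image

a = Image.open("photo_original.jpg")
b = Image.open("photo_resized.jpg")
# 0 means identical dHashes; small values (e.g. <= 5) usually indicate near-duplicates.
print(dhash(a, b))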
def dcm_hash(patient_id):
    dcm_data = pydicom.read_file(f'{TRAIN_DIR}/{patient_id}.dcm')
    img = Image.fromarray(dcm_data.pixel_array)
    return str(imagehash.dhash(img)) + str(imagehash.phash(img))
def dhash(file1, file2):
    fake_hash1 = imagehash.dhash(ImageOps.grayscale(Image.open(file1)))
    fake_hash2 = imagehash.dhash(ImageOps.grayscale(Image.open(file2)))
    print("fake_hash1", fake_hash1)
    print("fake_hash2", fake_hash2)
    print("dhash diff text", fake_hash1 - fake_hash2)
# time the hashing operation
start = time.time()
counter = 0

for f in haystackPaths:
    image_orig = Image.open(f)
    image = extract.foregroundExtract(f)  # check
    image = Image.fromarray(image)  # check
    filename = os.path.basename(f)
    image.save("./imagebooks2/" + filename)
    # imageHash = imagehash.phash(image)
    p = imagehash.phash(image, hash_size=32)
    a = imagehash.average_hash(image, hash_size=32)
    d = imagehash.dhash(image, hash_size=32)
    w = imagehash.whash(image, hash_size=32)
    haystack = haystack.append(
        {
            'file': f,
            'phash': p,
            'ahash': a,
            'dhash': d,
            'whash': w
        },
        ignore_index=True)
    counter += 1
    print("Completed", counter, f)
import PIL.ImageOps
import imagehash
import subprocess
import os
import hashlib

images3 = subprocess.Popen('ls data/6/*.jpg', stdout=subprocess.PIPE,
                           shell=True).communicate()[0].strip().split('\n')
# images6 = subprocess.Popen('ls data/6/*.jpg', stdout=subprocess.PIPE, shell=True).communicate()[0].strip().split('\n')

hash3 = {}
# hash6 = []
for i in images3:
    im = Image.open(i)
    h = imagehash.dhash(im)
    im.close()
    hash3[i] = int(str(h), 16)

v = hash3.values()
s = 0.0
for i in v:
    s += i
s = s / len(v)

a = v[0]
m = abs(v[0] - s)
for i in v:
    if abs(i - s) > m and i != 17632265907812790788:
        m = abs(i - s)
def hash_image(image, algorithm=None):
    """
    Hashes a given image

    image: Can be an URL, a path, a base64 encoded string or a PIL.Image.Image instance

    Erina Project — 2020\n
    © Anime no Sekai
    """
    result = None
    has_url = False
    url = None
    log("ErinaHash", "Hashing an image...")

    # Needs to be a PIL instance
    if isfile(str(image)):
        image = Image.open(image)
    elif isinstance(image, Image.Image):
        image = image
    else:
        try:
            if base64.b64decode(str(image), validate=True):
                image = Image.open(BytesIO(base64.b64decode(str(image))))
            else:
                raise ValueError("b64decode returned an empty string")
        except:
            try:
                url = image
                image = Image.open(
                    BytesIO(requests.get(str(image)).content)
                )  # Open the downloaded image as a PIL Image instance
                has_url = True
            except:
                return HashingError(
                    "INVALID_IMAGE_TYPE",
                    "We couldn't convert the given image to a PIL.Image.Image instance"
                )

    if algorithm is None:
        algorithm = str(config.Hash.algorithm)

    algorithm = str(algorithm).lower().replace(" ", "")
    if algorithm in ['ahash', 'a', 'averagehash', 'average']:
        result = imagehash.average_hash(image)
    elif algorithm in ['chash', 'c']:
        result = imagehash.colorhash(image)
    elif algorithm in ['dhash', 'd']:
        result = imagehash.dhash(image)
    elif algorithm in ['phash', 'p', 'perceptual', 'perceptualhash']:
        result = imagehash.phash(image)
    elif algorithm in ['whash', 'w']:  # lowercase, since algorithm was lowercased above
        result = imagehash.whash(image)
    else:
        algorithm = algorithm.replace("_", "")
        if algorithm in [
                'dhashvertical', 'dvertical', 'dvert', 'verticald', 'verticaldhash'
        ]:
            result = imagehash.dhash_vertical(image)
        elif algorithm in [
                'phashsimple', 'psimple', 'perceptualsimple', 'simpleperceptual',
                'simplep', 'simplephash', 'simpleperceptualhash'
        ]:
            result = imagehash.phash_simple(image)
        else:
            return HashingError(
                "INVALID_ALGORITHM",
                "We couldn't determine the hashing algorithm you wanted to use."
            )

    if has_url:
        return HashObject(result, image, url)
    else:
        return HashObject(result, image)
def validate(self, data):
    # This is abnormally long since we're not verifying actual user input;
    # we're analyzing the image or url provided and then having to do
    # the validation on the populated parameters.
    # Please note this is run after all other field validators
    # http://stackoverflow.com/questions/27591574/
    # order-of-serializer-validation-in-django-rest-framework
    request = self.context.get('request')
    if request is not None:
        object_uuid = request.query_params.get('random', None)
        editor = request.query_params.get('editor', 'false').lower()
    else:
        object_uuid = None
        editor = 'false'
    verify_unique = self.context.get('verify_unique', False)
    check_hamming = self.context.get('check_hamming', False)
    file_object = data.get('file_object')
    if object_uuid is not None:
        serializers.UUIDField().run_validators(object_uuid)
        query = 'MATCH (a:SBObject {object_uuid: "%s"}) ' \
                'RETURN a' % object_uuid
        res, _ = db.cypher_query(query)
        if res.one:
            raise ValidationError("ID must be unique.")
        data['object_uuid'] = object_uuid
    if file_object is None:
        # For cropping unless we want to move the processing into the
        # validator
        file_object = self.context.get('file_object', None)
    folder = self.context.get('folder',
                              settings.AWS_PROFILE_PICTURE_FOLDER_NAME)
    url = data.get('url')
    if file_object and url:
        raise ValidationError("Cannot process both a URL and a "
                              "File at the same time")
    try:
        file_size, file_format, file_object = get_file_info(
            file_object, url)
    except (ValueError, urllib2.HTTPError, urllib2.URLError):
        raise ValidationError("Invalid URL")
    image_uuid = str(uuid1())
    try:
        data['width'], data['height'], file_name, image = get_image_data(
            image_uuid, file_object)
    except IOError:
        raise ValidationError("You've uploaded an invalid file type. "
                              "Valid types are jpeg, jpg, and png")
    if self.context.get('file_name', None) is not None:
        file_name = self.context.get('file_name')
    if editor != 'true':
        if data['width'] < 100:
            raise ValidationError("Must be at least 100 pixels wide")
        if data['height'] < 100:
            raise ValidationError("Must be at least 100 pixels tall")
    if file_size > settings.ALLOWED_IMAGE_SIZE:
        raise ValidationError(
            "Your file cannot be larger than 20mb. Please select "
            "a smaller file.")
    if file_format not in settings.ALLOWED_IMAGE_FORMATS:
        raise serializers.ValidationError(
            'You have provided an invalid file type. '
            'The valid file types are gif, jpeg, jpg, and png')
    data['url'] = check_sagebrew_url(url, folder, file_name, file_object)
    data['image_hash'] = str(imagehash.dhash(image))
    if verify_unique:
        query = 'MATCH (upload:UploadedObject) ' \
                'WHERE upload.image_hash="%s" ' \
                'RETURN true' % data['image_hash']
        res, _ = db.cypher_query(query)
        if res.one:
            raise ValidationError("Image must be unique")
    if check_hamming:
        verify_hamming_distance(data['image_hash'],
                                check_hamming.get('distance', 11),
                                check_hamming.get('time_frame'))
    data['file_format'] = file_format
    data['file_size'] = file_size
    return data
import sys
'''
Requirements:
    pip install pillow
    pip install ImageHash

Hash modes:
    average hashing (aHash)
    perception hashing (pHash)
    difference hashing (dHash)
    wavelet hashing (wHash)
'''
from PIL import Image
import imagehash

image = sys.argv[1]
mode = int(sys.argv[2])

if mode == 1:
    hash = imagehash.average_hash(Image.open(image))
elif mode == 2:
    hash = imagehash.phash(Image.open(image))
elif mode == 3:
    hash = imagehash.dhash(Image.open(image))
elif mode == 4:
    hash = imagehash.whash(Image.open(image))
else:
    hash = imagehash.average_hash(Image.open(image))

print(hash)
def query_image(self, image):
    """
    image_hash = hashing_func(processing_func(image))
    find the most similar images by Hamming distance

    Parameters:
        - query image
    Output:
        - final_matches: paths to the similar images
    """
    matches = []
    final_matches = []
    hash_differences = []
    phash_differences = []

    query = np.array(image)
    query_h = imagehash.dhash(Image.fromarray(query))
    query_ph = imagehash.phash(Image.fromarray(query))

    # generate hash differences
    for i in range(len(self.hashes)):
        diff = query_h - self.hashes[i]
        hash_differences.append(diff)

    # use k-means to find a threshold for the similarity cutoff
    kmeans = KMeans(n_clusters=2).fit(
        np.array(hash_differences).reshape(-1, 1))
    centers = sorted(kmeans.cluster_centers_.flatten())
    threshold = np.mean(centers)

    for i in range(len(hash_differences)):
        if hash_differences[i] < threshold:
            matches.append(i)

    # if no match, do nothing
    if not matches:
        pass
    else:
        # for images that fall within the dhash threshold, try nesting phash
        # (or Dice, Jaccard, mutual information)
        for j in matches:
            # if image.endswith('.dcm'):
            #     ds = pydicom.dcmread(matches[j])
            #     im = Image.fromarray(ds.pixel_array)
            ph = self.phashes[j]
            diff = query_ph - ph
            phash_differences.append(diff)
            if diff < 10:
                final_matches.append({
                    "name": self.paths[j],
                    "similarity": diff / MAX_DIFF,
                    "response": 1,
                })

    # skip this for now
    # if not final_matches:
    #     final_matches = matches

    # final output: paths to the images
    return final_matches
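The two-cluster KMeans step above derives a data-driven cutoff between "similar" and "dissimilar" distances instead of a fixed threshold. A standalone sketch of that idea, using fabricated example distances purely for illustration:

# Standalone illustration of the k-means thresholding idea; the distances
# below are made-up example values, not real data.
import numpy as np
from sklearn.cluster import KMeans

hash_differences = [2, 3, 1, 30, 28, 35, 4, 31]      # hypothetical dHash distances
kmeans = KMeans(n_clusters=2, n_init=10).fit(np.array(hash_differences).reshape(-1, 1))
centers = sorted(kmeans.cluster_centers_.flatten())
threshold = np.mean(centers)                          # midpoint between the two cluster centres
print([d for d in hash_differences if d < threshold])  # the "similar" group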
from PIL import Image
import imagehash
import argparse
import shelve
import glob

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
                help="path to input datasets")
ap.add_argument("-s", "--shelve", required=True,
                help="output shelve database")
args = vars(ap.parse_args())

db = shelve.open(args["shelve"], writeback=True)

for imagePath in glob.glob(args["dataset"] + "/*.jp*g"):
    print(imagePath)
    image = Image.open(imagePath)
    h = str(imagehash.dhash(image))
    filename = imagePath[imagePath.rfind("/") + 1:]
    db[h] = db.get(h, []) + [filename]

for key in db.keys():
    if len(db[key]) > 1:
        print(db[key])

db.close()
def process_image(image_id, full_url, thumb_url, source_url):
    import requests
    import orm
    import boto
    import gcs_oauth2_boto_plugin
    import tempfile
    import mimetypes
    import conf
    from PIL import Image as pimage
    from PIL import ImageFile
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    import imagehash
    from hashtest import hash_image

    session = orm.Session()
    gcs_oauth2_boto_plugin.SetFallbackClientIdAndSecret(
        conf.client_id, conf.client_secret)
    fullbucket = boto.storage_uri(conf.fullbucket, 'gs').get_bucket()
    thumbbucket = boto.storage_uri(conf.thumbbucket, 'gs').get_bucket()

    # Fetch images
    print "%d: Starting" % image_id
    response = requests.get(source_url, stream=True)
    if not response.status_code == 200:
        session.query(orm.Image).filter(orm.Image.id == image_id).update(
            {'fetched': -1})
        session.commit()
        return
    fulltemp = tempfile.NamedTemporaryFile()
    thumbtemp = tempfile.NamedTemporaryFile()
    for block in response.iter_content(4096):
        fulltemp.write(block)
    fulltemp.seek(0)

    himg = pimage.open(fulltemp)
    ahash, phash, dhash = imagehash.average_hash(himg), imagehash.phash(
        himg), imagehash.dhash(himg)
    ahash, phash, dhash = int(str(ahash), base=16), int(
        str(phash), base=16), int(str(dhash), base=16)

    # Save images, make thumb
    himg.thumbnail((640, 640))
    himg.convert("RGB").save(thumbtemp, format='WebP')
    del himg
    if ahash >= 2**63:
        ahash -= 2**64
    if phash >= 2**63:
        phash -= 2**64
    if dhash >= 2**63:
        dhash -= 2**64

    # Upload
    fulltemp.seek(0)
    thumbtemp.seek(0)
    fullkey = fullbucket.new_key(full_url.split('/')[-1])
    thumbkey = thumbbucket.new_key(thumb_url.split('/')[-1])
    meta = {
        'Cache-Control': 'public, max-age=3600',
        'Content-Type': response.headers['content-type'],
    }
    fullkey.set_contents_from_file(fulltemp, headers=meta)
    print "%d: Uploaded full" % image_id
    meta['Content-Type'] = 'image/webp'
    thumbkey.set_contents_from_file(thumbtemp, headers=meta)
    print "%d: Uploaded thumb" % image_id

    try:
        bmbhash = hash_image(fulltemp.name)
        session.add(orm.Hash(name=u'bmbhash', value=bmbhash, image_id=image_id))
    except:
        pass

    session.add(orm.Hash(name=u'ahash', value=ahash, image_id=image_id))
    session.add(orm.Hash(name=u'phash', value=phash, image_id=image_id))
    session.add(orm.Hash(name=u'dhash', value=dhash, image_id=image_id))
    session.query(orm.Image).filter(orm.Image.id == image_id).update({
        'fetched': 1,
        'size': int(response.headers['content-length'])
    })
    session.commit()
    fulltemp.close()
    thumbtemp.close()
def hash_image(image_file: str) -> str:
    image = Image.open(image_file)
    image_hash = str(imagehash.dhash(image))
    return image_hash
def ImageHash(path):
    image = Image.open(path)
    h = str(imagehash.dhash(image))
    return h
from PIL import Image, ImageFile
import imagehash, os
from glob import glob

# do not skip large/truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

hash_a = imagehash.dhash(Image.open('./a_resize_cut.png'))
hash_b = imagehash.dhash(Image.open('./b_resize_cut.png'))
print('hash_a=')
print(hash_a)
print('hash_b=')
print(hash_b)
def get_image_hash(self, file):
    img = Image.open(file)
    h = str(imagehash.dhash(img))
    return h
        self.fontType = fontType
        self.fontFilePath = fontFilePath
        img = Image.open(fontFilePath)
        self.aHash = str(imagehash.average_hash(img))
        self.dHash = str(imagehash.dhash(img))
        self.pHash = str(imagehash.phash(img))
        self.wHash = str(imagehash.whash(img))


list = []
for fontType in [x for x in os.listdir(rootPah)]:
    fontTypeList = []
    for item in os.listdir(os.path.join(rootPah, fontType)):
        img = Image.open(os.path.join(rootPah, fontType, item))
        font = {
            'fontType': fontType,
            'fontFilePath': os.path.join(rootPah, fontType, item),
            'aHash': str(imagehash.average_hash(img)),
            'dHash': str(imagehash.dhash(img)),
            'pHash': str(imagehash.phash(img)),
            'wHash': str(imagehash.whash(img)),
        }
        fontTypeList.append(font)
    list.append(fontTypeList)

fp = open('obj.json', 'w')
json.dump(list, fp)
fp.close()
def test_dhash(self):
    result_hash = imagehash.dhash(self.image)
    known_hash = "0026273b2b19550e"
    self.assertEqual(str(result_hash), known_hash)
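Alongside the exact-match test above, a hedged sketch of a companion test one might add to check that dHash stays stable under a mild resize; the 2-bit tolerance is an assumption, not a documented guarantee of the library:

# Hypothetical companion test: dHash should barely change after a mild resize.
def test_dhash_resize_stability(self):
    original = imagehash.dhash(self.image)
    resized = imagehash.dhash(self.image.resize((self.image.width // 2,
                                                 self.image.height // 2)))
    # The 2-bit tolerance is an assumed bound for this sketch.
    self.assertLessEqual(original - resized, 2)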
def dh(self):
    """
    # http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html
    :return:
    """
    return imagehash.dhash(self.pil)