import urllib

import pHash
from django.core.exceptions import ObjectDoesNotExist


def checkForDuplicates(image_id):
    from image_upload_app.models import Image
    # fetch the Image record for the given image_id
    try:
        image = Image.objects.get(id=image_id)
    except ObjectDoesNotExist:
        print "[WARNING] Image {} does not exist.".format(image_id)
        return
    # download the image file and compute its perceptual hash
    urllib.urlretrieve(image.fileName.url, 'temp_file')
    image_phash = pHash.imagehash('temp_file')
    # temp_file could optionally be deleted here, but it's not necessary
    # flag the image as a duplicate if another row already has this hash
    if Image.objects.filter(Hash=image_phash).exists():
        image.Duplicate = True
    else:
        image.Duplicate = False
    # store the hash and save
    image.Hash = image_phash
    image.save()
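The exact-match filter above only catches byte-identical hashes. A minimal sketch of a near-duplicate variant using pHash.hamming_distance; the helper name and the 8-bit threshold are assumptions, not part of the original, and it presumes Hash is stored as an integer:

def is_near_duplicate(image_phash, threshold=8):
    # threshold of 8 bits is an assumed value; Hash is assumed to be
    # stored as an integer field
    for other in Image.objects.exclude(Hash__isnull=True):
        if pHash.hamming_distance(image_phash, other.Hash) <= threshold:
            return True
    return False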
import os
import urllib
from uuid import uuid4

import pHash


def calculate_hash(primary_key):
    """Calculate and persist the image hash for the ImageDupe object
    referenced by the given primary key."""
    instance = ImageDupe.objects.get(pk=primary_key)
    if instance and instance.image and instance.image.url and not instance.image_hash:
        # download the image to a uniquely named temporary file
        temp_image_path = '/tmp/{}.{}'.format(uuid4().hex, instance.image.url.split('.')[-1])
        urllib.urlretrieve(instance.image.url, temp_image_path)
        # hex of the perceptual-hash integer, with the leading '0x'
        # and any trailing 'L' stripped off
        image_hash = hex(pHash.imagehash(temp_image_path)).split('x')[-1].split('L')[0]
        instance.image_hash = image_hash
        # mark as duplicate if any other object already carries this hash
        query_set = ImageDupe.objects.filter(image_hash=image_hash).exclude(pk=instance.pk)
        instance.is_duplicate = query_set.exists()
        instance.save()
        # clean up the temporary file
        os.remove(temp_image_path)
def save(self):
    # save once so that self.img.path points at a file on disk
    super(Commodity, self).save()
    cd = ColorDescriptor((8, 12, 3))
    img = cv2.imread(self.img.path)
    cf = cd.describe(img)
    # extract color features
    self.color_features = ','.join(str(i) for i in cf)
    # extract shape features via the perceptual hash
    self.shape_features = pHash.imagehash(self.img.path)
    super(Commodity, self).save()
import os
import random
import sys

import pHash


def get_phash(content):
    # the library receives a filepath, so the in-memory bytes
    # must be written to a temporary file first
    tmp_filename = '/tmp/capstone_hash_%s' % random.randint(0, sys.maxint)
    with open(tmp_filename, 'wb') as f:
        f.write(content)
    imghash = pHash.imagehash(tmp_filename)
    os.remove(tmp_filename)
    return imghash
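A sketch of the same idea using tempfile instead of a hand-rolled random name, which avoids collisions between concurrent callers; the helper name is an assumption, not part of the original:

import os
import tempfile

import pHash


def get_phash_tempfile(content):
    # NamedTemporaryFile picks a unique name; delete=False so the
    # file still exists when pHash reads it back by path
    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        tmp.write(content)
        tmp.close()
        return pHash.imagehash(tmp.name)
    finally:
        os.remove(tmp.name)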
import pHash

import utils.puzzle


def calculate(backup_dir, filename):
    # build the puzzle colour vector; bail out if the file could not be read
    cvec = utils.puzzle.get_cvec_from_file(filename)
    if not cvec:
        return None
    return ImageHash(
        puzzle_vec=utils.puzzle.compress_cvec(cvec),
        phash=pHash.imagehash(filename),
        path=filename,
    )
def hash_photo(path_queue, db_queue):
    """Receive an indexing request from the given queue; this is the
    main action of the PhotoIndexer thread."""
    while not stop_threads:
        path = path_queue.get()
        try:
            print "Opening file %s" % path
            # the binding used here returns a pair; only the hash is kept
            trash, myhash = imagehash(path)
            db_queue.put({myhash: path})
        except Exception, e:
            print "error %s hashing file %s" % (e, path)
import pHash
from tempfile import NamedTemporaryFile


def make_hash(itemPhoto_id):
    recent_photo = ItemPhoto.objects.get(pk=itemPhoto_id)
    # copy the stored photo into a temporary file pHash can read
    temp = NamedTemporaryFile(delete=False)
    temp.write(recent_photo.photo.read())
    temp.close()  # flush to disk before hashing
    hash1 = pHash.imagehash(temp.name)
    recent_photo.phash = str(hash1)
    # flag the photo as a duplicate if another row shares the hash
    is_duplicate = ItemPhoto.objects.filter(phash=recent_photo.phash).exists()
    recent_photo.duplicate = is_duplicate
    recent_photo.save(elevation=False)
import time
import urllib

import pHash
from django.core.exceptions import ObjectDoesNotExist


def check_for_duplicates(image_id, image=None):
    # the image row may not be committed yet, so retry for up to ten seconds
    for i in xrange(10):
        try:
            image = Image.objects.get(pk=image_id)
            break
        except ObjectDoesNotExist:
            time.sleep(1)
    if image:
        urllib.urlretrieve(image.fileName.url, 'img_file')
        image_hash = pHash.imagehash('img_file')
        if Image.objects.filter(Hash=image_hash).exists():
            image.Duplicate = True
        image.Hash = image_hash
        image.save(cel_save=True)
    else:
        print "Could not retrieve image with this id."
import os

import pHash
import requests


def hash_check_duplicate(image):
    """Hash the image and check for a duplicate image (one with the same hash)."""
    image_url = os.path.join(MEDIA_URL, image.fileName.name)
    # stream the file to disk so large images are not held in memory
    with open("image", "wb") as f:
        r = requests.get(image_url, stream=True)
        for chunk in r.iter_content(1024):
            f.write(chunk)
    new_hash = pHash.imagehash("image")
    previous_image = Image.objects.filter(p_hash=new_hash)
    if previous_image:
        image.duplicate = True
    image.p_hash = new_hash
    image.save(needs_hash=False)
def __getitem__(self, frame_number):
    # a slice returns a list of (frame_number, hash) pairs
    if type(frame_number) is slice:
        return [(frame, self[frame]) for frame in range(
            frame_number.start,
            frame_number.stop,
            frame_number.step if frame_number.step else 10
        )]
    # return the cached hash if this frame was already processed
    if frame_number in self:
        return self.get(frame_number)
    # seek to the requested frame and grab it
    video = cv2.VideoCapture(self.video.filename)
    video.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, frame_number)
    success, frame = video.read()
    # pHash works on files, so write the frame out as a temporary JPEG
    temporary_image_file = NamedTemporaryFile(suffix='.jpg')
    cv2.imwrite(temporary_image_file.name, frame)
    frame_hash = pHash.imagehash(temporary_image_file.name)
    self[frame_number] = frame_hash
    return frame_hash
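A hedged usage sketch; FrameHashes and video are hypothetical stand-ins for whatever dict subclass hosts the __getitem__ above:

hashes = FrameHashes(video)  # hypothetical class and argument names
print hashes[120]            # hash of frame 120, computed once then cached
print hashes[0:100:25]       # [(frame, hash), ...] pairs from the slice branch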
import os
from datetime import datetime
from urllib2 import urlopen

import pHash


print 'Started: %s' % datetime.now().ctime()
if not os.path.exists('gatherer_images'):
    os.mkdir('gatherer_images')
cards = MTGCard.objects.all()
for card in cards:
    # skip cards that already have a hash
    try:
        MTGHash.objects.get(card=card)
    except MTGHash.DoesNotExist:
        url = 'http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=%s&type=card' \
            % card.gatherer_id
        print 'Downloading Card Image for %s from %s' % (card, url)
        i = urlopen(url).read()
        filename = 'gatherer_images/%s.jpg' % card.gatherer_id
        with open(filename, 'wb') as f:
            f.write(i)
        h = pHash.imagehash(filename)
        print 'Got %s for %s' % (h, card)
        # create the hash object in the DB
        MTGHash.objects.create(card=card, hash=h)
import pHash

# logo_hash = pHash.imagehash('video_wp.png')
# logo1_hash = pHash.imagehash('logo1.jpg')
# logo2_hash = pHash.imagehash('logo2.jpg')
# logo3_hash = pHash.imagehash('logo3.jpg')
# logo4_hash = pHash.imagehash('logo4.jpg')
#
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo1_hash), logo_hash, logo1_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo2_hash), logo_hash, logo2_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo3_hash), logo_hash, logo3_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo4_hash), logo_hash, logo4_hash)
#
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo1_hash, logo3_hash), logo1_hash, logo3_hash)

# compare perceptual hashes of four sample video frames
o4n_hash = pHash.imagehash('frame_o4n_03.jpg')
wp_hash = pHash.imagehash('frame_wp_003.jpg')
wpc_hash = pHash.imagehash('frame_wpc_0003.jpg')
wpstar_hash = pHash.imagehash('frame_wpstar_000056.jpg')

print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(o4n_hash, wp_hash), o4n_hash, wp_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(o4n_hash, wpc_hash), o4n_hash, wpc_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(wp_hash, wpc_hash), wp_hash, wpc_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(wpstar_hash, wpc_hash), wpstar_hash, wpc_hash)
#!/usr/bin/env python
# coding: utf-8

import sys

import pHash

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "usage: %s src dst" % sys.argv[0]
        sys.exit(1)
    # radial-variance digest comparison (cross-correlation, higher is more similar)
    d1 = pHash.image_digest(sys.argv[1], 1.0, 1.0, 180)
    d2 = pHash.image_digest(sys.argv[2], 1.0, 1.0, 180)
    print 'digest', pHash.crosscorr(d1, d2)[1]
    # DCT hash comparison (Hamming distance, lower is more similar)
    h1 = pHash.imagehash(sys.argv[1])
    h2 = pHash.imagehash(sys.argv[2])
    print 'hash', pHash.hamming_distance(h1, h2)
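A sketch of how the two metrics might be combined into a single similarity decision; the function name and both threshold values are assumptions, not from the original script:

import pHash


def looks_similar(src, dst, max_hamming=8, min_crosscorr=0.90):
    # both thresholds are assumed values chosen for illustration
    h1 = pHash.imagehash(src)
    h2 = pHash.imagehash(dst)
    if pHash.hamming_distance(h1, h2) <= max_hamming:
        return True
    # fall back to the radial-variance digest for borderline cases
    d1 = pHash.image_digest(src, 1.0, 1.0, 180)
    d2 = pHash.image_digest(dst, 1.0, 1.0, 180)
    return pHash.crosscorr(d1, d2)[1] >= min_crosscorr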
import logging

TEMP_DIR = None
SIMILARITY_THRESHOLD = 5


class CompressionSimilarityError(Exception):
    pass


def _images_are_similar(filename1, filename2):
    try:
        import pHash
    except ImportError:
        # without the pHash module we cannot compare, so assume similar
        logging.info("Could not determine similarity - missing pHash module")
        return True
    hash1 = pHash.imagehash(filename1)
    hash2 = pHash.imagehash(filename2)
    hd = pHash.hamming_distance(hash1, hash2)
    logging.info('Hamming distance: %d (%08x / %08x)' % (hd, hash1, hash2))
    # hashes within the threshold are treated as the same image
    if hd <= SIMILARITY_THRESHOLD:
        return True
    return False
    """
    digest1 = pHash.image_digest(filename1, 1.0, 1.0, 180)
    digest2 = pHash.image_digest(filename2, 1.0, 1.0, 180)
    print 'Cross-correlation: %d' % (pHash.crosscorr(digest1, digest2))
    """
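A brief usage sketch, assuming the function guards an image-compression step; 'original.png' and 'compressed.png' are placeholder paths:

if not _images_are_similar('original.png', 'compressed.png'):
    # compression changed the image by more than SIMILARITY_THRESHOLD bits
    raise CompressionSimilarityError('compressed output differs too much from the original')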
def get_perceptive_hash(self, file_path):
    """Calculate the perceptual hash for the file, as a hex string."""
    image_hash = pHash.imagehash(file_path)
    return "%x" % int(image_hash)
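Since the method returns a hex string, comparing two stored values later means converting them back to integers first; a minimal sketch where the variable names and hash values are placeholders:

stored_a = '89d5bd2a60f1d3b7'  # placeholder hex hashes, as produced by get_perceptive_hash()
stored_b = '89d5bd2a60f1d337'
distance = pHash.hamming_distance(int(stored_a, 16), int(stored_b, 16))
print 'Hamming distance: %d' % distance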
albums = graph.request("me/albums") compare_photos = [] compare_hashes = [] compare_count = 0 print "Obtaining profile pictures for %s..." % (profile["name"]) for album in albums["data"]: if album["type"] == "profile": albumPhotos = graph.request("%s/photos" % (album["id"])) for albumPhoto in albumPhotos["data"]: image = albumPhoto["images"][-1] photoName = "compared_%s.jpg" % (albumPhoto["id"]) urllib.urlretrieve(albumPhoto["images"][-1]["source"], photoName); compare_photos.append(photoName) compare_hashes.append(pHash.imagehash(photoName)) print "Saved %s" % (photoName) response = graph.request("search",{"q":profile["name"], "fields":"id,name,picture", "type":"user"}) next = response["paging"]["next"].replace("https://graph.facebook.com/v1.0/", "") print "Hunt commencing!" while next: for user in response["data"]: urllib.urlretrieve(user["picture"]["data"]["url"], "compared.jpg"); compared_hash = pHash.imagehash("compared.jpg") compare_count += 1 for compare_hash in compare_hashes: hamming_distance = pHash.hamming_distance( compare_hash, compared_hash ) if hamming_distance < 8: print 'Potential scammer: http://graph.facebook.com/%s Hamming distance: %d (%08x / %08x)' % (user["id"], hamming_distance, compare_hash, compared_hash)
    # GPS EXIF present: convert degrees/minutes to signed decimal degrees
    # (the enclosing 'if' that owns this branch is cut off in this excerpt)
    latRef = exif_data['GPS GPSLatitudeRef'].values
    longRef = exif_data['GPS GPSLongitudeRef'].values
    latitude = float(str(latitude[0])) + float(str(latitude[1])) / 60
    if latRef == 'S':
        latitude *= -1
    longitude = float(str(longitude[0])) + float(str(longitude[1])) / 60
    if longRef == 'W':
        longitude *= -1
else:
    # no GPS EXIF data available
    latitude = 0
    longitude = 0

hash1 = pHash.imagehash(temp.name)
# run the external blur-detection tool to score image quality
command = 'blur-detection ' + temp.name
output = commands.getoutput(command)
p = re.compile(r'.*density: (\d+\.\d+)')
image_quality = float(p.match(output).group(1))

# compare against the user's existing non-duplicate images;
# a Hamming distance under 15 marks the new image as non-unique
images = conn.get_images(user_id=user['_id'], is_duplicate=False)
is_duplicate = False
group = 1
is_unique = True
for image in images:
    if pHash.hamming_distance(hash1, long(image['hash'])) < 15:
        is_unique = False