Exemplo n.º 1
0
def checkForDuplicates(image_id):
    """Hash the image identified by *image_id* and flag it as a
    duplicate when another Image row already stores the same pHash.
    """
    from image_upload_app.models import Image
    # download the image for the given image_id
    try:
        image = Image.objects.get(id=image_id)
    except ObjectDoesNotExist:
        print "[WARNING] Image {} does not exist.".format(image_id)
        return

    # pHash needs a file on disk, so fetch the stored image to a fixed
    # scratch path first.
    urllib.urlretrieve(image.fileName.url, 'temp_file')

    # generate phash
    image_phash = pHash.imagehash('temp_file')

    # could optionally delete temp_file here, but it's not necessary

    # search for similar phash's and set Duplicate field accordingly
    # NOTE(review): if this image was hashed on a previous run, the filter
    # can match the image's own row -- confirm re-runs are intended to
    # self-match.
    if Image.objects.filter(Hash=image_phash).exists():
        image.Duplicate = True
    else:
        image.Duplicate = False

    # store phash and save
    image.Hash = image_phash
    image.save()
    return
def calculate_hash(primary_key):
    """
    Calculates and persists image hash for ImageDupe object referenced by given primary key

    Downloads the image to a temporary file, stores the hex form of its
    perceptual hash, and flags the object as a duplicate when another
    ImageDupe already carries the same hash.
    """
    instance = ImageDupe.objects.get(pk=primary_key)

    # Only hash objects that have an image and were not hashed before.
    if instance and instance.image and instance.image.url and not instance.image_hash:
        # download image to temporary file (uuid4 avoids name collisions)
        temp_image_path = '/tmp/{}.{}'.format(uuid4().hex, instance.image.url.split('.')[-1])
        urllib.urlretrieve(instance.image.url, temp_image_path)

        try:
            # hex of the perceptual hash long integer, with the leading 0x
            # and the trailing L (Python 2 long suffix) stripped off
            image_hash = hex(pHash.imagehash(temp_image_path)).split('x')[-1].split('L')[0]
            instance.image_hash = image_hash

            # check whether any *other* object already has this hash;
            # exists() is cheaper than len(queryset), which would fetch
            # every matching row just to count them
            query_set = ImageDupe.objects.filter(image_hash=image_hash).exclude(pk=instance.pk)
            instance.is_duplicate = query_set.exists()

            instance.save()
        finally:
            # fix: always delete the temporary download, even when hashing
            # or saving raises (the original leaked the file on error)
            os.remove(temp_image_path)
Exemplo n.º 3
0
 def save(self):
     # Persist first so self.img has a real storage path for cv2 to read.
     super(Commodity, self).save()
     cd = ColorDescriptor((8, 12, 3))
     img = cv2.imread(self.img.path)
     cf = cd.describe(img)  # extract color features
     self.color_features = ','.join(str(i) for i in cf)
     self.shape_features = pHash.imagehash(self.img.path)   # extract shape features (perceptual hash)
     # Second save persists the derived feature fields.
     super(Commodity, self).save()
def get_phash(content):
    """Compute the perceptual hash of raw image data.

    The pHash library only accepts a file path, so *content* is spooled
    to a temporary file first; the file is removed before returning.
    """
    import tempfile  # local import: top-of-file imports are outside this block
    # mkstemp yields an unpredictable, collision-free name; the original
    # random.randint naming was racy and guessable.
    fd, tmp_filename = tempfile.mkstemp(prefix='capstone_hash_')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(content)
        imghash = pHash.imagehash(tmp_filename)
    finally:
        # fix: remove the file even when imagehash raises (original leaked it)
        os.remove(tmp_filename)
    return imghash
Exemplo n.º 5
0
def calculate(backup_dir, filename):
    """Build an ImageHash record for *filename*.

    Returns None when no color vector can be extracted from the file;
    otherwise pairs the compressed puzzle vector with the pHash value.
    """
    color_vector = utils.puzzle.get_cvec_from_file(filename)
    if color_vector:
        return ImageHash(
            puzzle_vec=utils.puzzle.compress_cvec(color_vector),
            phash=pHash.imagehash(filename),
            path=filename
        )
    return None
Exemplo n.º 6
0
def hash_photo(path_queue, db_queue):
    """receive an indexing request from a given queue.
	it's the main action for the PhotoIndexer thread
    """
    while not stop_threads:
	path = path_queue.get()
	try:
	    print "Opening file %s"% path
	    trash, myhash = imagehash(path)
	    db_queue.put({myhash:path})
	except Exception, e:
	    print "error %s hashing file %s" % (e, path)
Exemplo n.º 7
0
def make_hash(itemPhoto_id):
    """Hash the photo of the given ItemPhoto, flag it as a duplicate when
    another ItemPhoto already carries the same hash, and save the row.
    """
    import os  # local import: top-of-file imports are outside this block
    recent_photo = ItemPhoto.objects.get(pk=itemPhoto_id)

    # pHash needs a real path, so spool the photo bytes to a temp file.
    temp = NamedTemporaryFile(delete=False)
    try:
        temp.write(recent_photo.photo.read())
        temp.close()  # fix: flush/close before hashing so pHash sees all bytes
        hash1 = pHash.imagehash(temp.name)
    finally:
        os.remove(temp.name)  # fix: delete=False leaked the temp file

    recent_photo.phash = str(hash1)
    is_duplicate = ItemPhoto.objects.filter(phash=recent_photo.phash).exists()
    recent_photo.duplicate = is_duplicate
    recent_photo.save(elevation=False)
Exemplo n.º 8
0
def check_for_duplicates(image_id, image=None):
    """Hash the stored image and mark it as a duplicate when another
    Image row already carries the same hash.
    """
    # Retry for up to ~10s: the row may not be committed/visible yet when
    # this task fires.
    for i in xrange(10):
        try:
            image = Image.objects.get(pk=image_id)
            break
        except ObjectDoesNotExist:
            time.sleep(1)
    if image:
        # pHash needs a file on disk; download to a fixed scratch name.
        urllib.urlretrieve(image.fileName.url, 'img_file')
        image_hash = pHash.imagehash('img_file')
        # NOTE(review): Duplicate is never reset to False here -- confirm
        # that is intended for re-hashed images.
        if Image.objects.filter(Hash=image_hash):
            image.Duplicate = True
        image.Hash = image_hash
        image.save(cel_save=True)
    else:
        print "Could not retrieve image with this id."
Exemplo n.º 9
0
def check_for_duplicates(image_id, image=None):
    for i in xrange(10):
        try:
            image = Image.objects.get(pk=image_id)
            break
        except ObjectDoesNotExist:
            time.sleep(1)
    if image:
        urllib.urlretrieve(image.fileName.url, 'img_file')
        image_hash = pHash.imagehash('img_file')
        if Image.objects.filter(Hash=image_hash):
            image.Duplicate = True
        image.Hash = image_hash
        image.save(cel_save=True)
    else:
        print "Could not retrieve image with this id."
Exemplo n.º 10
0
def hash_check_duplicate(image):
    """Hash the image and check for a duplicate image (one with the same hash)"""

    image_url = os.path.join(MEDIA_URL, image.fileName.name)

    # pHash works on files, so stream the image down to a local scratch path.
    with open("image", "wb") as out:
        response = requests.get(image_url, stream=True)
        for chunk in response.iter_content(1024):
            out.write(chunk)

    new_hash = pHash.imagehash("image")

    # Any existing row with the same hash marks this one as a duplicate.
    if Image.objects.filter(p_hash=new_hash):
        image.duplicate = True

    image.p_hash = new_hash
    image.save(needs_hash=False)
Exemplo n.º 11
0
    def __getitem__(self, frame_number):
        """Return the perceptual hash of the given video frame, computing
        and caching it on first access.

        A slice returns a list of (frame_number, hash) pairs sampled every
        `step` frames (default 10).  NOTE(review): a slice with a None
        start/stop would fail in range() -- confirm callers always pass
        explicit bounds.
        """
        if type(frame_number) is slice:
            return [(frame, self[frame]) for frame in range(
                frame_number.start,
                frame_number.stop,
                frame_number.step if frame_number.step else 10
            )]

        if frame_number in self:
            return self.get(frame_number)

        video = cv2.VideoCapture(self.video.filename)
        try:
            video.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, frame_number)
            success, frame = video.read()
        finally:
            video.release()  # fix: the capture handle was never released

        # Context manager guarantees the scratch JPEG is cleaned up.
        with NamedTemporaryFile(suffix='.jpg') as temporary_image_file:
            cv2.imwrite(temporary_image_file.name, frame)
            frame_hash = pHash.imagehash(temporary_image_file.name)

        self[frame_number] = frame_hash
        return frame_hash
Exemplo n.º 12
0
print 'Started: %s' % datetime.now().ctime()

if not os.path.exists('gatherer_images'):
    os.mkdir('gatherer_images')

cards = MTGCard.objects.all()

for card in cards:
    
    # check if we already have a hash
    try:
        MTGHash.objects.get(card=card)
    except:
        url = 'http://gatherer.wizards.com/Handlers/Image.ashx?multiverseid=%s&type=card' \
                                                             % card.gatherer_id
        print 'Downloading Card Image for %s from %s' % (card, url)
        i = urlopen(url).read()
        
        filename = 'gatherer_images/%s.jpg' % card.gatherer_id
        f = open(filename, 'wb')
        f.write(i)
        f.close()
        
        h = pHash.imagehash(filename)
        print 'Got %s for %s' % (h, card)
        
        MTGHash.objects.create(card=card, hash=h) # create object in DB
    else:
        continue
Exemplo n.º 13
0


# logo_hash = pHash.imagehash('video_wp.png')
# logo1_hash = pHash.imagehash('logo1.jpg')
# logo2_hash = pHash.imagehash('logo2.jpg')
# logo3_hash = pHash.imagehash('logo3.jpg')
# logo4_hash = pHash.imagehash('logo4.jpg')
#
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo1_hash), logo_hash, logo1_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo2_hash), logo_hash, logo2_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo3_hash), logo_hash, logo3_hash)
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo_hash, logo4_hash), logo_hash, logo4_hash)
#
#
# print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(logo1_hash, logo3_hash), logo1_hash, logo3_hash)
#




o4n_hash = pHash.imagehash('frame_o4n_03.jpg')
wp_hash = pHash.imagehash('frame_wp_003.jpg')
wpc_hash = pHash.imagehash('frame_wpc_0003.jpg')
wpstar_hash = pHash.imagehash('frame_wpstar_000056.jpg')

print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(o4n_hash, wp_hash), o4n_hash, wp_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(o4n_hash, wpc_hash), o4n_hash, wpc_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(wp_hash, wpc_hash), wp_hash, wpc_hash)
print 'Hamming distance: %d (%08x / %08x)' % (pHash.hamming_distance(wpstar_hash, wpc_hash), wpstar_hash, wpc_hash)
Exemplo n.º 14
0
#!/usr/bin/env python
# coding: utf-8

import sys

import pHash

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "usage: ${prog} src dst"
        sys.exit(0)
    d1 = pHash.image_digest(sys.argv[1], 1.0, 1.0, 180)
    d2 = pHash.image_digest(sys.argv[2], 1.0, 1.0, 180)
    print 'digest', pHash.crosscorr(d1, d2)[1]
    h1 = pHash.imagehash(sys.argv[1])
    h2 = pHash.imagehash(sys.argv[2])
    print 'hash', pHash.hamming_distance(h1, h2)

Exemplo n.º 15
0
# Scratch directory for intermediate files (None -> system default).
TEMP_DIR=None
# Maximum pHash Hamming distance at which two images count as similar.
SIMILARITY_THRESHOLD=5



class CompressionSimilarityError(Exception):
    """Raised when a compressed image diverges too far from its source."""
    pass

def _images_are_similar(filename1,filename2):
    try:
        import pHash
    except FileNotFoundError, fe:
        logging.info("Could not determine similarity - missing pHash module") 
        return True
    
    hash1 = pHash.imagehash(filename1)
    hash2 = pHash.imagehash(filename2)
    hd = pHash.hamming_distance(hash1,hash2)
    logging.info('Hamming distance: %d (%08x / %08x)' % ( hd, hash1, hash2 ))

    if hd <= SIMILARITY_THRESHOLD:
        return True

    return False
    
    """
    digest1 = pHash.image_digest(filename1, 1.0, 1.0, 180 )
    digest2 = pHash.image_digest(filename2, 1.0, 1.0, 180 )
    print 'Cross-correelation: %d' % ( pHash.crosscorr( digest1, digest2 ) )
    """
Exemplo n.º 16
0
 def get_perceptive_hash(self, file_path):
     """Return the perceptual hash of the file at *file_path*,
     rendered as a lowercase hexadecimal string.
     """
     raw_hash = pHash.imagehash(file_path)
     return format(int(raw_hash), "x")
Exemplo n.º 17
0
                latitude = float(str(
                    latitude[0])) + float(str(latitude[1])) / 60
                if latRef == 'S':
                    latitude *= -1

                longitude = float(str(
                    longitude[0])) + float(str(longitude[1])) / 60
                if longRef == 'W':
                    longitude *= -1

            else:
                latitude = 0
                longitude = 0

            hash1 = pHash.imagehash(temp.name)

            command = 'blur-detection ' + temp.name
            output = commands.getoutput(command)
            p = re.compile('.*density: (\d+\.\d+)')

            image_quality = float(p.match(output).group(1))

            images = conn.get_images(user_id=user['_id'], is_duplicate=False)

            is_duplicate = False
            group = 1
            is_unique = True
            for image in images:
                if pHash.hamming_distance(hash1, long(image['hash'])) < 15:
                    is_unique = False
Exemplo n.º 18
0
# Download every photo from the user's own "profile" albums and record its
# perceptual hash; these are the reference images for the scan below.
albums = graph.request("me/albums")

compare_photos = []
compare_hashes = []
compare_count = 0

print "Obtaining profile pictures for %s..." % (profile["name"])
for album in albums["data"]:
	if album["type"] == "profile":
		albumPhotos = graph.request("%s/photos" % (album["id"]))
		for albumPhoto in albumPhotos["data"]:
			image = albumPhoto["images"][-1]
			photoName = "compared_%s.jpg" % (albumPhoto["id"])
			urllib.urlretrieve(albumPhoto["images"][-1]["source"], photoName);
			compare_photos.append(photoName)
			compare_hashes.append(pHash.imagehash(photoName))
			print "Saved %s" % (photoName)

# Search users with the same name and compare each result's profile picture
# against the reference hashes; a small Hamming distance suggests a cloned
# profile.
response = graph.request("search",{"q":profile["name"], "fields":"id,name,picture", "type":"user"})
next = response["paging"]["next"].replace("https://graph.facebook.com/v1.0/", "")

# NOTE(review): neither `response` nor `next` is updated inside this loop,
# so as written it re-scans the first result page forever -- confirm whether
# the next-page fetch was lost from this snippet.
print "Hunt commencing!"
while next:
	for user in response["data"]:
		urllib.urlretrieve(user["picture"]["data"]["url"], "compared.jpg");
		compared_hash = pHash.imagehash("compared.jpg")
		compare_count += 1
		for compare_hash in compare_hashes:	
			hamming_distance = pHash.hamming_distance( compare_hash, compared_hash )
			if hamming_distance < 8:
				print 'Potential scammer: http://graph.facebook.com/%s Hamming distance: %d (%08x / %08x)' % (user["id"], hamming_distance, compare_hash, compared_hash)
Exemplo n.º 19
0
                  latRef = exif_data['GPS GPSLatitudeRef'].values
                  longRef = exif_data['GPS GPSLongitudeRef'].values

                  latitude = float(str(latitude[0])) + float(str(latitude[1]))/60
                  if latRef == 'S':
                      latitude *= -1

                  longitude = float(str(longitude[0])) + float(str(longitude[1]))/60
                  if longRef == 'W':
                      longitude *= -1

             else:
                  latitude = 0
                  longitude = 0

             hash1 = pHash.imagehash(temp.name)
	
             command = 'blur-detection ' + temp.name
             output = commands.getoutput(command)
             p=re.compile('.*density: (\d+\.\d+)')

             image_quality = float(p.match(output).group(1))

             images = conn.get_images(user_id=user['_id'], is_duplicate=False)
        
             is_duplicate = False
             group = 1
	     is_unique = True
             for image in images:
                 if pHash.hamming_distance(hash1, long(image['hash'])) < 15:
		    is_unique = False