def _worker(self):
        '''
        Worker loop: take image paths from 'inbox', calculate the hash of
        each image and put the (hash, path) tuple in 'outbox'.

        The loop runs until 'shutdown' is set, or until 'inbox' is empty
        and 'done' is set. While 'inbox' is empty and 'done' is not set,
        the worker waits on the 'empty' condition and then polls again.

        A path that does not exist is handed to 'self.error' and skipped.
        An IOError raised while hashing is printed and the loop carries on
        with the next path.
        '''

        while not self.shutdown.isSet():

            try:
                image_path = self.inbox.get_nowait()
            except Empty:
                # One pre-formatted argument, so the same text is printed
                # whether 'print' is a statement or a function.
                print('no data found. isset:  %s' % self.done.isSet())
                if self.done.isSet():
                    break
                # NOTE(review): if 'done' is set and the condition notified
                # between the check above and this wait(), the worker stays
                # blocked until the next notify -- confirm how the producer
                # signals 'empty'.
                with self.empty:
                    self.empty.wait()
                continue

            if not os.path.exists(image_path):
                self.error((image_path, 'Image Does not Exist'))
                # Already reported; do not try to hash a path known to be
                # missing.
                continue

            try:
                print('[%s] Processing %s' % (current_thread().ident, image_path))
                image_hash = average_hash(image_path)
                self.outbox.put((image_hash, image_path))
            except IOError as err:
                # Error first, path second, matching the message wording.
                print('ERROR: Got %s for image : %s' % (err, image_path))
        print('Worker %s has done processing.' % current_thread().ident)
Exemplo n.º 2
0
    def _worker(self):
        '''
        Worker loop: take image paths from 'inbox', calculate the hash of
        each image and put an ImageInfo(hash, path) in 'outbox'.

        The loop runs until 'shutdown' is set, or until 'inbox' is empty
        and 'done' is set. While 'inbox' is empty and 'done' is not set,
        the worker waits on the 'empty' condition and then polls again.

        A path that does not exist is put on the 'error' queue and skipped.
        An IOError raised while hashing is printed. In every case
        'progress_callback' (when truthy) is called once per dequeued path.
        '''

        while not self.shutdown.isSet():

            try:
                image_path = self.inbox.get_nowait()
            except Empty:
                print('no data found. isset: ', self.done.isSet())
                if self.done.isSet():
                    break
                # NOTE(review): if 'done' is set and the condition notified
                # between the check above and this wait(), the worker stays
                # blocked until the next notify -- confirm how the producer
                # signals 'empty'.
                with self.empty:
                    self.empty.wait()
                continue

            try:
                if not os.path.exists(image_path):
                    self.error.put((image_path, 'Image Does not Exist'))
                    # Already reported; skip hashing. The 'finally' clause
                    # below still runs, so progress is counted for this item.
                    continue

                print('[%s] Processing %s' %
                      (current_thread().ident, image_path))
                image_hash = average_hash(image_path)
                info = ImageInfo(image_hash, image_path)
                print(info)
                self.outbox.put(info)
                print("Qsize: ", self.outbox.qsize())
            except IOError as err:
                # Error first, path second, matching the message wording.
                print('ERROR: Got %s for image : %s' % (err, image_path))
            finally:
                if self.progress_callback:
                    self.progress_callback()

        print('Worker %s has done processing.' % current_thread().ident)
Exemplo n.º 3
0
 def test_average_hash(self):
     """Every entry in self.photos must hash to its recorded 'average_hash'."""
     for entry in self.photos:
         expected = entry['average_hash']
         computed = average_hash(entry['path'])
         self.assertEqual(expected, computed)
Exemplo n.º 4
0
 def test_average_hash(self):
     """Compare average_hash of each photo's path with the stored hash."""
     for fixture in self.photos:
         want, source = fixture['average_hash'], fixture['path']
         self.assertEqual(want, average_hash(source))
        '''
            Function given the directory path and extension, it'll
            reutrn the generator to iterator the list of files found
        '''
        if filter_exts and not isinstance(filter_exts, list):
            raise TypeError('filter_exts should be a list of extensions')
 
        for dirpath, subdirs, filenames in os.walk(src_path):
            for fn in filenames:
                if filter_exts:
                    if fn[ fn.rfind(ext_delimiter) + 1:] in filter_exts:
                        yield os.path.join(dirpath, fn)
                else:
                    yield os.path.join(dirpath, fn)

# Let's find all the images
files = FileFinder.ifind('/vagrant/my_pictures/blog/', ['jpg', 'jpeg', 'JPG'])

# Dictionary keyed by image hash; each value is the list of files that
# produced that hash
dupe_images = defaultdict(list)

# Iterate through each image and group the file names by image hash
for filename in files:
    image_hash = average_hash(filename)
    dupe_images[image_hash].append(filename)

# At this point every hash maps to the list of files sharing it.
# Print, for each hash, how many files share it and which ones.
# items() and the single-argument print() call run on both Python 2 and 3.
for image_hash, images in dupe_images.items():
    print('Image Hash: {0} Image Copies: {1} Image Files: {2}'.format(image_hash, len(images), ','.join(images)))