def _worker(self):
    '''
    Worker loop: take image paths from 'inbox', calculate the hash of each
    image and put the result in 'outbox'.

    The loop ends when 'shutdown' is set, or when 'inbox' is empty and
    'done' is set. While 'inbox' is empty and 'done' is not set, the worker
    waits on the 'empty' condition before polling again.

    Results are put on 'outbox' as (image_hash, image_path) tuples. A path
    that does not exist is put on 'error' as
    (image_path, 'Image Does not Exist') and is not hashed. An IOError
    raised while hashing is printed and the worker moves on.

    NOTE(review): assumes 'error' is a queue like 'inbox' and 'outbox' --
    confirm against the class that owns this method.
    '''
    while not self.shutdown.is_set():
        try:
            image_path = self.inbox.get_nowait()
        except Empty:
            # A single pre-formatted argument prints the same text under
            # Python 2 and Python 3; the two spaces after the colon match
            # what "print 'text ' , value" emits.
            print('no data found. isset:  %s' % self.done.is_set())
            if self.done.is_set():
                break
            with self.empty:
                self.empty.wait()
            continue

        if not os.path.exists(image_path):
            self.error.put((image_path, 'Image Does not Exist'))
            # Already reported; hashing a missing file would only fail again.
            continue

        try:
            print('[%s] Processing %s' % (current_thread().ident, image_path))
            image_hash = average_hash(image_path)
            self.outbox.put((image_hash, image_path))
        except IOError as err:
            # Error first, path second, to match the wording of the message.
            print('ERROR: Got %s for image : %s' % (err, image_path))

    print('Worker %s has done processing.' % current_thread().ident)
def _worker(self):
    '''
    Worker loop: take image paths from 'inbox', calculate the hash of each
    image and put the result in 'outbox'.

    The loop ends when 'shutdown' is set, or when 'inbox' is empty and
    'done' is set. While 'inbox' is empty and 'done' is not set, the worker
    waits on the 'empty' condition before polling again.

    Each result is put on 'outbox' as ImageInfo(image_hash, image_path).
    A path that does not exist is put on 'error' as
    (image_path, 'Image Does not Exist') and is not hashed. An IOError
    raised while hashing is printed and the worker moves on.

    'progress_callback', when truthy, is called with no arguments once for
    every path taken from 'inbox', whether it was hashed, missing or failed.
    '''
    while not self.shutdown.is_set():
        try:
            image_path = self.inbox.get_nowait()
        except Empty:
            print('no data found. isset: ', self.done.is_set())
            if self.done.is_set():
                break
            with self.empty:
                self.empty.wait()
            continue

        try:
            if not os.path.exists(image_path):
                self.error.put((image_path, 'Image Does not Exist'))
                # Already reported, so skip hashing. The check lives inside
                # the try so the finally clause still reports progress.
                continue
            print('[%s] Processing %s' % (current_thread().ident, image_path))
            image_hash = average_hash(image_path)
            info = ImageInfo(image_hash, image_path)
            print(info)
            self.outbox.put(info)
            print("Qsize: ", self.outbox.qsize())
        except IOError as err:
            # Error first, path second, to match the wording of the message.
            print('ERROR: Got %s for image : %s' % (err, image_path))
        finally:
            if self.progress_callback:
                self.progress_callback()

    print('Worker %s has done processing.' % current_thread().ident)
def test_average_hash(self):
    """Check that average_hash reproduces the stored hash of every test image"""
    for entry in self.photos:
        # Each entry carries the image location and the hash it must produce.
        expected = entry['average_hash']
        actual = average_hash(entry['path'])
        self.assertEqual(expected, actual)
# NOTE(review): the `def` header of this generator (the one that supplies
# src_path, filter_exts and ext_delimiter) is outside this view.
    '''
    Walk the directory tree rooted at src_path and yield the full path of
    the files found in it.

    When filter_exts is given, only files whose name, taken after the last
    ext_delimiter, is in filter_exts are yielded; otherwise every file is
    yielded.

    Raises TypeError when filter_exts is truthy but is not a list.
    '''
    if filter_exts and not isinstance(filter_exts, list):
        raise TypeError('filter_exts should be a list of extensions')
    for dirpath, subdirs, filenames in os.walk(src_path):
        for fn in filenames:
            if filter_exts:
                # rfind() returns -1 when the delimiter is absent, so the
                # slice then starts at 0 and the whole file name is compared.
                if fn[ fn.rfind(ext_delimiter) + 1:] in filter_exts:
                    yield os.path.join(dirpath, fn)
            else:
                yield os.path.join(dirpath, fn)

# Find all the image files under the picture directory.
files = FileFinder.ifind('/vagrant/my_pictures/blog/', ['jpg', 'jpeg', 'JPG'])
# Maps an image hash to the list of files that produced that hash.
dupe_images = defaultdict(list)
# Hash every image and group the file names by hash.
for filename in files:
    image_hash = average_hash(filename)
    dupe_images[image_hash].append(filename)
# Every hash is now grouped with its files; print, for each hash, how many
# files share it and which ones they are.
# NOTE: iteritems() and the print statement below are Python 2 only.
for image_hash, images in dupe_images.iteritems():
    print 'Image Hash: {0} Image Copies: {1} Image Files: {2}'.format(image_hash, len(images), ','.join(images))