def find_duplicates(self):
    """
    Find and Delete Duplicates

    Every entry of ``self.dirname`` is opened with PIL and hashed with
    ``imagehash.colorhash(binbits=3)``.  The first file seen for a given
    hash is kept as the reference; every later file with an equal hash is
    reported as a duplicate.  If any duplicates were found, the user is
    asked on stdin whether to delete them, and the reclaimed space is
    reported in megabytes (10**6 bytes) rounded to two decimals.

    Returns:
        None. Results are printed and files may be removed from disk.
    """
    fnames = os.listdir(self.dirname)
    hashes = {}
    duplicates = []
    print("Finding Duplicates Now!\n")

    for image in fnames:
        with Image.open(os.path.join(self.dirname, image)) as img:
            temp_hash = imagehash.colorhash(img, binbits=3)
        if temp_hash in hashes:
            print("Duplicate {} \nfound for Image {}!\n".format(image, hashes[temp_hash]))
            duplicates.append(image)
        else:
            hashes[temp_hash] = image

    if not duplicates:
        print("No Duplicates Found :(")
        return

    a = input("Do you want to delete these {} Images? Press Y or N: ".format(len(duplicates)))
    if a.strip().lower() != "y":
        print("Thank you for Using Duplicate Remover")
        return

    space_saved = 0
    for duplicate in duplicates:
        duplicate_path = os.path.join(self.dirname, duplicate)
        # Size must be read before the file is removed.
        space_saved += os.path.getsize(duplicate_path)
        os.remove(duplicate_path)
        print("{} Deleted Succesfully!".format(duplicate))

    # The precision argument belongs to round(), not to format(); passing it
    # to format() silently rounded the figure to whole megabytes.
    print("\n\nYou saved {} mb of Space!".format(round(space_saved / 1000000, 2)))
def run(self) -> None:
    """Score every ordered pair of images and emit each image's similar files.

    The hash of each file is taken from ``self.image_by_hashes`` under the
    key ``self.hash_algo``.  Pairs whose hash difference exceeds
    ``self.max_score`` are dropped.  Results are emitted through
    ``self.about_found_similars`` one file at a time, files with the most
    similar images first and ties broken by ascending file name.
    """
    hash_by_file = {}
    for file_name, hashes in self.image_by_hashes.items():
        current_hash = hashes[self.hash_algo]

        # TODO: Monkey patch. https://github.com/JohannesBuchner/imagehash/issues/112
        if self.hash_algo == 'colorhash':
            from PIL import Image
            current_hash = imagehash.colorhash(Image.open(file_name))

        hash_by_file[file_name] = current_hash

    similars_by_file = defaultdict(list)
    # Ordered pairs of distinct entries: each file gets its own list of matches.
    for left, right in itertools.permutations(hash_by_file.items(), 2):
        left_name, left_hash = left
        right_name, right_hash = right

        score = left_hash - right_hash
        if score > self.max_score:
            continue

        similars_by_file[left_name].append((right_name, score))

    # Descending by number of similar items; file names ascending within ties
    ranked = list(similars_by_file.items())
    ranked.sort(key=lambda entry: (-len(entry[1]), entry[0]))

    for file_name, similars in ranked:
        if similars:
            self.about_found_similars.emit(file_name, similars)
def get_unique_images(images_path: Path, show_matches: bool = False) -> "Tuple[int, Dict[Tuple[imagehash.ImageHash, imagehash.ImageHash], List[Path]]]":
    """Group the images in a directory by identical (color hash, average hash).

    Each entry of ``images_path`` is opened, resized to 300x300 and hashed
    with ``imagehash.colorhash`` and ``imagehash.average_hash``.  Images
    whose two hashes are both equal to those of an earlier image are put in
    the same group.

    Args:
        images_path: Directory whose entries are all treated as images.
        show_matches: When true, print a notice and display (via ``imgcat``)
            the first image of the group next to each newly matched image.

    Returns:
        ``(img_count, image_hashes)`` where ``img_count`` is the number of
        directory entries processed and ``image_hashes`` maps each
        ``(color_hash, average_hash)`` pair to the paths that produced it,
        in the order they were seen.
    """
    image_hashes = {}
    img_count = 0

    for test_img in images_path.iterdir():
        img_count += 1
        # resize() returns an independent image, so the file handle can be
        # released immediately.
        with Image.open(test_img) as source:
            img = source.resize((300, 300))
        hash_key = (imagehash.colorhash(img), imagehash.average_hash(img))

        # A hash difference of zero is the same as hash equality, so a dict
        # lookup replaces scanning every known group.  It also keeps the very
        # first image from being matched against itself and recorded twice.
        existing_paths = image_hashes.get(hash_key)
        if existing_paths is None:
            image_hashes[hash_key] = [test_img]
            continue

        if show_matches:
            # Both differences are zero by construction at this point.
            print("similar images (average_diff=0 color_diff=0)")
            with Image.open(existing_paths[0]) as first_match:
                imgcat(first_match)
            imgcat(img)
            print()
        existing_paths.append(test_img)

    return img_count, image_hashes
def hash_func(self, x):
    """Hash one image and return the hash as a string.

    The image is first passed through ``self.process_for_hash``; the
    algorithm is then chosen by ``self.hash_name``.

    Raises:
        NotImplementedError: if ``self.hash_name`` is not a known name.
    """
    x = self.process_for_hash(x)

    # Lambdas keep every imagehash call lazy: only the selected one runs.
    hashers = {
        "AverageHash": lambda img: imagehash.average_hash(img, hash_size=8, mean=np.mean),
        "Phash": lambda img: imagehash.phash(img, hash_size=8, highfreq_factor=4),
        "PhashSimple": lambda img: imagehash.phash_simple(img, hash_size=8, highfreq_factor=4),
        "DhashH": lambda img: imagehash.dhash(img),
        "DhashV": lambda img: imagehash.dhash_vertical(img),
        "Whash": lambda img: imagehash.whash(
            img, hash_size=8, image_scale=None, mode='haar', remove_max_haar_ll=True
        ),
        "ColorHash": lambda img: imagehash.colorhash(img, binbits=3),
        # does not work yet
        "CropResistantHash": lambda img: imagehash.crop_resistant_hash(
            img,
            hash_func=None,
            limit_segments=None,
            segment_threshold=128,
            min_segment_size=500,
            segmentation_image_size=300,
        ),
    }

    hasher = hashers.get(self.hash_name)
    if hasher is None:
        raise NotImplementedError(f"Hash Name -- {self.hash_name} -- Unknown")
    return str(hasher(x))
def hash_emoji(img_fname):
    """Return the average, color and difference hashes of an image as strings.

    The image is shrunk to 32x32 before hashing.  Per the original author's
    note the three strings have lengths 64, 49 and 64 respectively.
    """
    thumbnail = Image.open(img_fname).resize((32, 32))
    hashes = (
        average_hash(thumbnail, hash_size=16),
        colorhash(thumbnail, binbits=14),  # must be 14 to get square array
        dhash(thumbnail, hash_size=16),
    )
    return tuple(str(value) for value in hashes)
def db_add_image(file_name: str) -> bool:
    """Hash an image with seven imagehash algorithms and store the results.

    The hashes are passed to ``db_add`` as strings, after the file name, in
    this order: average, phash, phash_simple, dhash, dhash_vertical, whash,
    colorhash.  The return value of ``db_add`` is returned unchanged.
    """
    image = Image.open(file_name)
    hashers = (
        imagehash.average_hash,
        imagehash.phash,
        imagehash.phash_simple,
        imagehash.dhash,
        imagehash.dhash_vertical,
        imagehash.whash,
        imagehash.colorhash,
    )
    digests = [str(hasher(image)) for hasher in hashers]
    return db_add(file_name, *digests)
def hash_image(image: Image, image_url: str) -> str:
    """Build an MD5 hex digest from an image's URL and two of its hashes.

    The color hash and the average hash of ``image`` are flattened, in that
    order, into one string with ``"0"`` for every truthy hash element and
    ``"I"`` for every falsy one.  That string is appended to ``image_url``
    and the UTF-8 encoding of the result is hashed with MD5.
    """
    components = (imagehash.colorhash(image), imagehash.average_hash(image))
    name = "".join(
        "0" if element else "I"
        for component in components
        for element in component.hash.flatten()
    )
    payload = (image_url + name).encode("utf-8")
    return hashlib.md5(payload).hexdigest()
def hash_images(paths, hash_size=8, avg_RGB=True, a_Hash=True, p_Hash=False, d_Hash=True, w_Hash=False, color_Hash=True, register_copies=False, color_binbits=3):
    """Compute the selected features and hashes for each image file.

    Records are keyed by the last backslash-separated component of each
    path (the bare file name for Windows-style paths).

    Args:
        paths: Sized sequence of image file paths (strings).
        hash_size: ``hash_size`` passed to the average, perceptual,
            difference and wavelet hashes.
        avg_RGB: Store the mean over the first two array axes as ``"avg_RGB"``.
        a_Hash, p_Hash, d_Hash, w_Hash, color_Hash: Select which hashes are
            stored (as strings) under ``"a_Hash"``, ``"p_Hash"``,
            ``"d_Hash"``, ``"w_Hash"`` and ``"color_Hash"``.
        register_copies: When a key is seen again, append the name to that
            record's ``"filename"`` list instead of ignoring it.
        color_binbits: ``binbits`` passed to ``imagehash.colorhash``.  That
            function has no ``hash_size`` parameter, so it gets its own
            setting; the default matches the library default.

    Returns:
        dict mapping each key to its record dict.
    """
    records = {}  # Key: short file name. Value: features/hashes of that file
    for count, filename in enumerate(paths):
        short_name = filename.split("\\")[-1]

        if short_name not in records:
            entry = {"filename": [short_name]}
            # Only new files need to be opened; the handle is released as soon
            # as every requested feature has been computed.
            with Image.open(filename) as image_file:
                if avg_RGB:
                    entry["avg_RGB"] = np.mean(np.asarray(image_file), axis=(0, 1))
                if a_Hash:
                    entry["a_Hash"] = str(imagehash.average_hash(image_file, hash_size=hash_size))
                if p_Hash:
                    entry["p_Hash"] = str(imagehash.phash(image_file, hash_size=hash_size))
                if d_Hash:
                    entry["d_Hash"] = str(imagehash.dhash(image_file, hash_size=hash_size))
                if w_Hash:
                    entry["w_Hash"] = str(imagehash.whash(image_file, hash_size=hash_size))
                if color_Hash:
                    entry["color_Hash"] = str(imagehash.colorhash(image_file, binbits=color_binbits))
            records[short_name] = entry
        elif register_copies:
            records[short_name]["filename"].append(short_name)

        # Progress report every 100 files.
        if count % 100 == 0:
            print(f"{count+1} of {len(paths)}")
    return records
def clrhash(params: Dict, **data: Dict) -> Dict:
    '''
    Computes the color hash

    Parameters:
    - params: not used by this function
    - data:
        image: ndarray; an image

    Returns:
    - data:
        clrhash: string; color hash
    '''
    pil_image = Image.fromarray(data.get('image'))
    color_hash = imagehash.colorhash(pil_image)
    data['clrhash'] = f"{color_hash}"
    return data
TEST_1 = 'Skyrim+Edition+Collector+ +Hard+Corner+(Benzaie).mp4'
TEST_2 = 'Special+BERSERK+-+Hard+Corner+(Benzaie).mp4'
FRAMES_DICT = {}
FLAGS = {}
MIN_PERIOD = 24

# Frame-hashing strategies, keyed by name. Each callable takes one frame
# (presumably an RGB numpy array, given the Image.fromarray(x, 'RGB') calls)
# and returns a hash value.
METHODS = {
    # tobytes must be *called*: hashing the bound method object never
    # reflects the pixel data, so equal frames would never compare equal.
    'bytes': lambda x: hash(x.tobytes()),
    # NOTE(review): str() of `x.data` looks like an address-based repr of a
    # buffer object rather than its content - confirm this is intended.
    'string': lambda x: hash(str(x.data)),
    'average': lambda x: imagehash.average_hash(Image.fromarray(x, 'RGB')),
    'perceptual': lambda x: imagehash.phash(Image.fromarray(x, 'RGB')),
    'difference': lambda x: imagehash.dhash(Image.fromarray(x, 'RGB')),
    'wavelet': lambda x: imagehash.whash(Image.fromarray(x, 'RGB')),
    'color': lambda x: imagehash.colorhash(Image.fromarray(x, 'RGB'))
}


def log(string, boolean):
    """ print or not """
    if boolean:
        print(string)


def update_progress(progress, total):
    """ progress percentage """
def get_colorhash(image):
    """Return ``imagehash.colorhash`` of ``image`` with the library defaults."""
    color_hash = imagehash.colorhash(image)
    return color_hash
def getImageMixHash(self, image):
    """Return ``(average_hash, colorhash)`` of ``image`` as a 2-tuple."""
    return imagehash.average_hash(image), imagehash.colorhash(image)
# Collect (image, text) log pairs from the OCR working directory, split by
# outcome, skipping images that look like near-duplicates of earlier ones.
os.chdir(r"C:\Users\james\Documents\OCR")

text_files = set(glob.glob("logs/*.txt"))
average_hashes = set()
color_hashes = set()
success_data = []
failure_data = []

for image_file in glob.glob("logs/*.png"):
    # Skip near-duplicate images. Hash functions and parameters determined
    # experimentally.
    image = Image.open(image_file)

    average_hash = imagehash.average_hash(image, 10)
    if average_hash in average_hashes:
        continue
    average_hashes.add(average_hash)

    color_hash = imagehash.colorhash(image, 5)
    if color_hash in color_hashes:
        continue
    color_hashes.add(color_hash)

    # The companion text file has the same name with a "txt" extension.
    text_file = image_file[:-3] + "txt"
    if text_file not in text_files:
        continue

    base_name = os.path.basename(text_file)
    for prefix, bucket in (("success", success_data), ("failure", failure_data)):
        if base_name.startswith(prefix):
            bucket.append((image_file, text_file))
            break
    else:
        raise AssertionError("Unexpected file name: {}".format(base_name))
def hash_image(image, algorithm=None):
    """
    Hashes a given image

    image: Can be an URL, a path, a base64 encoded string or a PIL.Image.Image instance
    algorithm: Name of the hashing algorithm (case, spaces and underscores
        are ignored). Falls back to ``config.Hash.algorithm`` when None.

    Returns a ``HashObject`` (carrying the URL when the image was
    downloaded), or a ``HashingError`` when the image cannot be loaded or
    the algorithm name is not recognised.

    Erina Project — 2020
    © Anime no Sekai
    """
    has_url = False
    url = None

    log("ErinaHash", "Hashing an image...")

    # Needs to be a PIL instance
    if isfile(str(image)):
        image = Image.open(image)
    elif not isinstance(image, Image.Image):
        try:
            decoded = base64.b64decode(str(image), validate=True)
            if not decoded:
                raise ValueError("b64decode returned an empty string")
            image = Image.open(BytesIO(decoded))
        except Exception:
            # Not valid base64 image data: last resort is to treat it as a URL.
            try:
                url = image
                # Open the downloaded image as a PIL Image instance
                image = Image.open(BytesIO(requests.get(str(image)).content))
                has_url = True
            except Exception:
                return HashingError(
                    "INVALID_IMAGE_TYPE",
                    "We couldn't convert the given image to a PIL.Image.Image instance"
                )

    if algorithm is None:
        algorithm = str(config.Hash.algorithm)

    # Normalise once, up front, so every alias below is compared in the same
    # lower-case, space-free, underscore-free form.
    algorithm = str(algorithm).lower().replace(" ", "").replace("_", "")

    hashers_by_alias = (
        (('ahash', 'a', 'averagehash', 'average'), imagehash.average_hash),
        (('chash', 'c'), imagehash.colorhash),
        (('dhash', 'd'), imagehash.dhash),
        (('phash', 'p', 'perceptual', 'perceptualhash'), imagehash.phash),
        # Aliases must be lower-case: a mixed-case 'wHash' could never match
        # the normalised name, which made the wavelet hash unreachable by name.
        (('whash', 'w'), imagehash.whash),
        (
            ('dhashvertical', 'dvertical', 'dvert', 'verticald', 'verticaldhash'),
            imagehash.dhash_vertical,
        ),
        (
            (
                'phashsimple', 'psimple', 'perceptualsimple', 'simpleperceptual',
                'simplep', 'simplephash', 'simpleperceptualhas', 'simpleperceptualhash',
            ),
            imagehash.phash_simple,
        ),
    )

    for aliases, hasher in hashers_by_alias:
        if algorithm in aliases:
            result = hasher(image)
            break
    else:
        return HashingError(
            "INVALID_ALGORITHM",
            "We couldn't determine the hashing algorithm you wanted to use."
        )

    if has_url:
        return HashObject(result, image, url)
    return HashObject(result, image)
def colorhash(imageA, imageB):
    """Return the color hash of ``imageA`` minus the color hash of ``imageB``."""
    return imagehash.colorhash(imageA) - imagehash.colorhash(imageB)
def hash_img(path):
    """Return ``(colorhash, average_hash)`` for the image at ``path``.

    Args:
        path: Object with an ``exists()`` method (e.g. ``pathlib.Path``)
            that ``Image.open`` accepts.

    Returns:
        A 2-tuple of hashes, or ``-1`` when ``path`` does not exist.
    """
    if not path.exists():
        return -1
    # Open the file once for both hashes and release the handle afterwards.
    with Image.open(path) as image:
        return (imagehash.colorhash(image), imagehash.average_hash(image))
def dehazeImage(img:Union[str, np.ndarray], outputImgFile:Optional[str]= None,
                a:Optional[np.ndarray]= None, t:Optional[np.ndarray]= None,
                rt:Optional[np.ndarray]= None, tmin:float= 0.1, ps:int= 15,
                w:float= 0.99, px:float= 1e-3, r:int= 40, eps:float= 1e-3,
                verbose:bool= False, report:bool= False,
                checkSections:bool= False) -> np.ndarray: #pylint:disable= redefined-outer-name
    """
    Dehaze an image

    The image is dehazed, then compared with the original through a
    perceptual hash and a color hash. Only when the difference is large
    enough is the (gamma-corrected) dehazed image used; otherwise the
    original pixels are returned.

    Parameters
    =======================

    img: str, np.ndarray
        A file path or numpy array corresponding to an image
    outputImgFile: str (default= None)
        When not none, the file to save the output image to
    a: np.ndarray (default= None)
        Atmospheric light array (computed if None)
    t: np.ndarray (default= None)
        Transmission array (computed if None)
    rt: np.ndarray (default= None)
        Raw transmission array (computed if None)
    tmin: float (default= 0.1)
        Minimum transmission allowed
    ps: int (default= 15)
        Patch size
    w: float (default= 0.99)
        Omega weight
    px: float (default= 1e-3)
        Percentage of pixels for the atmospheric light
    r: int (default= 40)
        Pixel radius for the guided filter
    eps: float (default= 1e-3)
        Epsilon of the guided filter
    verbose: bool (default= False)
    report: bool (default= False)
        If True, returns tuple (img:np.ndarray, stats:list-of-dicts)
        with stats containing statistics for the image and optionally sections
    checkSections: bool (default= False)
        Also run stats on horizontal slices of the image

    Returns
    ================================

    np.ndarray : dehazed image, as uint8
        If report is True, returns (np.ndarray, list)

    Raises
    ================================

    ValueError : the directory of `outputImgFile` does not exist
    PermissionError : the input file cannot be read
    FileNotFoundError : the input file cannot be opened
    TypeError : `img` is neither a str nor an np.ndarray
    """
    startTime = dt.datetime.now()

    # Image loading
    saveImage = isinstance(outputImgFile, str)
    if not saveImage:
        outputImgFile = None
    if saveImage:
        # A bare file name has an empty dirname, which means the current
        # directory -- not a missing one.
        outputDir = os.path.dirname(outputImgFile)
        if not os.path.exists(outputDir or "."):
            raise ValueError(f"Output directory `{outputDir}` does not exist")

    if isinstance(img, str):
        # tries to open the input image
        try:
            inFilename = os.path.basename(img)
            img = AImage.open(img)
            if verbose:
                print(f"Image `{inFilename}` opened.")
        except PermissionError as err:
            raise PermissionError(f"Permission denied reading `{os.path.abspath(img)}`") from err
        except (IOError, FileNotFoundError) as err:
            raise FileNotFoundError(f"File `{os.path.abspath(img)}`` cannot be found.") from err
    elif isinstance(img, np.ndarray):
        inFilename = None
        img = AImage.load(img)
        if verbose:
            print("Loaded image from ndarray")
    else:
        raise TypeError("Invalid image type")

    # Dehaze the input image
    oImgO, totalLight = dehaze(img.array(), a, t, rt, tmin, ps, w, px, r, eps, verbose, returnLight= True)

    # Fix the pixel ranges that are returned from the dehazer, if need be
    if np.min(oImgO) <= -0.1:
        # Some images have insane range, eg, -3
        oImgR = (oImgO - np.clip(np.min(oImgO), -255, 0))
        oImg = oImgR / np.max(oImgR)
    else:
        oImg = oImgO.copy()
    oImg = np.clip(exposure.rescale_intensity(oImg, in_range= (np.min(img.array()), np.max(img.array()))), 0, 255)

    # Compare to original, if sufficiently dehazed do exposure correction
    # NOTE(review): the `255 *` scaling assumes img.array() holds values in [0, 1] -- confirm
    originalPil = Image.fromarray((255 * img.array()).astype(np.uint8))
    newPil = Image.fromarray((255 * oImg).astype(np.uint8))
    percepHashDiff = abs(phash(newPil) - phash(originalPil))
    colorHashDiff = abs(colorhash(newPil) - colorhash(originalPil))

    # Check the differences between input and output
    if verbose:
        # aerial: 10
        # RED: 4
        print(inFilename)
        print("Perceptual hash:", percepHashDiff)
        print("Color hash:", colorHashDiff)
    if percepHashDiff >= 20 and colorHashDiff >= 5:
        if outputImgFile is None:
            warnings.warn("There may be an issue with the dehazed image")
        else:
            warnings.warn(f"There may be an issue with the dehazed image `{outputImgFile}`")

    # Generate a final exposure-corrected image
    if percepHashDiff > 1 and (colorHashDiff > 2 or percepHashDiff >= 4 or totalLight >= 2.75):
        needed = True
        try:
            gamma = np.clip(1.1, 1, 1.2) # Brightness
            # (sigmoid contrast correction is currently disabled)
            try:
                oImg2 = exposure.adjust_gamma(np.clip(oImg, 0, 255), gamma= gamma)
            except ValueError:
                oImg2 = np.clip(oImg.copy(), 0, 255)
            oImg3 = (oImg2 * 255).astype(np.uint8)
        except ValueError as e:
            print(f"Did not need to dehaze; nonsensical result for hash difference {percepHashDiff} & {colorHashDiff}: {e}")
            oImg3 = (255 * img.array()).astype(np.uint8)
    else:
        needed = False
        print(f"Dehazing made no or trivial perceptual changes to the data in `{inFilename}` (hash difference {percepHashDiff} & {colorHashDiff})")
        oImg3 = (255 * img.array()).astype(np.uint8)

    #save the image to file
    if saveImage:
        _ = AImage.save(oImg3, outputImgFile)
        if verbose:
            print(f"Image '{outputImgFile}' saved.")

    otherStats = list()
    if checkSections:
        # Review horizontal sections of a photo
        # The goal of this is for the case where you only
        # care about haze in a subsection of an image and,
        # therefore, don't want to manipulate the image
        # unless haze exists in this "bad" location
        otherStatsDict = {}
        # Distinct names: `w` is the omega-weight parameter of this function.
        height, width = img.array().shape[:2]
        refImg = (255 * img.array()).astype(np.uint8)
        sections = {
            "topQuarter": ((0, height//4), (0, width)),
            "middleQuarter": ((height//4, height//2), (0, width)),
            "bottomHalf": ((height//2, height), (0, width)),
        }
        for corner, slices in sections.items():
            if verbose:
                print(f"\tDehazing corner {corner}...")
            if outputImgFile is not None:
                # Insert "section_<name>" before the file extension.
                oParts = outputImgFile.split(".")
                ext = oParts.pop()
                oParts.append(f"section_{corner}")
                oParts.append(ext)
                quadOut = ".".join(oParts)
            else:
                quadOut = None
            h0, h1 = slices[0]
            w0, w1 = slices[1]
            refSection = Image.fromarray(refImg[h0:h1, w0:w1])
            newSection = Image.fromarray(oImg3[h0:h1, w0:w1])
            sHashDiff = abs(phash(newSection) - phash(refSection))
            sHashDiff2 = abs(colorhash(newSection) - colorhash(refSection))
            # Kept separate from `needed`, which must still describe the whole
            # photo when the full-photo statistics are assembled below.
            sectionNeeded = sHashDiff > 1 and (sHashDiff2 > 2 or sHashDiff >= 4) # or totalLight >= 2.75)
            qs = {
                "perceptualHashDifference": sHashDiff,
                "colorHashDifference": sHashDiff2,
                "totalLight": "",
                "needed": sectionNeeded,
                "needMeasure": {
                    "perceptualBasic": sHashDiff > 1,
                    "perceptualStrong": sHashDiff >= 4,
                    "colorShift": sHashDiff2 > 2,
                    "atmosphericLight": False
                },
                "runTimeSeconds": "-",
                "style": f"section_{corner}",
                "topHalfBad": None,
                "topQuarterBad": None,
                "wholeImageBad": None,
                "wholeImageGood": None
            }
            otherStatsDict[corner] = qs
            if quadOut is not None:
                io.imsave(quadOut, oImg3[h0:h1, w0:w1])
                print(f"\twrote subimage `{quadOut}`")

    if report:
        stats = {
            "perceptualHashDifference": percepHashDiff,
            "colorHashDifference": colorHashDiff,
            "totalLight": totalLight,
            "needed": needed,
            "needMeasure": {
                "perceptualBasic": percepHashDiff > 1,
                "perceptualStrong": percepHashDiff >= 4,
                "colorShift": colorHashDiff > 2,
                "atmosphericLight": totalLight >= 2.75
            },
            "runTimeSeconds": np.around((dt.datetime.now() - startTime).total_seconds(), 3),
            "style": "fullPhoto"
        }
        if checkSections:
            # if there's haze in the bottom half, the whole frame is bad. If there's frame in the next quarter up, the top half is bad. if there's haze in the top quarter, the top quarter is bad. otherwise the whole frame is good.
            stats["topQuarterBad"] = otherStatsDict["topQuarter"]["needed"]
            stats["topHalfBad"] = otherStatsDict["middleQuarter"]["needed"] or stats["topQuarterBad"]
            stats["wholeImageBad"] = otherStatsDict["bottomHalf"]["needed"] or stats["needed"]
            stats["wholeImageGood"] = not (stats["topQuarterBad"] or stats["topHalfBad"] or stats["wholeImageBad"])
            # Aggregate it into a list
            otherStats.extend(otherStatsDict.values())
        return oImg3, [stats] + otherStats
    return oImg3
def get_hash(image_name):
    """Load the image via ``get_image(image_name)`` and return its color hash."""
    return imagehash.colorhash(get_image(image_name))
def test_color_hash(self):
    """The color hash of ``self.image`` must equal the recorded reference string."""
    expected = "07007000000"
    actual = str(imagehash.colorhash(self.image))
    self.assertEqual(actual, expected)