def find_duplicates(self):
    """
    Find duplicate images in ``self.dirname`` and optionally delete them.

    Every regular file in the directory is opened with PIL and hashed with
    ``imagehash.colorhash(img, binbits=3)``. The first file seen for a given
    hash is kept; every later file with an equal hash is reported as a
    duplicate. If any duplicates were found, the user is asked on stdin
    whether to delete them; on "y" they are removed and the reclaimed space
    is printed in megabytes (two decimals).
    """
    fnames = os.listdir(self.dirname)
    hashes = {}
    duplicates = []
    print("Finding Duplicates Now!\n")
    for image in fnames:
        image_path = os.path.join(self.dirname, image)
        # Sub-directories cannot be opened as images; skip them instead of crashing.
        if not os.path.isfile(image_path):
            continue
        with Image.open(image_path) as img:
            temp_hash = imagehash.colorhash(img, binbits=3)
        if temp_hash in hashes:
            print("Duplicate {} \nfound for Image {}!\n".format(image, hashes[temp_hash]))
            duplicates.append(image)
        else:
            hashes[temp_hash] = image

    if not duplicates:
        print("No Duplicates Found :(")
        return

    a = input("Do you want to delete these {} Images? Press Y or N:  ".format(len(duplicates)))
    if a.strip().lower() != "y":
        print("Thank you for Using Duplicate Remover")
        return

    space_saved = 0
    for duplicate in duplicates:
        duplicate_path = os.path.join(self.dirname, duplicate)
        # Size must be read before the file is removed.
        space_saved += os.path.getsize(duplicate_path)
        os.remove(duplicate_path)
        print("{} Deleted Succesfully!".format(duplicate))

    # The precision argument belongs to round(), not to format().
    print("\n\nYou saved {} mb of Space!".format(round(space_saved / 1000000, 2)))
    def run(self) -> None:
        """
        Compare every pair of images by hash and report the similar ones.

        Reads ``self.image_by_hashes`` (file name -> mapping of algorithm name
        to hash) and picks the hash stored under ``self.hash_algo``. For each
        ordered pair of distinct files the hash difference is computed; pairs
        scoring above ``self.max_score`` are dropped. One
        ``self.about_found_similars.emit(file_name, [(other, score), ...])``
        call is made per file that has at least one match, files with the most
        matches first and ties broken by ascending file name.
        """
        # TODO: Monkey patch. https://github.com/JohannesBuchner/imagehash/issues/112
        # For 'colorhash' the stored value is replaced by one recomputed from the file.
        recompute_colorhash = self.hash_algo == 'colorhash'
        if recompute_colorhash:
            from PIL import Image

        img_by_hash = dict()
        for file_name, hashes in self.image_by_hashes.items():
            hash_value = hashes[self.hash_algo]
            if recompute_colorhash:
                # Close the file as soon as the hash has been computed.
                with Image.open(file_name) as img:
                    hash_value = imagehash.colorhash(img)

            img_by_hash[file_name] = hash_value

        file_name_by_similars = defaultdict(list)
        # permutations() yields every ordered pair of distinct entries, in the
        # same order as product(..., repeat=2) with the diagonal skipped.
        for img_1, img_2 in itertools.permutations(img_by_hash.items(), 2):
            file_name_1, hash_img_1 = img_1
            file_name_2, hash_img_2 = img_2

            score = hash_img_1 - hash_img_2
            if score > self.max_score:
                continue

            file_name_by_similars[file_name_1].append((file_name_2, score))

        # Sort by number of matches in descending order; file names ascending.
        items = sorted(file_name_by_similars.items(),
                       key=lambda x: (-len(x[1]), x[0]))
        for file_name, similars in items:
            self.about_found_similars.emit(file_name, similars)
示例#3
0
def get_unique_images(images_path: Path, show_matches: bool = False) -> Tuple[int, Dict[Tuple[imagehash.ImageHash, imagehash.ImageHash], List[Path]]]:
    """
    Group the images in a directory by their (color hash, average hash) pair.

    Each entry of ``images_path`` is opened, resized to 300x300 and hashed
    with ``imagehash.colorhash`` and ``imagehash.average_hash``. Images whose
    two hashes both equal those of an earlier image are appended to that
    image's group; otherwise a new group is started.

    Parameters:
        images_path: directory whose entries are all treated as images.
        show_matches: when True, print a note and display both images with
            ``imgcat`` each time an image matches an existing group.

    Returns:
        ``(img_count, image_hashes)``: the number of entries processed and
        the mapping from hash pair to the list of paths sharing it.
    """
    image_hashes: Dict[Tuple[imagehash.ImageHash, imagehash.ImageHash], List[Path]] = dict()
    img_count = 0
    for test_img in images_path.iterdir():
        img_count += 1
        # resize() returns an independent in-memory copy, so the source file
        # can be closed right away.
        with Image.open(test_img) as source:
            img = source.resize((300, 300))
        hashes = (imagehash.colorhash(img), imagehash.average_hash(img))

        # Two hashes are equal exactly when their difference is 0, so a dict
        # lookup replaces the scan over all existing groups.
        existing_paths = image_hashes.get(hashes)
        if existing_paths is None:
            image_hashes[hashes] = [test_img]
            continue

        if show_matches:
            # Both differences are 0 by construction for a match.
            print("similar images (average_diff=0 color_diff=0)")
            with Image.open(existing_paths[0]) as first_match:
                imgcat(first_match)
            imgcat(img)
            print()
        existing_paths.append(test_img)

    return img_count, image_hashes
示例#4
0
    def hash_func(self, x):
        """Hash one image and return the hash as a string.

        The image is first passed through ``self.process_for_hash``; the
        algorithm is then selected by ``self.hash_name``. An unrecognised
        name raises ``NotImplementedError``.
        """
        prepared = self.process_for_hash(x)

        # One lazily-evaluated recipe per supported hash name.
        recipes = {
            "AverageHash": lambda im: imagehash.average_hash(im, hash_size=8, mean=np.mean),
            "Phash": lambda im: imagehash.phash(im, hash_size=8, highfreq_factor=4),
            "PhashSimple": lambda im: imagehash.phash_simple(im, hash_size=8, highfreq_factor=4),
            "DhashH": lambda im: imagehash.dhash(im),
            "DhashV": lambda im: imagehash.dhash_vertical(im),
            "Whash": lambda im: imagehash.whash(
                im,
                hash_size=8,
                image_scale=None,
                mode='haar',
                remove_max_haar_ll=True,
            ),
            "ColorHash": lambda im: imagehash.colorhash(im, binbits=3),
            # does not work yet
            "CropResistantHash": lambda im: imagehash.crop_resistant_hash(
                im,
                hash_func=None,
                limit_segments=None,
                segment_threshold=128,
                min_segment_size=500,
                segmentation_image_size=300,
            ),
        }

        recipe = recipes.get(self.hash_name)
        if recipe is None:
            raise NotImplementedError(f"Hash Name -- {self.hash_name} -- Unknown")

        return str(recipe(prepared))
示例#5
0
def hash_emoji(img_fname):
    """Return average/color/diff hashes (lengths 64, 49, 64 respectively).

    The image at ``img_fname`` is resized to 32x32 before hashing, and the
    three hashes are returned as strings in the order
    (average_hash, colorhash, dhash).
    """
    # resize() produces an independent copy, so the file handle can be
    # released before hashing.
    with Image.open(img_fname) as img:
        mini_img = img.resize((32, 32))
    a = average_hash(mini_img, hash_size=16)
    c = colorhash(mini_img, binbits=14)  # must be 14 to get square array
    d = dhash(mini_img, hash_size=16)
    return str(a), str(c), str(d)
示例#6
0
def db_add_image(file_name: str) -> bool:
    """Hash the image at ``file_name`` and store all hashes via ``db_add``.

    Seven hashes are computed, in this order: average, perceptual, simple
    perceptual, horizontal difference, vertical difference, wavelet and
    color. Each is converted to a string and passed to ``db_add`` after the
    file name. Returns whatever ``db_add`` returns.
    """
    # Compute everything while the file is open, then release the handle
    # before calling into the database layer.
    with Image.open(file_name) as image:
        hashes = (
            str(imagehash.average_hash(image)),
            str(imagehash.phash(image)),
            str(imagehash.phash_simple(image)),
            str(imagehash.dhash(image)),
            str(imagehash.dhash_vertical(image)),
            str(imagehash.whash(image)),
            str(imagehash.colorhash(image)),
        )
    return db_add(file_name, *hashes)
示例#7
0
def hash_image(image: Image, image_url: str) -> str:
    """
    Derive an MD5 hex digest from an image's hashes and its URL.

    The color hash and the average hash of ``image`` are flattened into one
    character string, which is appended to ``image_url`` and MD5-hashed.
    """
    components = (imagehash.colorhash(image), imagehash.average_hash(image))
    # One character per hash bit: "0" for a truthy bit, "I" for a falsy one.
    bit_string = "".join(
        "0" if bit else "I"
        for component in components
        for bit in component.hash.flatten()
    )
    payload = (image_url + bit_string).encode("utf-8")
    return hashlib.md5(payload).hexdigest()
示例#8
0
def hash_images(paths,
                hash_size=8,
                avg_RGB=True,
                a_Hash=True,
                p_Hash=False,
                d_Hash=True,
                w_Hash=False,
                color_Hash=True,
                register_copies=False):
    """
    Compute the selected hashes for each image file in ``paths``.

    Parameters:
        paths: sequence of image file paths (strings).
        hash_size: forwarded to the average/perceptual/difference/wavelet
            hash functions.
        avg_RGB, a_Hash, p_Hash, d_Hash, w_Hash, color_Hash: switches that
            select which values are stored for every image.
        register_copies: when True, a path whose short name was already seen
            is appended to that entry's "filename" list.

    Returns:
        dict keyed by the short file name (the text after the last backslash
        of the path). Each value is a dict with "filename" plus one entry per
        enabled switch; hashes are stored as strings.
    """
    d = dict()  # Key: short file name. Value: the hashes computed for that image

    for count, filename in enumerate(paths):
        # Only the last backslash-separated component is used as the key.
        filename_short = filename.split("\\")[-1]

        if filename_short not in d:
            entry = {"filename": [filename_short]}
            # Close the file once all requested values have been computed.
            with Image.open(filename) as image_file:
                if avg_RGB:
                    entry["avg_RGB"] = np.mean(np.asarray(image_file), axis=(0, 1))
                if a_Hash:
                    entry["a_Hash"] = str(imagehash.average_hash(image_file, hash_size=hash_size))
                if p_Hash:
                    entry["p_Hash"] = str(imagehash.phash(image_file, hash_size=hash_size))
                if d_Hash:
                    entry["d_Hash"] = str(imagehash.dhash(image_file, hash_size=hash_size))
                if w_Hash:
                    entry["w_Hash"] = str(imagehash.whash(image_file, hash_size=hash_size))
                if color_Hash:
                    # colorhash() is sized by `binbits` and has no `hash_size`
                    # keyword, so the library default is used here.
                    entry['color_Hash'] = str(imagehash.colorhash(image_file))
            d[filename_short] = entry
        elif register_copies:
            d[filename_short]['filename'].append(filename_short)

        if count % 100 == 0:
            print(f"{count+1} of {len(paths)}")
    return d
示例#9
0
def clrhash(params: Dict, **data: Dict) -> Dict:
    '''
    Compute the color hash of an image.

    Parameters:
      - params: not read by this function
      - data:
        image: ndarray; an image
    Returns:
      - data: the received keyword dict, extended with
        clrhash: string; color hash
    '''
    pil_image = Image.fromarray(data.get('image'))
    data['clrhash'] = f"{imagehash.colorhash(pil_image)}"
    return data
TEST_1 = 'Skyrim+Edition+Collector+ +Hard+Corner+(Benzaie).mp4'
TEST_2 = 'Special+BERSERK+-+Hard+Corner+(Benzaie).mp4'
FRAMES_DICT = {}
FLAGS = {}

MIN_PERIOD = 24

# Frame-hashing strategies, keyed by name. Each callable takes one frame
# (an array) and returns its hash.
METHODS = {
    # tobytes must be *called*; hashing the bound method ignores the pixel data.
    'bytes': lambda x: hash(x.tobytes()),
    # NOTE(review): on Python 3 str(x.data) looks like the repr of a
    # memoryview rather than the buffer contents - confirm this is intended.
    'string': lambda x: hash(str(x.data)),
    'average': lambda x: imagehash.average_hash(Image.fromarray(x, 'RGB')),
    'perceptual': lambda x: imagehash.phash(Image.fromarray(x, 'RGB')),
    'difference': lambda x: imagehash.dhash(Image.fromarray(x, 'RGB')),
    'wavelet': lambda x: imagehash.whash(Image.fromarray(x, 'RGB')),
    'color': lambda x: imagehash.colorhash(Image.fromarray(x, 'RGB'))
}


def log(string, boolean):
    """
    Print ``string`` when ``boolean`` is truthy; otherwise do nothing.
    """
    if not boolean:
        return
    print(string)


def update_progress(progress, total):
    """
    Report progress as a percentage.

    NOTE(review): only this docstring is visible here; no statement uses
    `progress` or `total`, so as written the function does nothing and
    returns None. Presumably the body was cut off - confirm against the
    original file.
    """
def get_colorhash(image):
    """Return the ``imagehash.colorhash`` of ``image``."""
    color_hash = imagehash.colorhash(image)
    return color_hash
示例#12
0
 def getImageMixHash(self, image):
     """Return ``(average_hash, colorhash)`` for ``image``, in that order."""
     return imagehash.average_hash(image), imagehash.colorhash(image)
示例#13
0
# Collect (image, text) log pairs from the OCR log directory, split into
# successes and failures by file-name prefix.
os.chdir(r"C:\Users\james\Documents\OCR")
text_files = set(glob.glob("logs/*.txt"))
average_hashes = set()
color_hashes = set()
success_data = []
failure_data = []
for image_file in glob.glob("logs/*.png"):
    # Skip near-duplicate images. Hash functions and parameters determined
    # experimentally.
    # The context manager releases each file handle before the next image.
    with Image.open(image_file) as image:
        average_hash = imagehash.average_hash(image, 10)
        if average_hash in average_hashes:
            continue
        average_hashes.add(average_hash)
        color_hash = imagehash.colorhash(image, 5)
        if color_hash in color_hashes:
            continue
        color_hashes.add(color_hash)

    # The matching text log shares the image's name, with a "txt" extension.
    text_file = image_file[:-3] + "txt"
    if text_file not in text_files:
        continue
    base_name = os.path.basename(text_file)
    if base_name.startswith("success"):
        success_data.append((image_file, text_file))
    elif base_name.startswith("failure"):
        failure_data.append((image_file, text_file))
    else:
        raise AssertionError("Unexpected file name: {}".format(base_name))
示例#14
0
def hash_image(image, algorithm=None):
    """
    Hashes a given image

    image: Can be an URL, a path, a base64 encoded string or a PIL.Image.Image instance
    algorithm: Name or alias of the hashing algorithm (case, spaces and
        underscores are ignored). Defaults to ``config.Hash.algorithm``.

    Returns a ``HashObject`` (carrying the source URL when the image was
    downloaded), or a ``HashingError`` when the image cannot be loaded or
    the algorithm name is not recognised.

    Erina Project — 2020\n
    © Anime no Sekai
    """
    has_url = False
    url = None

    log("ErinaHash", "Hashing an image...")
    # Needs to be a PIL instance
    if isfile(str(image)):
        image = Image.open(image)
    elif not isinstance(image, Image.Image):
        try:
            # First attempt: treat the input as base64-encoded image data.
            decoded = base64.b64decode(str(image), validate=True)
            if not decoded:
                raise ValueError("b64decode returned an empty string")
            image = Image.open(BytesIO(decoded))
        except Exception:
            # Best-effort fallback: treat the input as a URL.
            try:
                url = image
                image = Image.open(
                    BytesIO(requests.get(str(image)).content)
                )  # Open the downloaded image as a PIL Image instance
                has_url = True
            except Exception:
                return HashingError(
                    "INVALID_IMAGE_TYPE",
                    "We couldn't convert the given image to a PIL.Image.Image instance"
                )

    if algorithm is None:
        algorithm = str(config.Hash.algorithm)

    # Normalise once so every alias below is matched in its canonical form.
    algorithm = str(algorithm).lower().replace(" ", "").replace("_", "")

    hashers = (
        (('ahash', 'a', 'averagehash', 'average'), imagehash.average_hash),
        (('chash', 'c'), imagehash.colorhash),
        (('dhash', 'd'), imagehash.dhash),
        (('phash', 'p', 'perceptual', 'perceptualhash'), imagehash.phash),
        # Aliases must be lowercase: the name was lowercased above.
        (('whash', 'w'), imagehash.whash),
        (('dhashvertical', 'dvertical', 'dvert', 'verticald',
          'verticaldhash'), imagehash.dhash_vertical),
        # 'simpleperceptualhas' is a historical typo kept for compatibility.
        (('phashsimple', 'psimple', 'perceptualsimple',
          'simpleperceptual', 'simplep', 'simplephash',
          'simpleperceptualhash', 'simpleperceptualhas'), imagehash.phash_simple),
    )
    for aliases, hasher in hashers:
        if algorithm in aliases:
            result = hasher(image)
            break
    else:
        return HashingError(
            "INVALID_ALGORITHM",
            "We couldn't determine the hashing algorithm you wanted to use."
        )

    if has_url:
        return HashObject(result, image, url)
    return HashObject(result, image)
示例#15
0
def colorhash(imageA, imageB):
    """Return the difference between the color hashes of ``imageA`` and ``imageB``."""
    first, second = imagehash.colorhash(imageA), imagehash.colorhash(imageB)
    return first - second
示例#16
0
def hash_img(path):
    """Return ``(colorhash, average_hash)`` for the image at ``path``.

    ``path`` must provide ``exists()`` (e.g. a ``pathlib.Path``). When the
    path does not exist, ``-1`` is returned instead of a tuple.
    """
    if not path.exists():
        return -1
    # Open the file once for both hashes and close it afterwards.
    with Image.open(path) as img:
        return imagehash.colorhash(img), imagehash.average_hash(img)
示例#17
0
def dehazeImage(img:Union[str, np.ndarray], outputImgFile:Optional[str]= None,  a:Optional[np.ndarray]= None, t:Optional[np.ndarray]= None, rt:Optional[np.ndarray]= None, tmin:float= 0.1, ps:int= 15, w:float= 0.99, px:float= 1e-3, r:int= 40, eps:float= 1e-3, verbose:bool= False, report:bool= False, checkSections:bool= False) -> np.ndarray: #pylint:disable= redefined-outer-name
    """
    Dehaze an image

    Parameters
    =======================

    img: str, np.ndarray
        A file path or numpy array corresponding to an image

    outputImgFile: str (default= None)
        When not none, the file to save the output image to.
        A bare file name is saved relative to the current directory.

    a: np.ndarray (default= None)
        Atmospheric light array (computed if None)

    t: np.ndarray (default= None)
        Transmission array (computed if None)

    rt: np.ndarray (default= None)
        Raw transmission array (computed if None)

    tmin: float (default= 0.1)
        Minimum transmission allowed

    ps: int (default= 15)
        Patch size

    w: float (default= 0.99)
        Omega weight

    px: float (default= 1e-3)
        Percentage of pixels for the atmospheric light

    r: int (default= 40)
        Pixel radius for the guided filter

    eps: float (default= 1e-3)
        Epsilon of the guided filter

    verbose: bool (default= False)
        Print progress and hash-difference information

    report: bool (default= False)
        If True, returns tuple (img:np.ndarray, stats:list-of-dicts)
        with stats containing statistics for the image and optionally
        sections

    checkSections: bool (default= False)
        Also run stats on horizontal slices of the image

    Returns
    ================================

    np.ndarray : dehazed image

    If report is True, returns (np.ndarray, list)

    Raises
    ================================

    ValueError : the directory of `outputImgFile` does not exist

    PermissionError, FileNotFoundError : the input file cannot be read

    TypeError : `img` is neither a str nor an np.ndarray
    """
    startTime = dt.datetime.now()
    # Image loading
    saveImage = isinstance(outputImgFile, str)
    if not saveImage:
        outputImgFile = None
    if saveImage:
        # dirname() is '' for a bare file name; that means the current directory.
        outputDir = os.path.dirname(outputImgFile) or os.curdir
        if not os.path.exists(outputDir):
            raise ValueError(f"Output directory `{os.path.dirname(outputImgFile)}` does not exist")
    if isinstance(img, str):
        # tries to open the input image
        try:
            inFilename = os.path.basename(img)
            img = AImage.open(img)
            if verbose:
                print(f"Image `{inFilename}` opened.")
        except PermissionError:
            raise PermissionError(f"Permission denied reading `{os.path.abspath(img)}`")
        except (IOError, FileNotFoundError):
            raise FileNotFoundError(f"File `{os.path.abspath(img)}`` cannot be found.")
    elif isinstance(img, np.ndarray):
        inFilename = None
        img = AImage.load(img)
        if verbose:
            print("Loaded image from ndarray")
    else:
        raise TypeError("Invalid image type")
    # Dehaze the input image
    oImgO, totalLight = dehaze(img.array(), a, t, rt, tmin, ps, w, px, r, eps, verbose, returnLight= True)
    # Fix the pixel ranges that are returned from the dehazer, if need be
    if np.min(oImgO) <= -0.1:
        # Some images have insane range, eg, -3
        oImgR = (oImgO - np.clip(np.min(oImgO), -255, 0))
        oImg = oImgR / np.max(oImgR)
    else:
        oImg = oImgO.copy()
    oImg = np.clip(exposure.rescale_intensity(oImg, in_range= (np.min(img.array()), np.max(img.array()))), 0, 255)
    # Compare to original, if sufficiently dehazed do exposure correction
    originalHash = phash(Image.fromarray((255 * img.array()).astype(np.uint8)))
    originalHashC = colorhash(Image.fromarray((255 * img.array()).astype(np.uint8)))
    newHash = phash(Image.fromarray((255 * oImg).astype(np.uint8)))
    newHashC = colorhash(Image.fromarray((255 * oImg).astype(np.uint8)))
    percepHashDiff = abs(newHash - originalHash)
    colorHashDiff = abs(newHashC - originalHashC)
    # Check the differences between input and output
    if verbose:
        # aerial: 10
        # RED: 4
        print(inFilename)
        print("Perceptual hash:", percepHashDiff)
        print("Color hash:", colorHashDiff)
    if percepHashDiff >= 20 and colorHashDiff >= 5:
        if outputImgFile is None:
            warnings.warn("There may be an issue with the dehazed image")
        else:
            warnings.warn(f"There may be an issue with the dehazed image `{outputImgFile}`")
    # Generate a final exposure-corrected image
    if percepHashDiff > 1 and (colorHashDiff > 2 or percepHashDiff >= 4 or totalLight >= 2.75):
        needed = True
        try:
            gamma = np.clip(1.1, 1, 1.2) # Brightness
            gain = np.clip(5.4, 5, 5.7) # Contrast #pylint: disable= unused-variable
            try:
                oImg2 = exposure.adjust_gamma(np.clip(oImg, 0, 255), gamma= gamma)
            except ValueError:
                oImg2 = np.clip(oImg.copy(), 0, 255)
            # oImg3 = exposure.adjust_sigmoid(oImg2, gain= gain)
            oImg3 = (oImg2 * 255).astype(np.uint8)
        except ValueError as e:
            print(f"Did not need to dehaze; nonsensical result for hash difference {percepHashDiff} & {colorHashDiff}: {e}")
            oImg3 = (255 * img.array()).astype(np.uint8)
    else:
        needed = False
        print(f"Dehazing made no or trivial perceptual changes to the data in `{inFilename}` (hash difference {percepHashDiff} & {colorHashDiff})")
        oImg3 = (255 * img.array()).astype(np.uint8)
    #save the image to file
    if saveImage:
        _ = AImage.save(oImg3, outputImgFile)
        if verbose:
            print(f"Image '{outputImgFile}' saved.")
    otherStats = list()
    if checkSections:
        # Review horizontal sections of a photo
        # The goal of this is for the case where you only
        # care about haze in a subsection of an image and,
        # therefore, don't want to manipulate the image
        # unless haze exists in this "bad" location
        otherStatsDict = {}
        # Distinct names keep the omega parameter `w` from being shadowed.
        height, width = img.array().shape[:2]
        refImg = (255 * img.array()).astype(np.uint8)
        sections = {
            "topQuarter": ((0, height//4), (0, width)),
            "middleQuarter": ((height//4, height//2), (0, width)),
            "bottomHalf": ((height//2, height), (0, width)),
        }
        for corner, slices in sections.items():
            if verbose:
                print(f"\tDehazing corner {corner}...")
            if outputImgFile is not None:
                oParts = outputImgFile.split(".")
                ext = oParts.pop()
                oParts.append(f"section_{corner}")
                oParts.append(ext)
                quadOut = ".".join(oParts)
            else:
                quadOut = None
            h0, h1 = slices[0]
            w0, w1 = slices[1]
            sectionOHash = phash(Image.fromarray(refImg[h0:h1, w0:w1]))
            sectionNewHash = phash(Image.fromarray(oImg3[h0:h1, w0:w1]))
            sectionCOHash = colorhash(Image.fromarray(refImg[h0:h1, w0:w1]))
            sectionCNewHash = colorhash(Image.fromarray(oImg3[h0:h1, w0:w1]))
            sHashDiff = abs(sectionNewHash - sectionOHash)
            sHashDiff2 = abs(sectionCNewHash - sectionCOHash)
            # Kept separate from the whole-image `needed`, which the report
            # below still has to read.
            sectionNeeded = sHashDiff > 1 and (sHashDiff2 > 2 or sHashDiff >= 4) # or totalLight >= 2.75)
            qs = {
                "perceptualHashDifference": sHashDiff,
                "colorHashDifference": sHashDiff2,
                "totalLight": "",
                "needed": sectionNeeded,
                "needMeasure": {
                    "perceptualBasic": sHashDiff > 1,
                    "perceptualStrong": sHashDiff >= 4,
                    "colorShift": sHashDiff2 > 2,
                    "atmosphericLight": False
                },
                "runTimeSeconds": "-",
                "style": f"section_{corner}",
                "topHalfBad": None,
                "topQuarterBad": None,
                "wholeImageBad": None,
                "wholeImageGood": None
            }
            otherStatsDict[corner] = qs
            if quadOut is not None:
                io.imsave(quadOut, oImg3[h0:h1, w0:w1])
                print(f"\twrote subimage `{quadOut}`")
    if report:
        stats = {
            "perceptualHashDifference": percepHashDiff,
            "colorHashDifference": colorHashDiff,
            "totalLight": totalLight,
            "needed": needed,
            "needMeasure": {
                "perceptualBasic": percepHashDiff > 1,
                "perceptualStrong": percepHashDiff >= 4,
                "colorShift": colorHashDiff > 2,
                "atmosphericLight": totalLight >= 2.75
            },
            "runTimeSeconds": np.around((dt.datetime.now() - startTime).total_seconds(), 3),
            "style": "fullPhoto"
        }
        if checkSections:
            # If there's haze in the bottom half, the whole frame is bad. If there's haze in the next quarter up, the top half is bad. If there's haze in the top quarter, the top quarter is bad. Otherwise the whole frame is good.
            stats["topQuarterBad"] = otherStatsDict["topQuarter"]["needed"]
            stats["topHalfBad"] = otherStatsDict["middleQuarter"]["needed"] or stats["topQuarterBad"]
            stats["wholeImageBad"] = otherStatsDict["bottomHalf"]["needed"] or stats["needed"]
            stats["wholeImageGood"] = not (stats["topQuarterBad"] or stats["topHalfBad"] or stats["wholeImageBad"])
            # Aggregate it into a list
            otherStats = list(otherStatsDict.values())
        return oImg3, [stats] + otherStats
    return oImg3
示例#18
0
def get_hash(image_name):
    """Load ``image_name`` through ``get_image`` and return its color hash."""
    return imagehash.colorhash(get_image(image_name))
	def test_color_hash(self):
		"""The color hash of ``self.image`` must equal the recorded reference string."""
		actual = str(imagehash.colorhash(self.image))
		expected = "07007000000"
		self.assertEqual(actual, expected)