def findSimilarImgs(baseImageFile, tarDir, hasCmpedList, step):
    """Find images under ``tarDir`` that are near-duplicates of ``baseImageFile``.

    The base image is hashed with ``dhash.dhash_int`` and compared with every
    file whose name ends in ``jpg`` or ``png`` found by walking ``tarDir``.
    Directories whose path ends with ``.git`` or starts with the module-level
    ``resultDir`` are skipped, as are files already in ``hasCmpedList``.
    When the percentage of differing hash bits is at most the module-level
    ``limitDiff``, the file is appended to ``hasCmpedList``, a message is
    printed, and the pair is handed to ``movePicToResultDir``.

    :param baseImageFile: path of the reference image
    :param tarDir: root directory to search
    :param hasCmpedList: list of already matched paths; mutated in place
    :param step: forwarded unchanged to ``movePicToResultDir``
    :return: None; returns early if the base image cannot be loaded or hashed
    """
    baseImage = load_image(baseImageFile)
    if baseImage is None:
        return
    try:
        baseHash = dhash.dhash_int(baseImage, size=imageSize)
    except Exception:
        # Base image cannot be hashed, so there is nothing to compare against.
        # ``Exception`` (not a bare except) lets Ctrl-C still interrupt.
        return

    # Denominator used to turn a bit count into a percentage.
    totalBits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(tarDir):
        if path.endswith('.git') or path.startswith(resultDir):
            continue
        for filename in filelist:
            if not filename.endswith(('jpg', 'png')):
                continue
            imageName = os.path.join(path, filename)
            if imageName in hasCmpedList:
                continue
            candidate = load_image(imageName)
            if candidate is None:
                continue
            try:
                candidateHash = dhash.dhash_int(candidate, size=imageSize)
            except Exception:
                # Skip files that cannot be hashed.
                continue
            num_bits_different = dhash.get_num_bits_different(
                baseHash, candidateHash)
            diff = 100 * num_bits_different / totalBits
            if diff <= limitDiff:
                hasCmpedList.append(imageName)
                print(baseImageFile + " is same with " + imageName)
                movePicToResultDir(step, baseImageFile, imageName, diff)
def hashGif(conn, gifUrl, url):
    """Download a GIF and return the dhash of every frame as one string.

    The result is each frame's ``dhash.dhash_int`` value followed by a single
    space, concatenated in frame order.  If the download or ``Image.open``
    fails, the row in ``Posts`` whose ``Url`` equals ``url`` is deleted and
    the string ``'invalid'`` is returned.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param gifUrl: address of the GIF to download
    :param url: post URL used as the key for the ``DELETE``
    :return: space-terminated hash string, or ``'invalid'``
    """
    try:
        request = Request(str(gifUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload is in memory.
        with urlopen(request, context=context) as response:
            payload = BytesIO(response.read())
        frame = Image.open(payload)
    except Exception:
        # ``Exception`` rather than a bare except, so an interrupt does not
        # delete the post.
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
        return 'invalid'

    frameHashes = []
    nframes = 0
    while True:
        frameHashes.append('{} '.format(dhash.dhash_int(frame)))
        nframes += 1
        try:
            frame.seek(nframes)
        except EOFError:
            # Seeking past the last frame marks the end of the animation.
            break
    return ''.join(frameHashes)
def doComparison(photo1loc, photo2loc):
    """Compare a local photo with a remote one using several metrics.

    The local file is read with ``cv2.imread`` and the remote one with
    ``url_to_image``; the remote image is resized to the local image's
    dimensions, both are converted to greyscale, and ``compareImages``
    produces the first two metrics.  The third metric is the number of
    differing bits between the two images' ``dhash.dhash_int`` values.

    :param photo1loc: path of the local image file
    :param photo2loc: URL of the image to compare against
    :return: tuple ``(MSE, SSIM, num_bits_different, faceCompare)``;
        ``faceCompare`` is currently the fixed placeholder ``0.3``
    """
    original = cv2.imread(photo1loc)
    contrast = url_to_image(photo2loc)
    contrast = cv2.resize(contrast, (original.shape[1], original.shape[0]))
    original = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    contrast = cv2.cvtColor(contrast, cv2.COLOR_BGR2GRAY)
    MSE, SSIM = compareImages(original, contrast)

    # Fetch the remote bytes once for hashing and release the connection.
    with urllib.request.urlopen(photo2loc) as response:
        remoteBytes = io.BytesIO(response.read())

    with Image.open(photo1loc) as image1, Image.open(remoteBytes) as image2:
        num_bits_different = dhash.get_num_bits_different(
            dhash.dhash_int(image1), dhash.dhash_int(image2))

    # Face comparison is not wired in; a constant is returned in its place.
    faceCompare = 0.3
    return (MSE, SSIM, num_bits_different, faceCompare)
def compare_images(imageA, imageB, title):
    """Print a JSON object with the dhash similarity of two image files.

    Both files are opened with ``Image.open`` and hashed with
    ``dhash.dhash_int`` at size 8.  The differing-bit count is converted to
    a percentage of 128 bits and printed as
    ``{"Similarity is ": "<value>%"}``.  ``title`` is not used.

    :param imageA: first image file
    :param imageB: second image file
    :param title: unused
    :return: None
    """
    first, second = Image.open(imageA), Image.open(imageB)
    firstHash = dhash.dhash_int(first, 8)
    secondHash = dhash.dhash_int(second, 8)
    differing = dhash.get_num_bits_different(firstHash, secondHash)
    similarity = 100 - ((differing / 128) * 100)
    print(json.dumps({"Similarity is ": str(similarity) + '%'}))
def main(arguments):
    """Compare an image with a directory of images and add it if it is new.

    Parses ``infile``, ``dbDir`` and ``--threshold`` from ``arguments``.
    The test image and every readable image in ``dbDir`` are hashed with
    ``dhash.dhash_int``; the score for a pair is ``1 - differing_bits /
    total_hash_bits``.  Each database image scoring above the threshold is
    reported.  If nothing matched, ``infile`` is copied into ``dbDir``.

    :param arguments: list of command-line arguments (without program name)
    :raises SystemExit: if the test file cannot be opened as an image
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help="path to file to compare")
    parser.add_argument('dbDir', help="path to 'Database' directory")
    parser.add_argument('--threshold', help="threshold val (default 0.75)",
                        type=float, default=0.75)
    args = parser.parse_args(arguments)
    baseDir = os.path.abspath(args.dbDir)

    # A dhash of size N has 2 * N * N bits.  Dividing by this constant
    # (instead of the integer's bit_length, which shrinks with leading zero
    # bits and is 0 for a zero hash) keeps the score well defined.
    hashSize = 8
    hashBits = 2 * hashSize * hashSize

    try:
        imTest = PIL.Image.open(args.infile)
    except OSError:
        # Covers a missing file as well as a file PIL cannot identify.
        print("An error occured trying to read the test file. Can't compare")
        raise SystemExit

    match = False
    with imTest:
        dh1 = None  # computed lazily, once, on the first readable DB image
        for dbImg in os.listdir(baseDir):
            try:
                imDB = PIL.Image.open(os.path.join(baseDir, dbImg))
            except OSError:
                # Not an image PIL can open (or a directory): skip it.
                continue
            with imDB:
                dh2 = dhash.dhash_int(imDB, size=hashSize)
            if dh1 is None:
                dh1 = dhash.dhash_int(imTest, size=hashSize)
            score = 1 - dhash.get_num_bits_different(dh1, dh2) / hashBits
            if score > args.threshold:
                # alert about the match
                print("Matching image found in database directory: ", dbImg,
                      " (score: ", str(score), ").")
                match = True

    if match:
        print("Match(es) found. Not adding")
    else:
        print("No match found. Adding ", args.infile,
              " to database directory: ", baseDir)
        shutil.copy(args.infile, baseDir)
async def save_hashes(self, message: discord.Message):
    """Hash the image attachments of a message, store and yield each hash.

    Attachments larger than ``self.config.get("max_size") * 1024`` or whose
    file extension is not one of jpg, jpeg, png, webp, gif are skipped, as
    are attachments that ``Image.open`` rejects with ``OSError``.  For each
    remaining attachment the dhash is saved through ``repo_i.add_image`` as
    a hex string and then yielded as an integer.
    """
    allowed = ("jpg", "jpeg", "png", "webp", "gif")
    size_limit_factor = 1024

    for attachment in message.attachments:
        too_big = attachment.size > self.config.get("max_size") * size_limit_factor
        if too_big:
            continue

        suffix = attachment.filename.split(".")[-1].lower()
        if suffix not in allowed:
            continue

        buffer = BytesIO()
        await attachment.save(buffer)
        try:
            picture = Image.open(buffer)
        except OSError:
            continue

        image_hash = dhash.dhash_int(picture)
        repo_i.add_image(
            channel_id=message.channel.id,
            message_id=message.id,
            attachment_id=attachment.id,
            dhash=str(hex(image_hash)),
        )
        yield image_hash
def hashImg(conn, imgUrl, url):
    """Download an image and return its dhash.

    If the download fails, the row in ``Posts`` whose ``Url`` equals ``url``
    is deleted and the string ``'invalid'`` is returned.  Errors raised while
    opening or hashing the downloaded data are not caught here.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param imgUrl: address of the image to download
    :param url: post URL used as the key for the ``DELETE``
    :return: integer from ``dhash.dhash_int``, or ``'invalid'``
    """
    try:
        request = Request(str(imgUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload is in memory.
        with urlopen(request, context=context) as response:
            payload = BytesIO(response.read())
    except Exception:
        # ``Exception`` rather than a bare except, so an interrupt does not
        # delete the post.
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
        return 'invalid'

    return dhash.dhash_int(Image.open(payload))
def hashImg(conn, imgUrl, url):
    """Download an image and return its dhash, removing dead posts.

    On any failure the row in ``Posts`` whose ``Url`` equals ``url`` is
    deleted and ``'invalid'`` is returned.  Failures other than
    ``HTTPError`` additionally append the traceback and ``url`` to
    ``dedLink.txt``.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param imgUrl: address of the image to download
    :param url: post URL used as the key for the ``DELETE``
    :return: integer from ``dhash.dhash_int``, or ``'invalid'``
    """
    try:
        request = Request(str(imgUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload is in memory.
        with urlopen(request, context=context) as response:
            payload = BytesIO(response.read())
        return dhash.dhash_int(Image.open(payload))
    except Exception as exc:
        if not isinstance(exc, HTTPError):
            # Unexpected failure: keep the traceback for later inspection.
            # The ``with`` block guarantees the log is flushed and closed.
            with open('dedLink.txt', 'a') as log:
                log.write('{}\n{}\n'.format(str(traceback.format_exc()), url))
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
    return 'invalid'
def hash_vid(conn, vid_url, url):
    """Return the dhash of every frame of a video as one string.

    The video at ``vid_url['reddit_video']['fallback_url']`` is opened with
    ``av.open``; each decoded frame's ``dhash.dhash_int`` value followed by a
    single space is concatenated in order.  On any exception the row in
    ``Posts`` whose ``Url`` equals ``url`` is deleted and ``'invalid'`` is
    returned; unless the exception text contains ``'403'``, the traceback
    and ``vid_url`` are also appended to ``dedLink.txt``.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param vid_url: mapping holding the video location
    :param url: post URL used as the key for the ``DELETE``
    :return: space-terminated hash string, or ``'invalid'``
    """
    frame_hashes = []
    try:
        # The context manager releases the container even if decoding fails.
        with av.open(vid_url['reddit_video']['fallback_url']) as container:
            for frame in container.decode(video=0):
                frame_hashes.append(
                    '{} '.format(dhash.dhash_int(frame.to_image())))
    except Exception as e:
        if '403' not in str(e):
            # Unexpected failure: keep the traceback for later inspection.
            with open('dedLink.txt', 'a') as log:
                log.write(
                    '{}\n{}\n'.format(str(traceback.format_exc()), vid_url))
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
        return 'invalid'
    return ''.join(frame_hashes)
def find_image_label(image_path, char_dict_hash):
    """Look up the label stored for an image's dhash.

    :param image_path: image file to hash with ``dhash.dhash_int``
    :param char_dict_hash: mapping from dhash value to label
    :return: the stored label, or ``'not find this character'`` when the
        hash is not a key of ``char_dict_hash``
    """
    code = dhash.dhash_int(Image.open(image_path))
    return char_dict_hash.get(code, 'not find this character')
def d_hash_compute(file):
    """
    Compute the dhash of an image file using a hash size of 32.

    :param file: string filename
    :return: the value returned by ``dhash.dhash_int`` (an integer hash)
    """
    # Close the underlying file handle once the hash has been computed.
    with Image.open(file) as image:
        return dhash.dhash_int(image, size=32)
def get_dhash(filename):
    """Return the dhash (size 16) of the image stored in ``filename``."""
    # The size controls sensitivity: a greater size means more sensitivity.
    # Duplicates detected on the original author's collection:
    # 215 at size 8; 160 at size 16; 160 at size 32.
    hash_size = 16
    with Image.open(filename) as picture:
        return dhash.dhash_int(picture, size=hash_size)
def url_to_img_hash(url):
    """Return the dhash of the image at ``url``, or ``None`` on failure.

    The image is obtained through ``url_to_image``.  Any ``Exception`` raised
    while fetching or hashing is printed and ``None`` is returned.
    """
    result = None
    try:
        result = dhash.dhash_int(url_to_image(url))
    except Exception as err:
        print(err)
    return result
def get_existing_dhashes(self, img_dir):
    """Return the dhash of every ``*.png`` file directly inside ``img_dir``.

    Each file is read with ``cv2.imread``, wrapped with ``Image.fromarray``
    and hashed with ``dhash.dhash_int`` at ``HASH_SIZE``.
    """
    return [
        dhash.dhash_int(Image.fromarray(cv2.imread(str(png_file))), HASH_SIZE)
        for png_file in Path(img_dir).glob("*.png")
    ]
def record(self, new_image):
    """Save ``new_image`` unless an image with the same dhash is known.

    The hash is computed with ``dhash.dhash_int`` at ``HASH_SIZE``.  A hash
    not yet in ``self.existing_dhashes`` is appended to it and the image is
    written to ``self.output_dir`` as ``<hash>.png``.
    """
    image_hash = dhash.dhash_int(Image.fromarray(new_image), HASH_SIZE)
    if image_hash in self.existing_dhashes:
        return

    self.existing_dhashes.append(image_hash)
    target = self.output_dir / (str(image_hash) + ".png")
    print("Writing image to ", target)
    cv2.imwrite(str(target), new_image)
def get_image_info(path):
    """Build an ``ImageInfo`` for the image file at ``path``.

    The record holds the path, the ``dhash.dhash_int`` value, the file size
    in megabytes (bytes / 1024 / 1024 via ``os.path.getsize``) and the
    ``(width, height)`` of the image.
    """
    bytes_per_mb = 1024 * 1024
    with Image(filename=path) as picture:
        return ImageInfo(
            path=path,
            dhash=dhash.dhash_int(picture),
            file_size_in_mb=os.path.getsize(path) / bytes_per_mb,
            dims=(picture.width, picture.height),
        )
def hashGif(conn, gifUrl, url):
    """Download a GIF and return the dhash of every frame as one string.

    The result is each frame's ``dhash.dhash_int`` value followed by a single
    space, concatenated in frame order.  On any failure the row in ``Posts``
    whose ``Url`` equals ``url`` is deleted.  An ``HTTPError`` yields an
    empty string; any other failure appends the traceback and ``url`` to
    ``dedLink.txt`` and yields ``'invalid'``.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param gifUrl: address of the GIF to download
    :param url: post URL used as the key for the ``DELETE``
    :return: space-terminated hash string, ``''`` or ``'invalid'``
    """
    frameHashes = []
    try:
        request = Request(str(gifUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload is in memory.
        with urlopen(request, context=context) as response:
            payload = BytesIO(response.read())
        frame = Image.open(payload)
        nframes = 0
        while True:
            frameHashes.append('{} '.format(dhash.dhash_int(frame)))
            nframes += 1
            try:
                frame.seek(nframes)
            except EOFError:
                # Seeking past the last frame marks the end of the animation.
                break
    except Exception as exc:
        httpFailure = isinstance(exc, HTTPError)
        if not httpFailure:
            # Unexpected failure: keep the traceback for later inspection.
            # The ``with`` block guarantees the log is flushed and closed.
            with open('dedLink.txt', 'a') as log:
                log.write('{}\n{}\n'.format(str(traceback.format_exc()), url))
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
        # An HTTP error happens before any frame is hashed, hence ''.
        return '' if httpFailure else 'invalid'
    return ''.join(frameHashes)
def judge_image_similarity(image_path, chr_name, hash_list, bk_tree, diff_threshold):
    """List the names of known images whose hash is close to the given image.

    The image is hashed with ``dhash.dhash_int`` and ``bk_tree.find`` is
    queried with a distance of 30.  Every hit whose distance is below
    ``diff_threshold`` contributes ``<name>_diff: <distance>``, where the
    name is taken from ``chr_name`` at the position of the hit's hash in
    ``hash_list``.  Entries keep the order in which ``find`` returned them.

    :return: list of strings as described above
    """
    query_hash = dhash.dhash_int(Image.open(image_path))
    hits = bk_tree.find(query_hash, 30)
    return [
        chr_name[hash_list.index(known_hash)] + '_diff: ' + str(distance)
        for distance, known_hash in hits
        if distance < diff_threshold
    ]
def url_to_img_hash(url: str) -> int:
    """Return the dhash of the image found at a URL.

    Arguments:
        url {str} -- url to image, passed to ``url_to_image``

    Returns:
        int -- result of ``dhash.dhash_int`` for that image
    """
    return dhash.dhash_int(url_to_image(url))
def get_image_infos(paths):
    """Build one ``ImageInfo`` per path, in the order given.

    Each record holds the path, the ``dhash.dhash_int`` value, the file size
    in megabytes (bytes / 1024 / 1024 via ``os.path.getsize``) and the
    ``(width, height)`` of the image.
    """
    bytes_per_mb = 1024 * 1024

    def describe(location):
        # Gather every field while the image is still open.
        with Image(filename=location) as picture:
            return ImageInfo(
                path=location,
                dhash=dhash.dhash_int(picture),
                file_size_in_mb=os.path.getsize(location) / bytes_per_mb,
                dims=(picture.width, picture.height),
            )

    return [describe(location) for location in paths]
def build_dict_hash(dict_path):
    """Map the dhash of every PNG under ``dict_path`` to its base name.

    Files come from ``get_all_files``; only paths ending in ``png`` are
    used.  Each image is converted to mode ``'L'`` before hashing.  Falsy
    hashes are skipped, and the first file seen for a hash wins.  The name
    is the last ``/``-separated component with its final four characters
    removed.
    """
    lookup = {}
    for candidate in get_all_files(dict_path):
        if not candidate.endswith('png'):
            continue
        grey = Image.open(candidate).convert('L')
        code = dhash.dhash_int(grey)
        if code:
            lookup.setdefault(code, candidate.split('/')[-1][:-4])
    return lookup
def sortPicsBySimilarity(allPicsDir, imageSize, threshold, sortedPicsDir, similarPicsDir):
    """Copy pictures into hash-sorted order and collect near-duplicates.

    For every directory under ``allPicsDir`` the loadable images are hashed
    with ``dhash.dhash_int`` and sorted by hash value.  Each image is copied
    to the mirrored directory under ``sortedPicsDir`` as
    ``<index>_<name>`` (first image) or ``<index>_<diff>%_<name>``, where
    ``diff`` is the percentage of hash bits differing from the previous
    image in sorted order.  When ``diff <= threshold`` the image, and its
    predecessor if not already present, is also copied to the mirrored
    directory under ``similarPicsDir`` under the same names.

    :param allPicsDir: root directory of the pictures to process
    :param imageSize: ``size`` argument for ``dhash.dhash_int``
    :param threshold: maximum ``diff`` (percent) to count as similar
    :param sortedPicsDir: output root for the sorted copies
    :param similarPicsDir: output root for the similar copies
    """
    totalBits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(allPicsDir):
        hashed = []
        for filename in filelist:
            image = load_image(os.path.join(path, filename))
            if image is None:
                continue
            try:
                imageHash = dhash.dhash_int(image, size=imageSize)
            except Exception:
                # Skip files that cannot be hashed; Ctrl-C still propagates.
                continue
            hashed.append((imageHash, filename))

        # relpath is correct even when allPicsDir has a trailing separator,
        # which fixed-offset slicing of ``path`` is not.
        relPath = os.path.relpath(path, allPicsDir)
        if relPath == os.curdir:
            relPath = ''
        sortedPath = os.path.join(sortedPicsDir, relPath)
        similarPath = os.path.join(similarPicsDir, relPath)
        # makedirs also creates missing parents and tolerates existing dirs.
        os.makedirs(sortedPath, exist_ok=True)
        os.makedirs(similarPath, exist_ok=True)

        hashed.sort(key=lambda entry: entry[0])

        prevHash = None      # hash of the previous image in sorted order
        prevFile = None      # its original file name
        prevNewName = None   # the name it was copied under
        for count, (imageHash, filename) in enumerate(hashed):
            origFile = os.path.join(path, filename)
            if count == 0:
                newFileName = str(count) + "_" + filename
            else:
                num_bits_different = dhash.get_num_bits_different(
                    prevHash, imageHash)
                diff = 100 * num_bits_different / totalBits
                newFileName = str(count) + "_" + str(diff) + "%_" + filename
                if diff <= threshold:
                    shutil.copy(origFile,
                                os.path.join(similarPath, newFileName))
                    # Make sure the image this one resembles is there too.
                    lastNewFilePath = os.path.join(similarPath, prevNewName)
                    if not os.path.exists(lastNewFilePath):
                        shutil.copy(os.path.join(path, prevFile),
                                    lastNewFilePath)
            shutil.copy(origFile, os.path.join(sortedPath, newFileName))
            prevHash, prevFile, prevNewName = imageHash, filename, newFileName
def prepare_test_data(input_folder, output_file):
    """Pack the images of a folder into an HDF5 file.

    Creates ``output_file`` with four datasets, one row per file found
    directly in ``input_folder``:

    * ``images_test`` -- the image resized to 224x224 and converted from
      BGR to RGB
    * ``pids_test`` -- the file name
    * ``pixels_test`` -- ``img.size`` of the image as read by ``cv2.imread``
    * ``Hash_test`` -- the ``dhash.dhash_int`` value of the greyscale image

    A file that fails during the cv2 processing step is printed and
    **deleted from disk**.

    :param input_folder: directory holding the image files
    :param output_file: path of the HDF5 file to create (overwritten)
    """
    # The context manager closes the HDF5 file even if processing fails.
    with h5py.File(output_file, "w") as hdf5_file:
        print('Counting files and parsing meta data...')

        # Only files directly inside input_folder are used: paths are built
        # from input_folder itself, so names from sub-directories would not
        # resolve.
        _root, _subdirs, files = next(
            os.walk(input_folder), (input_folder, [], []))
        pids = files
        images = [input_folder + '/' + name for name in files]

        train_shape = (len(images), 224, 224, 3)
        # NOTE(review): np.int8 cannot hold pixel values above 127; np.uint8
        # looks intended -- confirm with the readers of this file.
        hdf5_file.create_dataset("images_test", train_shape, np.int8)
        hdf5_file.create_dataset("pids_test", [len(pids)],
                                 dtype=h5py.special_dtype(vlen=str))
        hdf5_file['pids_test'][...] = pids
        hdf5_file.create_dataset("pixels_test", [len(pids)], dtype=np.int64)
        hdf5_file.create_dataset('Hash_test', [len(pids)],
                                 dtype=h5py.special_dtype(vlen=str))

        for i, addr in enumerate(images):
            # progress report every 1000 images
            if i % 1000 == 0 and i > 1:
                print('Train data: {}/{}'.format(i, len(images)))

            img = cv2.imread(addr)
            with Image.open(addr) as opened:
                # Image.LANCZOS is the filter formerly named ANTIALIAS, a
                # name newer Pillow releases no longer provide.
                Hash_image = opened.convert('L').resize((9, 9), Image.LANCZOS)
            Hash_valu = dhash.dhash_int(Hash_image)
            # The dataset holds strings, so store the hash as text.
            hdf5_file['Hash_test'][i, ...] = str(Hash_valu)

            try:
                image_size = img.size
                # cv2 loads images as BGR; convert to RGB after resizing
                img = cv2.resize(img, (224, 224),
                                 interpolation=cv2.INTER_CUBIC)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                hdf5_file["images_test"][i, ...] = img
                hdf5_file['pixels_test'][i, ...] = image_size
            except Exception:
                # ``Exception`` rather than a bare except, so an interrupt
                # can never trigger the deletion below.
                print(addr)
                os.remove(addr)

    print('finished')
def run(self):
    """Grab frames from ``self.url`` and save those with a detection.

    The loop runs while ``self.blank_count`` is below ``self.max_blank`` and
    ``self.saved_count`` is below ``self.max_save``.  Each pass opens a
    ``cv2.VideoCapture`` on the URL, reads one frame and releases the
    capture.  If no frame arrives the worker stops.  A frame in which any
    prediction's ``name`` is in ``self.class_names`` is written to
    ``self.output_dir`` under a name built from ``self.filename_prefix``
    and the frame's dhash, unless that path already exists.  A saved frame
    resets ``blank_count``; every other frame increments it.
    ``self.running`` is set to ``False`` on exit.
    """
    self.blank_count = 0
    self.saved_count = 0
    self.total_count = 0

    while (self.blank_count < self.max_blank
           and self.saved_count < self.max_save):
        # Open the stream only long enough to take a single frame.
        capture = cv2.VideoCapture(self.url, cv2.CAP_GSTREAMER)
        _, frame = capture.read()
        capture.release()

        if frame is None:
            # No frame came back from the capture.
            print("Worker: Stopped receiving frames. Received: ",
                  self.total_count, "Saved: ", self.saved_count)
            self.running = False
            return

        self.total_count += 1
        with detector_lock:
            preds = self.detector.predict([frame])[0]

        wanted = any([pred.name in self.class_names for pred in preds])
        if not wanted:
            self.blank_count += 1
            continue

        frame_hash = dhash.dhash_int(Image.fromarray(frame), self.DHASH_SIZE)
        filename = self.filename_prefix + "_" + str(frame_hash) + ".jpg"
        save_path = Path(self.output_dir) / filename
        if save_path.exists():
            print("Worker: Tried to save image with same path!")
            self.blank_count += 1
        else:
            cv2.imwrite(str(save_path), frame)
            print("Worker: Saving!", filename)
            self.saved_count += 1
            self.blank_count = 0

    print("Worker: Reached maximum frames. "
          " Received", self.total_count, "Saved: ", self.saved_count)
    self.running = False
def build_dict_tree(dict_path):
    """Hash every PNG under ``dict_path`` and index the hashes in a BK-tree.

    Files come from ``get_all_files``; only paths ending in ``png`` are
    used.  Each image is converted to mode ``'L'`` before hashing and falsy
    hashes are skipped.  Names are the last ``/``-separated path component
    with its final four characters removed.

    :return: ``(chr_name, hash_list, bk_tree)`` -- two parallel lists and a
        ``pybktree.BKTree`` built over ``hash_list`` with
        ``pybktree.hamming_distance``
    """
    names = []
    hashes = []
    for candidate in get_all_files(dict_path):
        if not candidate.endswith('png'):
            continue
        grey = Image.open(candidate).convert('L')
        code = dhash.dhash_int(grey)
        if code:
            hashes.append(code)
            names.append(candidate.split('/')[-1][:-4])
    tree = pybktree.BKTree(pybktree.hamming_distance, hashes)
    return names, hashes, tree
async def saveMessageHashes(self, message: disnake.Message):
    """Hash every image attachment of a message, store and yield each hash.

    Each attachment is saved into memory; those that ``Image.open`` rejects
    with ``OSError`` are skipped.  The dhash is stored through
    ``repo_i.add_image`` as a hex string and then yielded as an integer.
    """
    for attachment in message.attachments:
        buffer = BytesIO()
        await attachment.save(buffer)
        try:
            picture = Image.open(buffer)
        except OSError:
            # The attachment is not an image.
            continue

        value = dhash.dhash_int(picture)
        repo_i.add_image(
            channel_id=message.channel.id,
            message_id=message.id,
            attachment_id=attachment.id,
            dhash=str(hex(value)),
        )
        yield value
def is_image_duplicate(self, message):
    """Detect whether any image linked in the message was already posted.

    Every URL from ``message.get_urls()`` containing ``png`` or ``jpg`` is
    downloaded and hashed with ``dhash.dhash_int``.  The hash is looked up
    in ``self.images`` with a search distance of 1, then added to it, and
    the updated tree is passed to ``self.redis_connection.set_images_tree``.
    Errors for a link are printed and that link is skipped.

    :return: ``True`` if the lookup for at least one link returned a match
    """
    search_dist = 1
    found_duplicate = False
    links = [u for u in message.get_urls() if 'png' in u or 'jpg' in u]
    for link in links:
        try:
            response = requests.get(link)
            image = Image.open(BytesIO(response.content))
            img_hash = dhash.dhash_int(image)
            # It does not matter which link is the repost: remember a hit
            # from any of them instead of letting a later link overwrite it.
            if len(self.images.find(img_hash, search_dist)) > 0:
                found_duplicate = True
            self.images.add(img_hash)
            self.redis_connection.set_images_tree(self.images)
        except Exception as e:
            print(e)
    return found_duplicate
def hashVid(conn, vidUrl, url):
    """Return the dhash of every frame of a video as one string.

    The video at ``vidUrl['reddit_video']['fallback_url']`` is opened with
    ``av.open``.  If opening fails, the row in ``Posts`` whose ``Url``
    equals ``url`` is deleted and ``'invalid'`` is returned.  Otherwise each
    decoded frame's ``dhash.dhash_int`` value followed by a single space is
    concatenated in order.  Errors raised while decoding are not caught.

    :param conn: DB-API connection providing ``cursor()`` and ``commit()``
    :param vidUrl: mapping holding the video location
    :param url: post URL used as the key for the ``DELETE``
    :return: space-terminated hash string, or ``'invalid'``
    """
    try:
        container = av.open(vidUrl['reddit_video']['fallback_url'])
    except Exception:
        # ``Exception`` rather than a bare except, so an interrupt does not
        # delete the post.
        cursor = conn.cursor()
        try:
            cursor.execute(
                'DELETE FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            conn.commit()
        finally:
            cursor.close()
        return 'invalid'

    # The context manager releases the container once decoding is finished.
    with container:
        return ''.join(
            '{} '.format(dhash.dhash_int(frame.to_image()))
            for frame in container.decode(video=0)
        )
async def saveMessageHashes(self, message: discord.Message):
    """Hash every image attachment of a message, store and yield each hash.

    Each attachment is saved into memory; those that ``Image.open`` rejects
    with ``OSError`` are skipped.  The dhash is stored through
    ``repo_i.add_image`` as a hex string and then yielded as an integer.
    """
    for attachment in message.attachments:
        # FIXME Can we check that the file is image before downloading it?
        buffer = BytesIO()
        await attachment.save(buffer)
        try:
            picture = Image.open(buffer)
        except OSError:
            # The attachment is not an image.
            continue

        value = dhash.dhash_int(picture)
        record = {
            "channel_id": message.channel.id,
            "message_id": message.id,
            "attachment_id": attachment.id,
            "dhash": str(hex(value)),
        }
        repo_i.add_image(**record)
        yield value
async def saveMessageHashes(self, message: discord.Message):
    """Hash the image attachments of a message, store and yield each hash.

    Attachments larger than ``self.config.get("max_size") * 1024`` are
    skipped without being downloaded, and those that ``Image.open`` rejects
    with ``OSError`` are skipped after download.  The dhash is stored
    through ``repo_i.add_image`` as a hex string and then yielded as an
    integer.
    """
    for attachment in message.attachments:
        oversized = attachment.size > self.config.get("max_size") * 1024
        if oversized:
            continue

        buffer = BytesIO()
        await attachment.save(buffer)
        try:
            picture = Image.open(buffer)
        except OSError:
            # The attachment is not an image.
            continue

        value = dhash.dhash_int(picture)
        record = {
            "channel_id": message.channel.id,
            "message_id": message.id,
            "attachment_id": attachment.id,
            "dhash": str(hex(value)),
        }
        repo_i.add_image(**record)
        yield value
def is_same_image(img1, img2):
    """Return ``True`` when both images have dhashes with no differing bits.

    :param img1: first image, passed to ``dhash.dhash_int``
    :param img2: second image, passed to ``dhash.dhash_int``
    :return: bool
    """
    first = dhash.dhash_int(img1)
    second = dhash.dhash_int(img2)
    differing = dhash.get_num_bits_different(first, second)
    return differing == 0