async def check_for_duplicates(self, message: discord.Message, settings: Optional[WardenSettings] = None):
    """Announce the closest stored attachment when the message looks like a repost.

    Every hash yielded by ``self.generate_message_hash`` is compared with all
    attachments stored for the message's channel. For each hash, the stored
    post with the smallest Hamming distance (strictly below 128) is
    remembered. The first remembered post whose distance is at most
    ``LIMIT_SOFT`` is announced, and nothing else is announced after it.
    """
    image_hashes = [h async for h in self.generate_message_hash(message)]
    if not image_hashes:
        return

    closest = {}
    stored = self.warden_repo.get_all_channel_attachments(message.channel.id)
    for image_hash in image_hashes:
        best_post, best_distance = None, 128
        for candidate in stored:
            # Attachments of the message being checked cannot be its own duplicate.
            if candidate.message_id != str(message.id):
                distance = dhash.get_num_bits_different(image_hash, int(candidate.dhash, 16))
                if distance < best_distance:
                    best_post, best_distance = candidate, distance
        if best_post is not None:
            closest[best_post] = best_distance

    for original, distance in closest.items():
        if distance > LIMIT_SOFT:
            continue
        await self.__announce_duplicate(message, original, distance, settings)
        break
def findSimilarImgs(baseImageFile, tarDir, hasCmpedList, step):
    """Move every near-duplicate of ``baseImageFile`` found under ``tarDir``.

    Walks ``tarDir`` and compares the dHash of each ``jpg``/``png`` file with
    the dHash of the base image. Directories whose path ends with ``.git`` or
    starts with the module-level ``resultDir`` are skipped. The difference is
    a percentage of the ``imageSize * imageSize * 2`` hash bits; files at or
    below the module-level ``limitDiff`` are appended to ``hasCmpedList``,
    reported on stdout and handed to ``movePicToResultDir``.

    :param baseImageFile: path of the reference image
    :param tarDir: directory tree to search
    :param hasCmpedList: list of already matched paths; mutated in place
    :param step: passed through unchanged to ``movePicToResultDir``
    :return: None
    """
    image1 = load_image(baseImageFile)
    if image1 is None:
        return
    try:
        hash1 = dhash.dhash_int(image1, size=imageSize)
    except Exception:
        # Best effort: a base image that cannot be hashed is skipped.
        # Ctrl-C / SystemExit are no longer swallowed here.
        return

    totalBits = imageSize * imageSize * 2
    for path, _dirs, filelist in os.walk(tarDir):
        if path.endswith('.git') or path.startswith(resultDir):
            continue
        for filename in filelist:
            if not filename.endswith(('jpg', 'png')):
                continue
            imageName = os.path.join(path, filename)
            if imageName in hasCmpedList:
                continue
            image2 = load_image(imageName)
            if image2 is None:
                continue
            try:
                hash2 = dhash.dhash_int(image2, size=imageSize)
            except Exception:
                # An unhashable candidate is skipped, the scan goes on.
                continue
            num_bits_different = dhash.get_num_bits_different(hash1, hash2)
            diff = 100 * num_bits_different / totalBits
            if diff <= limitDiff:
                hasCmpedList.append(imageName)
                print(baseImageFile + " is same with " + imageName)
                movePicToResultDir(step, baseImageFile, imageName, diff)
def find_duplicates(image_info, image_infos):
    """Return the indices of near-identical entries in ``image_infos``.

    An entry counts as a duplicate when its ``dhash`` differs from
    ``image_info.dhash`` by at most 2 bits.
    """
    return [
        index
        for index, candidate in enumerate(image_infos)
        if dhash.get_num_bits_different(image_info.dhash, candidate.dhash) <= 2
    ]
async def checkDuplicate(self, message: disnake.Message):
    """Check if uploaded files are known.

    Each hash yielded by ``self.saveMessageHashes`` is compared with every
    stored post (posts of the same message are skipped). The closest post per
    hash is remembered, and each remembered post whose Hamming distance is at
    most ``self.limit_soft`` is announced.
    """
    hashes = [x async for x in self.saveMessageHashes(message)]
    if not hashes:
        # Nothing to compare, so do not load the whole database for nothing.
        return

    duplicates = {}
    posts = repo_i.getAll()
    for img_hash in hashes:
        hamming_min = 128
        duplicate = None
        for post in posts:
            # skip current message
            if post.message_id == message.id:
                continue
            # do the comparison
            post_hash = int(post.dhash, 16)
            hamming = dhash.get_num_bits_different(img_hash, post_hash)
            if hamming < hamming_min:
                duplicate = post
                hamming_min = hamming
        if duplicate is None:
            # No comparable post at all; never store a ``None`` key.
            continue
        # Several attachments may resolve to the same post: keep the closest.
        duplicates[duplicate] = min(hamming_min, duplicates.get(duplicate, hamming_min))

    for duplicate, hamming_min in duplicates.items():
        if hamming_min <= self.limit_soft:
            await self._announceDuplicate(message, duplicate, hamming_min)
def doComparison(photo1loc, photo2loc):
    """Compare a local photo with a remote one using several metrics.

    :param photo1loc: path of the local image
    :param photo2loc: URL of the image to compare against
    :return: tuple ``(MSE, SSIM, num_bits_different, faceCompare)``. The first
        two come from ``compareImages`` on grayscale versions, with the remote
        image resized to the local one. ``num_bits_different`` is the dHash
        Hamming distance. ``faceCompare`` is a fixed placeholder of 0.3.
    """
    original = cv2.imread(photo1loc)
    contrast = url_to_image(photo2loc)
    contrast = cv2.resize(contrast, (original.shape[1], original.shape[0]))
    original = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    contrast = cv2.cvtColor(contrast, cv2.COLOR_BGR2GRAY)
    MSE, SSIM = compareImages(original, contrast)

    # Hash both images inside context managers so the file handle and the
    # HTTP response are released instead of being leaked.
    with Image.open(photo1loc) as image1:
        hash1 = dhash.dhash_int(image1)
    with urllib.request.urlopen(photo2loc) as response:
        remoteBytes = response.read()
    with Image.open(io.BytesIO(remoteBytes)) as image2:
        hash2 = dhash.dhash_int(image2)
    num_bits_different = dhash.get_num_bits_different(hash1, hash2)

    # Face comparison is disabled; a constant is returned in its place.
    # faceCompare = face.beginImageRec(photo1loc, photo2loc)
    faceCompare = 0.3
    return (MSE, SSIM, num_bits_different, faceCompare)
def compute_hamming_dist(hashes):
    """Return the Hamming distance between each pair of consecutive hashes.

    ``hashes`` holds hexadecimal strings. The resulting list (one entry fewer
    than ``hashes``) is printed and then returned.
    """
    bit_diffs = [
        dhash.get_num_bits_different(int(current, 16), int(following, 16))
        for current, following in zip(hashes, hashes[1:])
    ]
    print(bit_diffs)
    return bit_diffs
def get_icon_similarity(phash_origin, phash_candidate):
    """
    Score how alike two icon hashes are, in the range [0, 1.0].

    :param phash_origin: hash of the original icon
    :param phash_candidate: hash of the icon to be compared
    :return: 1 minus the fraction of differing bits out of
             ``PHASH_SIZE * PHASH_SIZE * 2``
    """
    total_bits = PHASH_SIZE * PHASH_SIZE * 2
    differing_bits = dhash.get_num_bits_different(phash_origin, phash_candidate)
    return 1 - 1.0 * differing_bits / total_bits
async def checkDuplicate(self, message: discord.Message):
    """Check if uploaded files are known.

    For every hash yielded by ``self.saveMessageHashes`` an exact lookup is
    tried first. If no *other* message carries the same hash, the whole
    database is scanned for the closest hash. Each remembered post whose
    distance is at most ``self.limit_soft`` is announced.
    """
    hashes = [x async for x in self.saveMessageHashes(message)]
    if len(message.attachments) > 0 and len(hashes) == 0:
        # Attachments exist but none could be hashed: flash a reaction and stop.
        await message.add_reaction("▶")
        await asyncio.sleep(2)
        await message.remove_reaction("▶", self.bot.user)
        return

    duplicates = {}
    posts_all = None
    for image_hash in hashes:
        # try to look up hash directly, ignoring rows of the current message
        full_match = None
        for post in repo_i.getHash(str(hex(image_hash))):
            if post.message_id != message.id:
                full_match = post
                break
        if full_match is not None:
            duplicates[full_match] = 0
            await self.console.debug(message, "Full dhash match")
            # move on to the next hash
            continue

        # No exact match on another message (the lookup may have returned
        # only this message's own row): iterate over the whole database.
        if posts_all is None:
            posts_all = repo_i.getAll()
        hamming_min = 128
        duplicate = None
        for post in posts_all:
            # skip current message
            if post.message_id == message.id:
                continue
            # do the comparison
            post_hash = int(post.dhash, 16)
            hamming = dhash.get_num_bits_different(image_hash, post_hash)
            if hamming < hamming_min:
                duplicate = post
                hamming_min = hamming
        if duplicate is not None:
            # Never store a ``None`` key; keep the closest distance per post.
            duplicates[duplicate] = min(hamming_min, duplicates.get(duplicate, hamming_min))
        await self.console.debug(
            message, f"Closest Hamming distance: {hamming_min}/128 bits")

    for duplicate, hamming_distance in duplicates.items():
        if hamming_distance <= self.limit_soft:
            await self._announceDuplicate(message, duplicate, hamming_distance)
def compare_images(imageA, imageB, title):
    """Print the dHash similarity of two images as a JSON object.

    :param imageA: first image (anything ``Image.open`` accepts)
    :param imageB: second image
    :param title: unused; kept so existing callers keep working
    :return: None; prints ``{"Similarity is ": "<percent>%"}``, where the
        percentage is taken over the 128 bits of a size-8 dHash
    """
    # Context managers release the file handles that were previously leaked.
    with Image.open(imageA) as image1:
        imageHashInt = dhash.dhash_int(image1, 8)
    with Image.open(imageB) as image2:
        imageHashInt2 = dhash.dhash_int(image2, 8)
    res = dhash.get_num_bits_different(imageHashInt, imageHashInt2)
    finalRes = str(100 - ((res / 128) * 100)) + '%'
    print(json.dumps({"Similarity is ": finalRes}))
def compare_images(self, image1, image2):
    """Return True when the two images' hashes differ by at most 3 bits.

    Both hashes come from ``self.get_image_hash`` and are parsed as
    hexadecimal strings.
    """
    first_hex = self.get_image_hash(image1)
    second_hex = self.get_image_hash(image2)
    differing_bits = dhash.get_num_bits_different(int(first_hex, 16), int(second_hex, 16))
    return differing_bits <= 3
def dHash_use_package(img1, img2):
    """Return the dHash Hamming distance between two image files.

    Prints ``Same Picture`` when the distance is 5 bits or fewer.

    :param img1: first image (anything ``Image.open`` accepts)
    :param img2: second image
    :return: number of differing hash bits
    """
    # Context managers release the file handles that were previously leaked.
    with Image.open(img1) as image1:
        row1, col1 = dhash.dhash_row_col(image1)
    with Image.open(img2) as image2:
        row2, col2 = dhash.dhash_row_col(image2)
    a1 = int(dhash.format_hex(row1, col1), 16)
    a2 = int(dhash.format_hex(row2, col2), 16)
    result = dhash.get_num_bits_different(a1, a2)
    if result <= 5:
        print('Same Picture')
    return result
def is_image_duplicate(self, src, cam_id=''):
    """Return True when ``src`` hashes within 3 bits of its cached entry.

    The cache entry is looked up under ``self.get_hash_key(src)``. A missing
    entry means "not a duplicate", whether the cache reports it by returning
    ``None`` or by raising ``KeyError``.

    :param src: image source passed to ``get_hash_key`` / ``get_image_hash``
    :param cam_id: unused here; kept for interface compatibility
    """
    key = self.get_hash_key(src)
    try:
        meta = self.cache[key]
    except KeyError:
        # Dict-like caches raise instead of returning None for unknown keys.
        meta = None
    if meta is None:
        return False
    new_hash = self.get_image_hash(src)
    diff = dhash.get_num_bits_different(int(meta['image_hash'], 16), int(new_hash, 16))
    return diff <= 3
def main(arguments):
    """Add ``infile`` to the database directory unless a similar image exists.

    The test image is hashed once and compared with every readable image in
    ``dbDir``. The score is ``1 - differing_bits / 128``; a score above
    ``--threshold`` is reported as a match. If nothing matches, the test file
    is copied into the database directory.

    :param arguments: command-line argument list (without the program name)
    :raises SystemExit: with status 1 when the test image cannot be read
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help="path to file to compare")
    parser.add_argument('dbDir', help="path to 'Database' directory")
    parser.add_argument('--threshold', help="threshold val (default 0.75)",
                        type=float, default=0.75)
    args = parser.parse_args(arguments)
    baseDir = os.path.abspath(args.dbDir)

    # dhash_int defaults to an 8x8 grid, i.e. 2 * 8 * 8 = 128 hash bits. The
    # hash's own bit_length() varies with leading zero bits and is 0 for an
    # all-zero hash, so it must not be used as the denominator.
    hashBits = 128

    # Hash the test image once. OSError covers a missing file as well as a
    # file PIL cannot identify as an image.
    try:
        with PIL.Image.open(args.infile) as imTest:
            dh1 = dhash.dhash_int(imTest)
    except OSError:
        print("An error occured trying to read the test file. Can't compare")
        raise SystemExit(1)

    match = False
    # loop through each image in the database directory
    for dbImg in os.listdir(baseDir):
        try:
            with PIL.Image.open(os.path.join(baseDir, dbImg)) as imDB:
                dh2 = dhash.dhash_int(imDB)
        except OSError:
            # Not an image PIL can read (or a sub-directory): ignore it.
            continue
        score = 1 - dhash.get_num_bits_different(dh1, dh2) / hashBits
        if score > args.threshold:
            # alert about the match
            print("Matching image found in database directory: ", dbImg,
                  " (score: ", str(score), ").")
            match = True

    if match:
        print("Match(es) found. Not adding")
    else:
        print("No match found. Adding ", args.infile,
              " to database directory: ", baseDir)
        shutil.copy(args.infile, baseDir)
def hashVidDifference(originalHash, newHash):
    """Average the closest-frame Hamming distance between two video hashes.

    Both arguments are whitespace-separated decimal frame hashes. For every
    frame of ``originalHash`` the smallest distance to any frame of
    ``newHash`` is taken; the mean of those minima is printed and returned.
    """
    originalFrames = originalHash.split()
    newFrames = newHash.split()
    closest = [
        min([dhash.get_num_bits_different(int(frame), int(other)) for other in newFrames])
        for frame in originalFrames
    ]
    print(sum(closest) / len(closest))
    return sum(closest) / len(closest)
def getFishLevel(imageData):
    """Return True when the image matches one of the three fish-level hashes.

    ``imageData`` is split into its b/g/r channels and re-merged as RGB before
    hashing. The hash is compared in turn with ``fishLv1ImgHash``,
    ``fishLv2ImgHash`` and ``fishLv3ImgHash``; a match means fewer than
    ``COMPARE_PERCENTAGE`` differing bits (the constant is used directly as a
    bit count).
    """
    COMPARE_PERCENTAGE = 15
    blue, green, red = cv2.split(imageData)
    pilImage = Image.fromarray(cv2.merge([red, green, blue]), 'RGB')
    mBash = getImgHash(pilImage)
    return (
        dhash.get_num_bits_different(mBash, fishLv1ImgHash) < COMPARE_PERCENTAGE
        or dhash.get_num_bits_different(mBash, fishLv2ImgHash) < COMPARE_PERCENTAGE
        or dhash.get_num_bits_different(mBash, fishLv3ImgHash) < COMPARE_PERCENTAGE
    )
def hash_vid_difference(original_hash, new_hash):
    """Return the mean closest-frame Hamming distance between two video hashes.

    Each argument is a whitespace-separated string of decimal frame hashes.
    Every frame of ``original_hash`` contributes its smallest distance to any
    frame of ``new_hash``.
    """
    original_frames = original_hash.split()
    candidate_frames = new_hash.split()
    nearest = []
    for frame in original_frames:
        nearest.append(min(
            dhash.get_num_bits_different(int(frame), int(other))
            for other in candidate_frames
        ))
    return sum(nearest) / len(nearest)
async def check_message(self, message: discord.Message):
    """Check if message contains duplicate image.

    For each hash yielded by ``self.save_hashes`` an exact lookup is tried
    first. Only when no other message carries the same hash is the whole
    database scanned for the closest hash, which is kept if its distance is
    below ``self.limit_soft``. Every collected duplicate is reported.
    """
    image_hashes = [x async for x in self.save_hashes(message)]
    if len(message.attachments) > len(image_hashes):
        # Some attachment could not be hashed: flash a reaction, then go on.
        await message.add_reaction("▶")
        await asyncio.sleep(2)
        await message.remove_reaction("▶", self.bot.user)

    duplicates = {}
    all_images = None
    for image_hash in image_hashes:
        # try to look up hash directly, ignoring rows of the current message
        exact = None
        for image in repo_i.get_hash(str(hex(image_hash))):
            if image.message_id != message.id:
                exact = image
                break
        if exact is not None:
            duplicates[exact] = 0
            await self.console.debug(message, "Full dhash match found.")
            # move on to the next hash
            continue

        # Full match not found, iterate over whole database. Previously an
        # unconditional ``continue`` sat in front of this fallback.
        if all_images is None:
            all_images = repo_i.get_all()
        minimal_distance = 128
        duplicate = None
        for image in all_images:
            # skip current image
            if image.message_id == message.id:
                continue
            # do the comparison
            db_image_hash = int(image.dhash, 16)
            distance = dhash.get_num_bits_different(db_image_hash, image_hash)
            if distance < minimal_distance:
                duplicate = image
                minimal_distance = distance
        if duplicate is not None and minimal_distance < self.limit_soft:
            duplicates[duplicate] = minimal_distance

    for duplicate, distance in duplicates.items():
        await self.report_duplicate(message, duplicate, distance)
def sortPicsBySimilarity(allPicsDir, imageSize, threshold, sortedPicsDir, similarPicsDir):
    """Copy pictures ordered by dHash and collect neighbouring look-alikes.

    For every directory under ``allPicsDir`` the loadable pictures are hashed
    and sorted by hash value. Each picture is copied to the mirrored
    directory under ``sortedPicsDir`` as ``<index>_<diff>%_<name>`` (the first
    one as ``0_<name>``), where ``diff`` is the percentage of hash bits that
    differ from the previous picture in the sorted order. When ``diff`` is at
    most ``threshold``, the picture and its predecessor are also copied to
    the mirrored directory under ``similarPicsDir``.

    :param allPicsDir: root directory of the source pictures
    :param imageSize: dHash grid size; the hash has ``imageSize**2 * 2`` bits
    :param threshold: maximum percentage difference treated as "similar"
    :param sortedPicsDir: destination root for the ordered copies
    :param similarPicsDir: destination root for the similar pairs
    """
    # NOTE(review): the original was collapsed onto one line. The nesting
    # below is the reading under which the predecessor's file name matches
    # the name it was copied under; confirm against upstream.
    allPicsDirLen = len(allPicsDir)
    for path, _dirs, filelist in os.walk(allPicsDir):
        hashed = []
        for filename in filelist:
            image = load_image(os.path.join(path, filename))
            if image is None:
                continue
            try:
                imageHash = dhash.dhash_int(image, size=imageSize)
            except Exception:
                # Best effort: unhashable pictures are left out.
                continue
            hashed.append((imageHash, filename))

        relativePath = path[allPicsDirLen + 1:]
        sortedPath = os.path.join(sortedPicsDir, relativePath)
        similarPath = os.path.join(similarPicsDir, relativePath)
        # makedirs also creates missing parents and tolerates existing dirs.
        os.makedirs(sortedPath, exist_ok=True)
        os.makedirs(similarPath, exist_ok=True)

        ordered = sorted(hashed, key=lambda entry: entry[0])
        lastDiff = None
        for count, item in enumerate(ordered):
            origFile = os.path.join(path, item[1])
            if count == 0:
                newFileName = str(count) + "_" + item[1]
            else:
                lastItem = ordered[count - 1]
                num_bits_different = dhash.get_num_bits_different(lastItem[0], item[0])
                diff = 100 * num_bits_different / (imageSize * imageSize * 2)
                newFileName = str(count) + "_" + str(diff) + "%_" + item[1]
                if diff <= threshold:
                    shutil.copy(origFile, os.path.join(similarPath, newFileName))
                    # The predecessor goes next to it, under the same name it
                    # received in the sorted directory.
                    if count == 1:
                        lastNewFileName = str(count - 1) + "_" + lastItem[1]
                    else:
                        lastNewFileName = str(count - 1) + "_" + str(
                            lastDiff) + "%_" + lastItem[1]
                    lastNewFilePath = os.path.join(similarPath, lastNewFileName)
                    if not os.path.exists(lastNewFilePath):
                        shutil.copy(os.path.join(path, lastItem[1]), lastNewFilePath)
                lastDiff = diff
            shutil.copy(origFile, os.path.join(sortedPath, newFileName))
def predict_1d(self, X, raw=False):
    """Compare one image against every baseline hash.

    Returns the ``label_to_name`` entry of the closest baseline hash. With
    ``raw=True`` a tuple ``(smallest distance, that entry)`` is returned.
    """
    # one distance slot per baseline image
    distances = np.zeros(self.n_images)
    features = imgToFeatures(X)
    for idx, reference in enumerate(self.baseline):
        distances[idx] = dhash.get_num_bits_different(reference, features)
    if not raw:
        return self.label_to_name[np.argmin(distances)]
    return np.min(distances), self.label_to_name[np.argmin(distances)]
def is_image_duplicate(self, src, cam_id=''):
    """Return True when ``src`` hashes within 3 bits of the entry cached for ``cam_id``.

    A ``cam_id`` that is absent from ``self.cache`` is never a duplicate.
    """
    if cam_id not in self.cache:
        return False
    cached_hex = self.cache[cam_id]['image_hash']
    fresh_hex = self.get_image_hash(src)
    differing_bits = dhash.get_num_bits_different(int(cached_hex, 16), int(fresh_hex, 16))
    return differing_bits <= 3
def image_in_another(another: Image, image: Image, pos: list) -> bool:
    """Tell whether ``image`` appears inside ``another`` at position ``pos``.

    The region of ``another`` starting at ``pos`` with the size of ``image``
    is cropped and hashed; the images match when fewer than 20 hash bits
    differ.

    :param another: the larger image, or a path string to open
    :param image: the image to look for, or a path string to open
    :param pos: ``(x, y)`` of the region's top-left corner in ``another``
    """
    opened = []
    try:
        # isinstance also accepts str subclasses; images opened here are
        # closed again below instead of leaking their file handles.
        if isinstance(image, str):
            image = Image.open(image)
            opened.append(image)
        if isinstance(another, str):
            another = Image.open(another)
            opened.append(another)
        start_x, start_y = pos
        box = (start_x, start_y, start_x + image.width, start_y + image.height)
        image_dhash = TaskIdentifier.dhash(image)
        another_dhash = TaskIdentifier.dhash(another.crop(box))
        return dhash.get_num_bits_different(image_dhash, another_dhash) < 20
    finally:
        for handle in opened:
            handle.close()
def image_in_another(another: Image, image: Image, pos: list=(0, 0), bit_diff=20) -> bool:
    """Tell whether ``image`` appears inside ``another`` at position ``pos``.

    The region of ``another`` starting at ``pos`` with the size of ``image``
    is cropped and hashed with ``dhash_calc``; the images match when fewer
    than ``bit_diff`` hash bits differ.

    :param another: the larger image, or a path string to open
    :param image: the image to look for, or a path string to open
    :param pos: ``(x, y)`` of the region's top-left corner in ``another``
    :param bit_diff: exclusive upper bound on differing bits for a match
    """
    opened = []
    try:
        # isinstance also accepts str subclasses; images opened here are
        # closed again below instead of leaking their file handles.
        if isinstance(image, str):
            image = Image.open(image)
            opened.append(image)
        if isinstance(another, str):
            another = Image.open(another)
            opened.append(another)
        start_x, start_y = pos
        box = (start_x, start_y, start_x + image.width, start_y + image.height)
        image_dhash = dhash_calc(image)
        another_dhash = dhash_calc(another.crop(box))
        return dhash.get_num_bits_different(image_dhash, another_dhash) < bit_diff
    finally:
        for handle in opened:
            handle.close()
def sameImgCheck(imageData, imgHash):
    """Return True when ``imageData`` hashes close to ``imgHash``.

    ``imageData`` is split into its b/g/r channels and re-merged as RGB before
    hashing with ``getImgHash``. "Close" means fewer than
    ``COMPARE_PERCENTAGE`` differing bits (the constant is used directly as a
    bit count).
    """
    COMPARE_PERCENTAGE = 15
    blue, green, red = cv2.split(imageData)
    pilImage = Image.fromarray(cv2.merge([red, green, blue]), 'RGB')
    return dhash.get_num_bits_different(getImgHash(pilImage), imgHash) < COMPARE_PERCENTAGE
def predict(self, X, raw=False):
    """Compare every image in ``X`` against every baseline hash.

    Progress is printed every 12th image. With ``raw=True`` the full distance
    matrix (one row per image, one column per baseline hash) is returned;
    otherwise ``label_to_name`` is indexed with the per-row argmin.
    """
    n_samples = X.shape[0]
    # distance of each input image to each baseline image
    distances = np.zeros((n_samples, self.n_images))
    for row, sample in enumerate(X):
        if row % 12 == 0:
            print(row, 'of', n_samples, end='\r')
        features = imgToFeatures(sample)
        for col, reference in enumerate(self.baseline):
            distances[row][col] = dhash.get_num_bits_different(reference, features)
    if raw:
        return distances
    return self.label_to_name[np.argmin(distances, axis=1)]
def isAnySimularImageByHashCode(self, image_hashes, key, hash):
    """Tell whether a similar hash is already known for ``key``, then record it.

    :param image_hashes: mapping from key to a list of known hashes
    :param key: bucket to compare within
    :param hash: hash of the new image (name shadows the builtin; kept for
        interface compatibility)
    :return: True when the image is smaller than 30 px on a side, or when a
        known hash differs by fewer than ``hash_delta`` bits; False otherwise
    """
    # NOTE(review): ``image`` is not a parameter of this function; this relies
    # on a name defined outside it. Confirm where it comes from.
    dim = image.shape[:2]
    if dim[0] < 30 or dim[1] < 30:
        return True

    imageHash = hash
    logging.debug("image_hash: %s", imageHash)

    # Compare BEFORE recording the new hash. When ``image_hashes`` and
    # ``self.image_hashes`` are the same mapping, appending first made the
    # new hash match itself, so the result was always True.
    isSimilar = any(
        dhash.get_num_bits_different(imageHash, knownHash) < hash_delta
        for knownHash in image_hashes[key]
    )
    self.image_hashes[key].append(imageHash)
    return isSimilar
def match_painting(img):
    """
    Compute hash bit differences between img and paintings in painting_db

    :param img: input image to use.
    :return: Matching painting (the first one with the fewest differing bits,
             provided that count is below the threshold of 20), or None when
             every difference is at or above the threshold or painting_db is
             empty.
    """
    threshold = 20
    img_row, img_col = dhash.dhash_row_col(img)
    img_hash = int(dhash.format_hex(img_row, img_col), 16)

    # Track the best candidate directly; starting at the threshold means only
    # paintings strictly below it can win, and an empty database yields None
    # instead of raising from min() on an empty list.
    best_painting = None
    best_difference = threshold
    for painting in painting_db:
        difference = dhash.get_num_bits_different(img_hash, painting.hash)
        if difference < best_difference:
            best_painting = painting
            best_difference = difference
    return best_painting
async def scan_compare(self, ctx, first: int, second: int):
    """Scan two messages and report comparison result

    Lists the stored hashes of both messages. When at least one of them has
    exactly one hash, the first hash of each message is compared and the
    similarity (percent of 128 bits, and differing bits) is appended.

    Arguments
    ---------
    first: Message ID
    second: Message ID
    """
    hashes1 = repo_i.get_by_message(first)
    hashes2 = repo_i.get_by_message(second)

    if len(hashes1) == 0:
        return await ctx.send(self.text.get("comparison", "not_found", message_id=str(first)))
    if len(hashes2) == 0:
        return await ctx.send(self.text.get("comparison", "not_found", message_id=str(second)))

    text = []
    for message_id, hashes in ((first, hashes1), (second, hashes2)):
        if text:
            # blank line between the two listings
            text.append("")
        text.append(self.text.get("comparison", "header", message_id=str(message_id)))
        for h in hashes:
            # Drop the two-character prefix for both messages alike; the
            # second listing used to keep it.
            text.append(self.text.get("comparison", "line", hash=str(h.dhash)[2:]))

    if len(hashes1) == 1 or len(hashes2) == 1:
        hash1 = int(hashes1[0].dhash, 16)
        hash2 = int(hashes2[0].dhash, 16)
        hamming = dhash.get_num_bits_different(hash1, hash2)
        prob = "{:.1f}".format((1 - hamming / 128) * 100)
        text.append("")
        text.append(self.text.get("comparison", "footer", percent=str(prob), bits=str(hamming)))

    await ctx.send("\n".join(text))
# Hash the original picture and two manipulated variants, then print how many
# hash bits each variant differs from the original.
image = Image.open('osoba20a.png')  # Original
row, col = dhash.dhash_row_col(image)
a = "0x{}".format(dhash.format_hex(row, col))

# NOTE(review): this file name starts with "osaba", the other two with
# "osoba" - confirm the spelling on disk.
image2 = Image.open('osaba20a_pic_resized_BW.png')  # Print Attack
row, col = dhash.dhash_row_col(image2)
b = "0x{}".format(dhash.format_hex(row, col))

image3 = Image.open('osoba20a_vertflip.png')  # Vertical Flip
row, col = dhash.dhash_row_col(image3)
c = "0x{}".format(dhash.format_hex(row, col))

# Base 0 makes int() honour the "0x" prefix.
d, e, f = int(a, 0), int(b, 0), int(c, 0)

for label, candidate in (("Original", d), ("Print Attack:", e), ("Vertical Flip", f)):
    print(label)
    print(dhash.get_num_bits_different(d, candidate))
print("Time taken: ", (time.time() - start_time), "seconds")

# Used for Data Collection:
# valuelist = [0xc08701468687d7cf40003341dfaf1fff,0xc08700468f8fc74fc08033405fbfbfff,0xc04302264646c767800013215f3f9fff,0xc00f130c8e0f1f9780003f41bf7f7fbf,0xc0870744c68687c7c0001e4140be3fff,0xc087034c8e8cd3c6000033419fbdffff]
# for i in valuelist:
#     print(dhash.get_num_bits_different(0xb2c3812707474767f08093202f3fdf3f,i))
def is_same_image(img1, img2):
    """Return True when both images produce exactly the same dHash."""
    return dhash.get_num_bits_different(dhash.dhash_int(img1), dhash.dhash_int(img2)) == 0
def start_splitting(video_id, video_download_path):
    """Extract frames from a downloaded video and drop near-duplicate frames.

    Every frame whose index is a multiple of 150 is written to
    ``<video_download_path><video_id>_images/frame<N>.jpg``, resized to
    640x480, and then de-duplicated: a frame is deleted when its hash differs
    by 5 bits or fewer from a frame that was kept before it.

    :param video_id: video name without the ``.mp4`` extension
    :param video_download_path: directory prefix holding the video; it is
        concatenated directly, so it must end with a path separator
    """
    split_path = video_download_path + video_id + "_images"
    if not os.path.exists(split_path):
        os.makedirs(split_path, 0o777)

    vidcap = cv2.VideoCapture(video_download_path + video_id + ".mp4")
    # The first frame is read and discarded, as before.
    vidcap.read()
    x = 1

    # splitting frames from video
    print("splitting video " + video_id + ".mp4....")
    while vidcap.isOpened():
        frameId = vidcap.get(cv2.CAP_PROP_POS_FRAMES)  # current frame number
        ret, frame = vidcap.read()
        print(frameId)
        if not ret:
            break
        if frameId % 150 == 0:
            x += 1
            # Write the frame just read; the first frame used to be written
            # every time.
            cv2.imwrite(split_path + "/frame%d.jpg" % x, frame)
    vidcap.release()

    # resizing images
    print("resizing frames..")
    for file in os.listdir(split_path):
        image = cv2.imread(split_path + "/" + file)
        if image is None:
            # not a readable image (or a directory): leave it alone
            continue
        cv2.imwrite(split_path + "/" + file, cv2.resize(image, (640, 480)))

    # Collect the frames of THIS video only. The download directory used to
    # be walked, which picked up file names that do not exist in split_path.
    files_array = [file for file in os.listdir(split_path) if file.endswith(".jpg")]

    # remove duplicates: each file is hashed once and compared with the
    # hashes kept so far, so no entry is skipped and nothing is rehashed
    print("removing duplicate frames..")
    kept_hashes = []
    for file in files_array:
        hash_value = SplitAndRemove.generate_hash(file, split_path)
        if any(dhash.get_num_bits_different(kept, hash_value) <= 5 for kept in kept_hashes):
            os.remove(split_path + "/" + file)
        else:
            kept_hashes.append(hash_value)
    print("Finish")
def _ignoredPost():
    """Return the six parallel lists that mark a post to be ignored."""
    return ['delete'], [-1], [-1], [-1], [-1], [-1]


def _categoryApplies(limit, otherLimits, timePassed):
    """Tell whether a category with day limit ``limit`` should own the post."""
    if limit is None:
        return True
    return timePassed < limit and all(
        other is None or limit > other for other in otherLimits)


def _addExactMatches(c, content, result, originalPostDate, precentageMatched, author, title):
    """Record every stored post whose Content equals ``content`` as a 100% match."""
    args = c.execute(
        'SELECT Url, Date, Author, Title FROM Posts WHERE Content = ?;',
        (str(content), ),
    )
    for row in args.fetchall():
        addToFound(row, 100, result, originalPostDate, precentageMatched, author, title)


def _addFuzzyMatches(c, limit, difference, result, originalPostDate, precentageMatched, author, title):
    """Record stored posts whose Content is closer than ``limit`` to the new post.

    ``difference`` maps a stored Content string to a numeric difference, or
    to ``None`` when that value cannot be compared with the new post.
    """
    args = c.execute('SELECT Url, Date, Author, Title, Content FROM posts;')
    for row in args.fetchall():
        if row[0] in result or row[4] is None:
            continue
        # Column 4 is Content; column 2 (Author) used to be read here.
        found = difference(str(row[4]))
        if found is not None and found < limit:
            addToFound(
                row,
                ((limit - found) / limit) * 100,
                result,
                originalPostDate,
                precentageMatched,
                author,
                title,
            )


def isLogged(contentUrl, media, text, url, date, top, hot, new, subSettings, reddit):
    """Decide whether a post is new, already logged, too old, or a repost.

    :param contentUrl: URL of the linked content ('' when there is none)
    :param media: media mapping of the post, or None
    :param text: self-text of the post
    :param url: URL of the post; looked up in the ``Url`` column
    :param date: creation time of the post as a timestamp
    :param top: truthy when the post was found in the top listing
    :param hot: truthy when the post was found in the hot listing
    :param new: truthy when the post was found in the new listing
    :param subSettings: settings sequence; ``[0]`` names the database file,
        ``[1]``/``[2]``/``[3]`` are the day limits for top/hot/new (or None),
        ``[7]`` is the maximum difference still counted as a match
    :param reddit: client whose ``submission(url=...)`` is used to detect
        deleted originals
    :return: list of ``[url, time text, date, percent, author, title]``;
        ``[['delete', -1, -1, -1, -1, -1]]`` when the post is to be ignored
    """
    result = []
    originalPostDate = []
    finalTimePassed = []
    precentageMatched = []
    author = []
    title = []
    returnResult = []

    conn = sqlite3.connect('Posts{}.db'.format(
        sub(
            '([a-zA-Z])',
            lambda x: x.groups()[0].upper(),
            subSettings[0],
            1,
        )))
    try:
        c = conn.cursor()
        # Both instants are naive UTC. ``now`` used to be UTC while ``then``
        # was local time, skewing every age by the UTC offset.
        now = datetime.utcnow()
        then = datetime.utcfromtimestamp(date)
        timePassed = (now - then).days

        tooOld = (
            (subSettings[1] is not None and timePassed > subSettings[1] and top)
            or (subSettings[2] is not None and timePassed > subSettings[2] and hot)
            or (subSettings[3] is not None and timePassed > subSettings[3] and new)
        )

        if tooOld:
            # ignore post if too old
            (result, originalPostDate, finalTimePassed,
             precentageMatched, author, title) = _ignoredPost()

        elif c.execute(
                'SELECT COUNT(1) FROM Posts WHERE Url = ?;',
                (str(url), ),
        ).fetchone()[0] != 0:
            # post is already in database: make sure it is in the right category
            args = c.execute(
                'SELECT Location FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            for i in args.fetchall():
                for name, wanted, index in (('top', top, 1), ('hot', hot, 2), ('new', new, 3)):
                    others = [subSettings[k] for k in (1, 2, 3) if k != index]
                    if i[0] != name and wanted and _categoryApplies(
                            subSettings[index], others, timePassed):
                        updateDatabase(conn, url, name)
            # ignore post
            (result, originalPostDate, finalTimePassed,
             precentageMatched, author, title) = _ignoredPost()

        # check if post is a repost
        # (the first literal below is an invisible character, copied as-is)
        elif text != '​' and text != '':
            # check for text
            _addExactMatches(c, text, result, originalPostDate,
                             precentageMatched, author, title)
            _addFuzzyMatches(c, subSettings[7],
                             lambda content: distance(content, text),
                             result, originalPostDate, precentageMatched,
                             author, title)

        # check for v.reddit
        elif media is not None and (
                'oembed' not in media
                or 'provider_name' not in media['oembed']
                or (media['oembed']['provider_name'] != 'gfycat'
                    and media['oembed']['provider_name'] != 'YouTube')):
            vidHash = hashVid(conn, media, url)
            if vidHash == 'invalid':
                (result, originalPostDate, finalTimePassed,
                 precentageMatched, author, title) = _ignoredPost()
            if isInt(vidHash.replace(' ', '')):
                def vidDifference(content):
                    if isInt(content.replace(' ', '')):
                        return hashVidDifference(content, vidHash)
                    return None

                _addExactMatches(c, vidHash, result, originalPostDate,
                                 precentageMatched, author, title)
                _addFuzzyMatches(c, subSettings[7], vidDifference, result,
                                 originalPostDate, precentageMatched,
                                 author, title)

        # check for image or gif
        elif contentUrl != '':
            _addExactMatches(
                c, str(contentUrl).replace('&feature=youtu.be', ''),
                result, originalPostDate, precentageMatched, author, title)

            # check for gif
            if 'gif' in contentUrl and not (contentUrl.endswith('gifv')
                                            or 'gifs' in contentUrl):
                gifHash = hashGif(conn, contentUrl, url)
                if gifHash == 'invalid':
                    (result, originalPostDate, finalTimePassed,
                     precentageMatched, author, title) = _ignoredPost()
                if isInt(gifHash.replace(' ', '')):
                    def gifDifference(content):
                        if isInt(content.replace(' ', '')):
                            return hashVidDifference(content, gifHash)
                        return None

                    _addExactMatches(c, gifHash, result, originalPostDate,
                                     precentageMatched, author, title)
                    _addFuzzyMatches(c, subSettings[7], gifDifference, result,
                                     originalPostDate, precentageMatched,
                                     author, title)

            elif 'png' in contentUrl or 'jpg' in contentUrl:
                imgHash = hashImg(conn, contentUrl, url)
                if imgHash == 'invalid':
                    (result, originalPostDate, finalTimePassed,
                     precentageMatched, author, title) = _ignoredPost()
                if isInt(imgHash):
                    def imgDifference(content):
                        if isInt(content):
                            # both operands must be ints for the bit comparison
                            return dhash.get_num_bits_different(
                                int(imgHash), int(content))
                        return None

                    _addExactMatches(c, imgHash, result, originalPostDate,
                                     precentageMatched, author, title)
                    _addFuzzyMatches(c, subSettings[7], imgDifference, result,
                                     originalPostDate, precentageMatched,
                                     author, title)

        # delete post if it has been deleted
        deletedIndexes = set()
        for index, postUrl in enumerate(result):
            if postUrl != '' and postUrl != 'delete':
                if reddit.submission(url='https://reddit.com{}'.format(
                        postUrl)).selftext == '[deleted]':
                    c.execute(
                        'DELETE FROM Posts WHERE Url = ?;',
                        (str(postUrl), ),
                    )
                    deletedIndexes.add(index)
                    print('deleted {}'.format(postUrl))
        c.close()
        # Without a commit the DELETEs above are rolled back on close.
        conn.commit()
    finally:
        conn.close()

    if deletedIndexes:
        # Drop deleted posts by position so the parallel lists stay aligned.
        # Removing by value could hit another post with the same date,
        # author or title.
        def surviving(values):
            return [v for k, v in enumerate(values) if k not in deletedIndexes]

        result = surviving(result)
        originalPostDate = surviving(originalPostDate)
        precentageMatched = surviving(precentageMatched)
        author = surviving(author)
        title = surviving(title)

    # Describe each original's age in the largest unit that is at least 1.
    for i in originalPostDate:
        then = datetime.utcfromtimestamp(i)
        timePassed = monthDelta(then, now)
        fullText = ('{} months ago'.format(str(timePassed)))
        if timePassed < 1:
            timePassed = (now - then).days
            fullText = ('{} days ago'.format(str(timePassed)))
            if timePassed < 1:
                timePassed = (now - then).total_seconds() // 3600
                fullText = ('{} hours ago'.format(str(timePassed)))
                if timePassed < 1:
                    timePassed = (now - then).total_seconds() // 60
                    fullText = ('{} minutes ago'.format(str(timePassed)))
                    if timePassed < 1:
                        timePassed = (now - then).total_seconds()
                        fullText = ('{} seconds ago'.format(str(timePassed)))
        finalTimePassed.append(fullText)

    for index, postUrl in enumerate(result):
        returnResult.append([
            postUrl,
            finalTimePassed[index],
            originalPostDate[index],
            precentageMatched[index],
            author[index],
            title[index],
        ])

    if returnResult != [['delete', -1, -1, -1, -1, -1]]:
        print('Found? {}'.format(returnResult))
    return returnResult