Python get_num_bits_different示例，dhash.get_num_bits_different Python示例

示例#1

0

显示文件

文件： warden.py 项目： Matesxs/MatesDiscordBot

    async def check_for_duplicates(self,
                                   message: discord.Message,
                                   settings: Optional[WardenSettings] = None):
        """Announce the first stored attachment that closely matches ``message``.

        Every hash produced for ``message`` is compared against all
        attachments stored for the message's channel. For each hash the
        closest stored post (Hamming distance below 128) is remembered, and
        the first remembered post whose distance is at most ``LIMIT_SOFT`` is
        announced.

        :param message: message whose images are checked
        :param settings: optional settings forwarded to the announcement
        """
        image_hashes = [h async for h in self.generate_message_hash(message)]
        if len(image_hashes) == 0:
            return

        stored_posts = self.warden_repo.get_all_channel_attachments(
            message.channel.id)

        closest = {}
        for candidate_hash in image_hashes:
            best_distance = 128
            best_post = None

            for stored in stored_posts:
                # Never report a message as a duplicate of itself.
                if stored.message_id == str(message.id):
                    continue

                distance = dhash.get_num_bits_different(
                    candidate_hash, int(stored.dhash, 16))
                if distance < best_distance:
                    best_post, best_distance = stored, distance

            if best_post is not None:
                closest[best_post] = best_distance

        for original, distance in closest.items():
            if distance > LIMIT_SOFT:
                continue
            # Only one announcement per message.
            await self.__announce_duplicate(message, original, distance,
                                            settings)
            break

示例#2

0

显示文件

文件： ImageSimilarFinder.py 项目： zhangjizxc/ImageSimilarFinder

def findSimilarImgs(baseImageFile, tarDir, hasCmpedList, step):
    """Find images under ``tarDir`` that are similar to ``baseImageFile``.

    Walks ``tarDir`` (skipping ``.git`` directories and anything under the
    module-level ``resultDir``), hashes every ``jpg``/``png`` file not already
    listed in ``hasCmpedList`` and compares it with the base image. Files whose
    difference percentage is at most the module-level ``limitDiff`` are
    appended to ``hasCmpedList``, reported on stdout and handed to
    ``movePicToResultDir``.

    :param baseImageFile: path of the reference image
    :param tarDir: directory tree to search
    :param hasCmpedList: list of already matched paths; extended in place
    :param step: value forwarded unchanged to ``movePicToResultDir``
    :return: None (also returns early if the base image cannot be loaded or
        hashed)
    """
    image1 = load_image(baseImageFile)
    if image1 is None:
        return
    try:
        hash1 = dhash.dhash_int(image1, size=imageSize)
    except Exception:
        # Best effort: an unhashable base image means there is nothing to do.
        return

    # Denominator used to turn a bit count into a percentage.
    total_bits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(tarDir):
        if path.endswith('.git') or path.startswith(resultDir):
            continue
        for filename in filelist:
            if not filename.endswith(('jpg', 'png')):
                continue
            imageName = os.path.join(path, filename)
            if imageName in hasCmpedList:
                continue
            image2 = load_image(imageName)
            if image2 is None:
                continue
            try:
                hash2 = dhash.dhash_int(image2, size=imageSize)
            except Exception:
                # Skip files that cannot be hashed instead of aborting the scan.
                continue
            num_bits_different = dhash.get_num_bits_different(hash1, hash2)
            diff = 100 * num_bits_different / total_bits
            if diff <= limitDiff:
                hasCmpedList.append(imageName)
                print(baseImageFile + " is same with " + imageName)
                movePicToResultDir(step, baseImageFile, imageName, diff)

示例#3

0

显示文件

def find_duplicates(image_info, image_infos):
    """Return indexes of entries in ``image_infos`` that nearly match ``image_info``.

    An entry matches when its ``dhash`` differs from ``image_info.dhash`` in
    at most two bits.
    """
    max_distance = 2
    return [
        index
        for index, candidate in enumerate(image_infos)
        if dhash.get_num_bits_different(image_info.dhash,
                                        candidate.dhash) <= max_distance
    ]

示例#4

0

显示文件

文件： warden.py 项目： Misha12/rubbergod

    async def checkDuplicate(self, message: disnake.Message):
        """Check if uploaded files are known.

        Each hash yielded by ``saveMessageHashes`` is compared with every
        stored post; the closest post (Hamming distance below 128) is recorded
        and every recorded post within ``self.limit_soft`` bits is announced.

        :param message: message whose attachments are checked
        """
        hashes = [x async for x in self.saveMessageHashes(message)]

        # Nothing to compare: skip the repository query entirely.
        if not hashes:
            return

        duplicates = {}
        posts = repo_i.getAll()
        for img_hash in hashes:
            hamming_min = 128
            duplicate = None
            for post in posts:
                # skip current message
                if post.message_id == message.id:
                    continue
                # do the comparison
                post_hash = int(post.dhash, 16)
                hamming = dhash.get_num_bits_different(img_hash, post_hash)
                if hamming < hamming_min:
                    duplicate = post
                    hamming_min = hamming

            # Only remember real matches; a None key could otherwise reach
            # _announceDuplicate when no stored post was compared.
            if duplicate is not None:
                duplicates[duplicate] = hamming_min

        for duplicate, hamming_min in duplicates.items():
            if hamming_min <= self.limit_soft:
                await self._announceDuplicate(message, duplicate, hamming_min)

示例#5

0

显示文件

文件： imagecompare.py 项目： miguelmcell/HackTX2018

def doComparison(photo1loc, photo2loc):
    """Compare a local photo with a remote one using several metrics.

    :param photo1loc: filesystem path of the first photo
    :param photo2loc: URL of the second photo
    :return: tuple ``(MSE, SSIM, num_bits_different, faceCompare)`` where the
        first two come from ``compareImages`` on grayscale versions, the third
        is the dhash bit difference, and the last is a fixed placeholder 0.3
        (face comparison is disabled)
    """
    original = cv2.imread(photo1loc)
    contrast = url_to_image(photo2loc)
    # Bring the remote image to the local image's size before comparing.
    contrast = cv2.resize(contrast, (original.shape[1], original.shape[0]))

    original = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    contrast = cv2.cvtColor(contrast, cv2.COLOR_BGR2GRAY)
    MSE, SSIM = compareImages(original, contrast)

    with Image.open(photo1loc) as image1:
        hash1 = dhash.dhash_int(image1)

    # Close the HTTP response as soon as the payload has been read.
    with urllib.request.urlopen(photo2loc) as response:
        newfile = io.BytesIO(response.read())

    with Image.open(newfile) as image2:
        hash2 = dhash.dhash_int(image2)

    num_bits_different = dhash.get_num_bits_different(hash1, hash2)

    # Face comparison is not wired in; a constant is returned in its place.
    faceCompare = 0.3

    return (MSE, SSIM, num_bits_different, faceCompare)

示例#6

0

显示文件

def compute_hamming_dist(hashes):
    """Return the bit differences between each pair of neighbouring hashes.

    ``hashes`` is a sequence of hexadecimal strings; element ``k`` of the
    result compares ``hashes[k]`` with ``hashes[k + 1]``. The list is also
    printed before being returned.
    """
    bit_diffs = [
        dhash.get_num_bits_different(int(current, 16), int(following, 16))
        for current, following in zip(hashes[:-1], hashes[1:])
    ]
    print(bit_diffs)
    return bit_diffs

示例#7

0

显示文件

文件： static_analysis.py 项目： jfoucry/exodus-core

 def get_icon_similarity(phash_origin, phash_candidate):
     """
     Score how alike two icon hashes are.
     :param phash_origin: hash of the original icon
     :param phash_candidate: hash of the icon to be compared
     :return: 1 minus the share of differing bits out of PHASH_SIZE * PHASH_SIZE * 2
     """
     differing_bits = dhash.get_num_bits_different(phash_origin,
                                                   phash_candidate)
     total_bits = PHASH_SIZE * PHASH_SIZE * 2
     return 1 - 1. * differing_bits / total_bits

示例#8

0

显示文件

文件： warden.py 项目： CrashTheEmperor/rubbergoddess

    async def checkDuplicate(self, message: discord.Message):
        """Check if uploaded files are known.

        For every hash yielded by ``saveMessageHashes`` an exact lookup is
        tried first; if the repository returns no rows for the hash, all
        stored posts are scanned for the closest one. Matches within
        ``self.limit_soft`` bits are announced.

        :param message: message whose attachments are checked
        """
        hashes = [x async for x in self.saveMessageHashes(message)]

        if len(message.attachments) > 0 and len(hashes) == 0:
            # Attachments exist but none produced a hash: flash a reaction.
            await message.add_reaction("▶")
            await asyncio.sleep(2)
            await message.remove_reaction("▶", self.bot.user)
            return

        duplicates = {}
        posts_all = None
        for image_hash in hashes:
            # try to look up hash directly
            posts_full = repo_i.getHash(str(hex(image_hash)))

            if len(posts_full) > 0:
                # full match found
                for post in posts_full:
                    # skip current message
                    if post.message_id == message.id:
                        continue
                    # add to duplicates
                    duplicates[post] = 0
                    await self.console.debug(message, "Full dhash match")
                    break

                # move on to the next hash
                continue

            # full match not found, iterate over whole database (loaded once)
            if posts_all is None:
                posts_all = repo_i.getAll()

            hamming_min = 128
            duplicate = None
            for post in posts_all:
                # skip current message
                if post.message_id == message.id:
                    continue
                # do the comparison
                post_hash = int(post.dhash, 16)
                hamming = dhash.get_num_bits_different(image_hash, post_hash)
                if hamming < hamming_min:
                    duplicate = post
                    hamming_min = hamming

            # Do not record a None key when no stored post was comparable.
            if duplicate is not None:
                duplicates[duplicate] = hamming_min

            await self.console.debug(
                message, f"Closest Hamming distance: {hamming_min}/128 bits")

        for post, hamming_distance in duplicates.items():
            if hamming_distance <= self.limit_soft:
                await self._announceDuplicate(message, post, hamming_distance)

示例#9

0

显示文件

文件： Mohammed_alshareif_t1.py 项目： mohdsh85/taskAssesment

def compare_images(imageA, imageB, title):
    """Print a JSON object with the dhash similarity of two image files.

    Both files are opened, hashed with size 8, and the differing bit count is
    turned into a percentage of 128 bits. ``title`` is accepted but not used.
    """
    first, second = Image.open(imageA), Image.open(imageB)
    distance = dhash.get_num_bits_different(dhash.dhash_int(first, 8),
                                            dhash.dhash_int(second, 8))
    similarity = str(100 - ((distance / 128) * 100)) + '%'
    print(json.dumps({"Similarity is ": similarity}))

示例#10

0

显示文件

文件： PriusImageCache.py 项目： paswani/PriusWatchML

    def compare_images(self, image1, image2):
        """Return True when the two images' hashes differ in at most 3 bits.

        Hashes come from ``self.get_image_hash`` and are parsed as
        hexadecimal strings.
        """
        first_hex = self.get_image_hash(image1)
        second_hex = self.get_image_hash(image2)

        distance = dhash.get_num_bits_different(int(first_hex, 16),
                                                int(second_hex, 16))
        return distance <= 3

示例#11

0

显示文件

文件： PerceptualHashAlgorithm.py 项目： wzy6642/PerceptualHashAlgorithm

def dHash_use_package(img1, img2):
    """Return the dhash bit difference between two image files.

    Each image's row/column hash is formatted as hex and parsed back to an
    integer before comparison. Prints 'Same Picture' when the difference is
    5 bits or fewer.
    """
    pictures = (Image.open(img1), Image.open(img2))
    hash_ints = [
        int(dhash.format_hex(*dhash.dhash_row_col(picture)), 16)
        for picture in pictures
    ]
    result = dhash.get_num_bits_different(*hash_ints)
    if result <= 5:
        print('Same Picture')
    return result

示例#12

0

显示文件

文件： PriusImageCache.py 项目： paswani/PriusWatchML

    def is_image_duplicate(self, src, cam_id=''):
        """Return True when ``src`` is within 3 bits of its cached hash.

        The cache entry is looked up under ``self.get_hash_key(src)``; a
        ``None`` entry means "not a duplicate". ``cam_id`` is accepted but
        not used here.
        """
        cached = self.cache[self.get_hash_key(src)]
        if cached is None:
            return False

        current_hex = self.get_image_hash(src)
        distance = dhash.get_num_bits_different(int(cached['image_hash'], 16),
                                                int(current_hex, 16))
        return distance <= 3

示例#13

0

显示文件

文件： perceptualHashRepetition.py 项目： richDsInterview/TechnicalChallenge

def main(arguments):
    """Add an image to a database directory unless a similar one exists.

    Parses ``arguments`` (``infile``, ``dbDir`` and optional ``--threshold``),
    hashes ``infile`` and compares it with every readable image in ``dbDir``.
    Each image scoring above the threshold is reported; if none does,
    ``infile`` is copied into the database directory.

    :param arguments: list of command-line arguments (without program name)
    :raises SystemExit: when ``infile`` cannot be opened as an image
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help="path to file to compare")
    parser.add_argument('dbDir', help="path to 'Database' directory")
    parser.add_argument('--threshold', help="threshold val (default 0.75)", type=float, default=0.75)

    args = parser.parse_args(arguments)
    baseDir = os.path.abspath(args.dbDir)

    try:
        imTest = PIL.Image.open(args.infile)
    except OSError:
        # OSError covers both a missing file and a file PIL cannot identify.
        print("An error occured trying to read the test file. Can't compare")
        raise SystemExit

    # Hash the test image once; it does not change between comparisons.
    dh1 = dhash.dhash_int(imTest)
    # dhash_int is called with its default size; assumed to be 8, giving
    # 2 * 8 * 8 = 128 bits -- TODO confirm against the installed dhash version.
    # Dividing by dh1.bit_length() was wrong: it depends on leading zero bits
    # and is zero for an all-zero hash.
    hash_bits = 2 * 8 * 8

    match = False
    # loop through each image in the database directory
    for dbImg in os.listdir(args.dbDir):
        try:
            imDB = PIL.Image.open(os.path.join(baseDir, dbImg))
        except OSError:
            # not an image file accessible by PIL; skip it
            continue

        dh2 = dhash.dhash_int(imDB)
        score = 1 - dhash.get_num_bits_different(dh1, dh2) / hash_bits

        if score > args.threshold:
            # alert about the match
            print("Matching image found in database directory: ", dbImg, " (score: ", str(score), ").")
            match = True

    if match:
        print("Match(es) found. Not adding")
    else:
        print("No match found. Adding ", args.infile, " to database directory: ", baseDir)
        shutil.copy(args.infile, baseDir)

示例#14

0

显示文件

def hashVidDifference(originalHash, newHash):
    """Print and return the mean closest-frame bit difference of two videos.

    Both arguments are whitespace-separated strings of decimal integer frame
    hashes. For every frame of ``originalHash`` the smallest bit difference
    to any frame of ``newHash`` is taken; the mean of those minima is the
    result.
    """
    originalFrames = originalHash.split()
    newFrames = newHash.split()
    closestPerFrame = [
        min([
            dhash.get_num_bits_different(int(original), int(candidate))
            for candidate in newFrames
        ])
        for original in originalFrames
    ]
    average = sum(closestPerFrame) / len(closestPerFrame)
    print(average)
    return average

示例#15

0

显示文件

文件： fishing.py 项目： llwslc/dualshock4-remap

def getFishLevel(imageData):
    """Return True when the frame matches one of the three reference fish hashes.

    The channels of ``imageData`` are reordered from (b, g, r) to (r, g, b),
    the result is wrapped in a PIL image and hashed with ``getImgHash``. The
    frame matches when its hash differs from ``fishLv1ImgHash``,
    ``fishLv2ImgHash`` or ``fishLv3ImgHash`` by fewer than 15 bits.
    """
    max_bit_distance = 15

    blue, green, red = cv2.split(imageData)
    pil_image = Image.fromarray(cv2.merge([red, green, blue]), 'RGB')
    frame_hash = getImgHash(pil_image)

    # Short-circuits in the same order as the reference levels are listed.
    return (
        dhash.get_num_bits_different(frame_hash, fishLv1ImgHash) < max_bit_distance
        or dhash.get_num_bits_different(frame_hash, fishLv2ImgHash) < max_bit_distance
        or dhash.get_num_bits_different(frame_hash, fishLv3ImgHash) < max_bit_distance
    )

示例#16

0

显示文件

def hash_vid_difference(original_hash, new_hash):
    """Return the mean closest-frame bit difference between two videos.

    Both arguments are whitespace-separated strings of decimal integer frame
    hashes. Each original frame contributes its smallest bit difference to
    any new frame; the mean of those minima is returned.
    """
    original_frames = original_hash.split()
    new_frames = new_hash.split()
    closest_per_frame = [
        min([
            dhash.get_num_bits_different(int(original), int(candidate))
            for candidate in new_frames
        ])
        for original in original_frames
    ]
    return sum(closest_per_frame) / len(closest_per_frame)

示例#17

0

显示文件

文件： __init__.py 项目： sinus-x/rubbergoddess

    async def check_message(self, message: discord.Message):
        """Check if message contains duplicate image.

        For each hash yielded by ``save_hashes`` an exact repository lookup is
        tried first. When no exact match from another message exists, all
        stored images are scanned and the closest one is kept if its distance
        is below ``self.limit_soft``. Every collected duplicate is reported.

        :param message: message whose images are checked
        """
        image_hashes = [x async for x in self.save_hashes(message)]

        if len(message.attachments) > len(image_hashes):
            # Some attachments produced no hash: flash a reaction.
            await message.add_reaction("▶")
            await asyncio.sleep(2)
            await message.remove_reaction("▶", self.bot.user)

        duplicates = {}
        all_images = None

        for image_hash in image_hashes:
            # try to look up hash directly
            exact_match_found = False
            for image in repo_i.get_hash(str(hex(image_hash))):
                # skip current message
                if image.message_id == message.id:
                    continue
                # add to duplicates
                duplicates[image] = 0
                await self.console.debug(message, "Full dhash match found.")
                exact_match_found = True
                break

            # Skip the full scan only when an exact match was really found;
            # an unconditional `continue` here made the scan unreachable.
            if exact_match_found:
                continue

            # full match not found, iterate over whole database (loaded once)
            if all_images is None:
                all_images = repo_i.get_all()

            minimal_distance = 128
            duplicate = None
            for image in all_images:
                # skip current image
                if image.message_id == message.id:
                    continue

                # do the comparison
                db_image_hash = int(image.dhash, 16)
                distance = dhash.get_num_bits_different(db_image_hash, image_hash)
                if distance < minimal_distance:
                    duplicate = image
                    minimal_distance = distance

            if duplicate is not None and minimal_distance < self.limit_soft:
                duplicates[duplicate] = minimal_distance

        for image, distance in duplicates.items():
            await self.report_duplicate(message, image, distance)

示例#18

0

显示文件

文件： ImageSimilarFinder.py 项目： zhangjizxc/ImageSimilarFinder

def sortPicsBySimilarity(allPicsDir, imageSize, threshold, sortedPicsDir,
                         similarPicsDir):
    """Copy pictures into hash order and collect neighbouring look-alikes.

    For every directory under ``allPicsDir`` the loadable images are hashed
    and sorted by hash value. Each image is copied to the mirrored directory
    under ``sortedPicsDir`` with its rank (and, from the second image on, its
    difference percentage to the previous image) prefixed to the file name.
    When that percentage is at most ``threshold`` the image and its
    predecessor are also copied to the mirrored directory under
    ``similarPicsDir``.

    :param allPicsDir: root directory of the source pictures
    :param imageSize: ``size`` argument passed to ``dhash.dhash_int``
    :param threshold: maximum difference percentage treated as "similar"
    :param sortedPicsDir: output root for the sorted copies
    :param similarPicsDir: output root for the similar copies
    """
    allPicsDirLen = len(allPicsDir)
    # Denominator used to turn a bit count into a percentage.
    total_bits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(allPicsDir):
        hashed = []
        for filename in filelist:
            image = load_image(os.path.join(path, filename))
            if image is None:
                # Unloadable file: skip it instead of recording a stale hash.
                continue
            try:
                image_hash = dhash.dhash_int(image, size=imageSize)
            except Exception:
                continue
            hashed.append((image_hash, filename))

        relative = path[allPicsDirLen + 1:]
        sortedPath = os.path.join(sortedPicsDir, relative)
        similarPath = os.path.join(similarPicsDir, relative)
        # makedirs also creates missing parent directories.
        os.makedirs(sortedPath, exist_ok=True)
        os.makedirs(similarPath, exist_ok=True)

        ordered = sorted(hashed, key=lambda entry: entry[0])
        lastDiff = None
        for count, item in enumerate(ordered):
            origFile = os.path.join(path, item[1])
            if count == 0:
                newFileName = str(count) + "_" + item[1]
            else:
                lastItem = ordered[count - 1]
                num_bits_different = dhash.get_num_bits_different(
                    lastItem[0], item[0])
                diff = 100 * num_bits_different / total_bits
                newFileName = str(count) + "_" + str(diff) + "%_" + item[1]
                if diff <= threshold:
                    shutil.copy(origFile,
                                os.path.join(similarPath, newFileName))
                    # Rebuild the predecessor's sorted name so it can be
                    # placed next to the current file.
                    if count == 1:
                        lastNewFileName = str(count - 1) + "_" + lastItem[1]
                    else:
                        lastNewFileName = (str(count - 1) + "_" +
                                           str(lastDiff) + "%_" + lastItem[1])
                    lastNewFilePath = os.path.join(similarPath,
                                                   lastNewFileName)
                    if not os.path.exists(lastNewFilePath):
                        shutil.copy(os.path.join(path, lastItem[1]),
                                    lastNewFilePath)
                lastDiff = diff
            shutil.copy(origFile, os.path.join(sortedPath, newFileName))

示例#19

0

显示文件

文件： modelo_hash.py 项目： max3a3/artifacthelper

    def predict_1d(self, X, raw=False):
        """Compare one image against every baseline hash.

        ``X`` is converted with ``imgToFeatures`` and its bit difference to
        each entry of ``self.baseline`` is stored. Returns the name looked up
        in ``self.label_to_name`` at the index of the smallest difference;
        with ``raw=True`` the smallest difference is returned as well, as
        ``(difference, name)``.
        """
        distances = np.zeros(self.n_images)

        query_hash = imgToFeatures(X)
        for index, reference in enumerate(self.baseline):
            distances[index] = dhash.get_num_bits_different(reference,
                                                            query_hash)

        if not raw:
            return self.label_to_name[np.argmin(distances)]
        return np.min(distances), self.label_to_name[np.argmin(distances)]

示例#20

0

显示文件

文件： PriusImageCache.py 项目： paswani/PriusWatchML

    def is_image_duplicate(self, src, cam_id=''):
        """Return True when ``src`` is within 3 bits of the hash cached for ``cam_id``.

        A ``cam_id`` that is not in ``self.cache`` is never a duplicate.
        """
        if cam_id not in self.cache:
            return False

        cached = self.cache[cam_id]
        current_hex = self.get_image_hash(src)

        distance = dhash.get_num_bits_different(int(cached['image_hash'], 16),
                                                int(current_hex, 16))
        return distance <= 3

示例#21

0

显示文件

文件： TaskIdentifier.py 项目： n-qber/among-tasks

    def image_in_another(another: Image, image: Image, pos: list) -> bool:
        """Tell whether ``image`` appears in ``another`` at position ``pos``.

        Either image may be given as a path string, in which case it is
        opened first. The region of ``another`` that starts at ``pos`` and has
        the size of ``image`` is cropped, both are hashed with
        ``TaskIdentifier.dhash``, and the result is True when the hashes
        differ in fewer than 20 bits.

        :param another: larger image (or its path) to look inside
        :param image: smaller image (or its path) to look for
        :param pos: ``(x, y)`` of the top-left corner of the region
        """
        # isinstance also accepts str subclasses, unlike `type(x) is str`.
        if isinstance(image, str):
            image = Image.open(image)
        if isinstance(another, str):
            another = Image.open(another)

        start_x, start_y = pos
        region = (start_x, start_y,
                  start_x + image.width, start_y + image.height)

        image_dhash = TaskIdentifier.dhash(image)
        another_dhash = TaskIdentifier.dhash(another.crop(region))

        return dhash.get_num_bits_different(image_dhash, another_dhash) < 20

示例#22

0

显示文件

文件： utils.py 项目： n-qber/among-tasks

def image_in_another(another: Image, image: Image, pos: list=(0, 0), bit_diff=20) -> bool:
    """Tell whether ``image`` appears in ``another`` at position ``pos``.

    Either image may be given as a path string, in which case it is opened
    first. The region of ``another`` that starts at ``pos`` and has the size
    of ``image`` is cropped, both are hashed with ``dhash_calc``, and the
    result is True when the hashes differ in fewer than ``bit_diff`` bits.

    :param another: larger image (or its path) to look inside
    :param image: smaller image (or its path) to look for
    :param pos: ``(x, y)`` of the top-left corner of the region
    :param bit_diff: exclusive upper bound on differing bits for a match
    """
    # isinstance also accepts str subclasses, unlike `type(x) is str`.
    if isinstance(image, str):
        image = Image.open(image)
    if isinstance(another, str):
        another = Image.open(another)

    start_x, start_y = pos
    region = (start_x, start_y, start_x + image.width, start_y + image.height)

    image_dhash = dhash_calc(image)
    another_dhash = dhash_calc(another.crop(region))

    return dhash.get_num_bits_different(image_dhash, another_dhash) < bit_diff

示例#23

0

显示文件

def sameImgCheck(imageData, imgHash):
    """Return True when the frame's hash is within 15 bits of ``imgHash``.

    The channels of ``imageData`` are reordered from (b, g, r) to (r, g, b),
    wrapped in a PIL image and hashed with ``getImgHash`` before comparison.
    """
    max_bit_distance = 15

    blue, green, red = cv2.split(imageData)
    pil_image = Image.fromarray(cv2.merge([red, green, blue]), 'RGB')

    frame_hash = getImgHash(pil_image)
    return dhash.get_num_bits_different(frame_hash, imgHash) < max_bit_distance

示例#24

0

显示文件

文件： modelo_hash.py 项目： max3a3/artifacthelper

    def predict(self, X, raw=False):
        """Compare every image in ``X`` against every baseline hash.

        Fills a ``(X.shape[0], self.n_images)`` array with bit differences,
        printing a progress line on every twelfth image. With ``raw=True``
        the array is returned; otherwise ``self.label_to_name`` is indexed
        with the per-row index of the smallest difference.
        """
        total = X.shape[0]
        distances = np.zeros((total, self.n_images))

        for row, sample in enumerate(X):
            if row % 12 == 0:
                print(row, 'of', total, end='\r')

            sample_hash = imgToFeatures(sample)
            for column, reference in enumerate(self.baseline):
                distances[row][column] = dhash.get_num_bits_different(
                    reference, sample_hash)

        if raw:
            return distances
        return self.label_to_name[np.argmin(distances, axis=1)]

示例#25

0

显示文件

    def isAnySimularImageByHashCode(self, image_hashes, key, hash):
        """Record ``hash`` under ``key`` and report whether a similar hash was known.

        Returns True when the image is smaller than 30 pixels in either of
        its first two dimensions, or when any hash in ``image_hashes[key]``
        differs from ``hash`` by fewer than ``hash_delta`` bits.

        :param image_hashes: mapping from key to the hashes seen so far
        :param key: group under which the hash is stored and compared
        :param hash: hash of the current image
        """
        # NOTE(review): `image` is not a parameter of this method; it must
        # come from an enclosing/global scope not visible here -- confirm.
        dim = image.shape[:2]
        if dim[0] < 30 or dim[1] < 30:
            return True

        hashes = image_hashes[key]
        imageHash = hash
        # Use a lazy %-style placeholder; the previous call passed an extra
        # argument with no placeholder and referenced an undefined name.
        logging.debug("image_hash: %s", imageHash)
        # NOTE(review): if `image_hashes` is the same object as
        # `self.image_hashes`, the new hash is compared with itself below --
        # verify against the caller.
        self.image_hashes[key].append(imageHash)

        return any(
            dhash.get_num_bits_different(imageHash, known_hash) < hash_delta
            for known_hash in hashes
        )

示例#26

0

显示文件

def match_painting(img):
    """
    Compute hash bit differences between img and paintings in painting_db
    :param img: input image to use.
    :return: Matching painting (the first one with the fewest differing bits,
            provided that count is below the threshold of 20), or None if all
            differences reach the threshold or painting_db is empty.
    """

    threshold = 20

    img_row, img_col = dhash.dhash_row_col(img)
    img_hash = int(dhash.format_hex(img_row, img_col), 16)

    # Check difference between img and every painting in painting_db
    differences = [
        dhash.get_num_bits_different(img_hash, painting.hash)
        for painting in painting_db
    ]

    # An empty database has no candidate; min() would raise on it.
    if not differences:
        return None

    smallest = min(differences)
    if smallest < threshold:
        return painting_db[differences.index(smallest)]
    return None

示例#27

0

显示文件

    async def scan_compare(self, ctx, first: int, second: int):
        """Scan two messages and report comparison result

        Lists the stored hashes of both messages and, when at least one of
        them has exactly one hash, appends the bit difference and similarity
        percentage (out of 128 bits) of the first hash of each message.

        Arguments
        ---------
        first: Message ID
        second: Message ID
        """
        hashes1 = repo_i.get_by_message(first)
        hashes2 = repo_i.get_by_message(second)

        if len(hashes1) == 0:
            return await ctx.send(self.text.get("comparison", "not_found", message_id=str(first)))
        if len(hashes2) == 0:
            return await ctx.send(self.text.get("comparison", "not_found", message_id=str(second)))

        text = []
        text.append(self.text.get("comparison", "header", message_id=str(first)))
        for h in hashes1:
            text.append(self.text.get("comparison", "line", hash=str(h.dhash)[2:]))
        text.append("")
        text.append(self.text.get("comparison", "header", message_id=str(second)))
        for h in hashes2:
            # Drop the first two characters here as well so both lists are
            # rendered the same way (presumably a "0x" prefix -- confirm).
            text.append(self.text.get("comparison", "line", hash=str(h.dhash)[2:]))

        # NOTE(review): `or` compares the first hashes even when the other
        # message has several; `and` may have been intended -- confirm.
        if len(hashes1) == 1 or len(hashes2) == 1:
            hash1 = int(hashes1[0].dhash, 16)
            hash2 = int(hashes2[0].dhash, 16)

            hamming = dhash.get_num_bits_different(hash1, hash2)
            prob = "{:.1f}".format((1 - hamming / 128) * 100)

            text.append("")
            text.append(self.text.get("comparison", "footer", percent=str(prob), bits=str(hamming)))

        await ctx.send("\n".join(text))

示例#28

0

显示文件

# Compare the dhash of an original image with two altered versions of it and
# print how many hash bits differ in each case.

# Hash each image and keep the hash as a "0x"-prefixed hex string.
image = Image.open('osoba20a.png') # Original
row, col = dhash.dhash_row_col(image)
a = "0x" + dhash.format_hex(row, col)

image2 = Image.open('osaba20a_pic_resized_BW.png')  # Print Attack
row, col = dhash.dhash_row_col(image2)
b = "0x" + dhash.format_hex(row, col)

image3 = Image.open('osoba20a_vertflip.png')  # Vertical flip
row, col = dhash.dhash_row_col(image3)
c = "0x" + dhash.format_hex(row, col)

# Base 0 lets int() pick the base from the "0x" prefix.
d = int(a,0)
e = int(b,0)
f = int(c,0)

print("Original")
# The original compared with itself.
print(dhash.get_num_bits_different(d,d))
print("Print Attack:")
print(dhash.get_num_bits_different(d,e))
print("Vertical Flip")
print(dhash.get_num_bits_different(d,f))
# NOTE(review): start_time is not assigned in this part of the script;
# presumably set with time.time() earlier -- confirm.
print("Time taken: ", (time.time() - start_time), "seconds")



# Used for Data Collection:
#valuelist = [0xc08701468687d7cf40003341dfaf1fff,0xc08700468f8fc74fc08033405fbfbfff,0xc04302264646c767800013215f3f9fff,0xc00f130c8e0f1f9780003f41bf7f7fbf,0xc0870744c68687c7c0001e4140be3fff,0xc087034c8e8cd3c6000033419fbdffff]

#for i in valuelist:
    #print(dhash.get_num_bits_different(0xb2c3812707474767f08093202f3fdf3f,i))

示例#29

0

显示文件

文件： generate_report.py 项目： dgaus/wordinserter

def is_same_image(img1, img2):
    """Return True when both images produce exactly the same dhash."""
    hashes = (dhash.dhash_int(img1), dhash.dhash_int(img2))
    return dhash.get_num_bits_different(*hashes) == 0

示例#30

0

显示文件

文件： SplitAndRemove.py 项目： madupoorna/LMMSBackend_python

    def start_splitting(video_id, video_download_path):
        """Split a downloaded video into frames and delete near-duplicates.

        Frames are written to ``<video_download_path><video_id>_images`` as
        ``frame<N>.jpg`` whenever the capture position read before the frame
        is a multiple of 150. The frames are then resized to 640x480, and
        every frame whose hash is within 5 bits of an earlier kept frame is
        removed from disk.

        :param video_id: name of the video file without the ``.mp4`` suffix
        :param video_download_path: directory prefix of the video; it is
            concatenated as-is, so it must end with a path separator
        """
        split_path = video_download_path + video_id + "_images"

        if not os.path.exists(split_path):
            os.makedirs(split_path, 0o777)

        vidcap = cv2.VideoCapture(video_download_path + video_id + ".mp4")
        # One frame is consumed before the loop, as before, so the same
        # capture positions are sampled.
        vidcap.read()
        x = 1

        # splitting frames from video
        print("splitting video " + video_id + ".mp4....")
        try:
            while vidcap.isOpened():
                frameId = vidcap.get(1)  # current frame number
                ret, frame = vidcap.read()
                print(frameId)
                if not ret:
                    break
                if frameId % 150 == 0:
                    x += 1
                    # Write the frame just read; previously the very first
                    # frame was written every time.
                    cv2.imwrite(split_path + "/frame%d.jpg" % x, frame)
        finally:
            vidcap.release()

        # resizing images
        print("resizing frames..")
        for subdir, dirs, files in os.walk(split_path + "/"):
            for file in files:
                image = cv2.imread(split_path + "/" + file)
                if image is None:
                    # Not readable as an image; nothing to resize.
                    continue
                img = cv2.resize(image, (640, 480))
                cv2.imwrite(split_path + "/" + file, img)

        # Only this video's frames are candidates: hashes are computed
        # relative to split_path, so files from other directories must not
        # be listed.
        files_array = [
            file for file in os.listdir(split_path) if file.endswith(".jpg")
        ]

        # remove duplicates: keep a frame only if it is more than 5 bits
        # away from every frame kept so far; each file is hashed once
        print("removing duplicate frames..")
        kept_hashes = []
        for file in files_array:
            hash_value = SplitAndRemove.generate_hash(file, split_path)
            is_duplicate = any(
                dhash.get_num_bits_different(kept_hash, hash_value) <= 5
                for kept_hash in kept_hashes)
            if is_duplicate:
                os.remove(split_path + "/" + file)
            else:
                kept_hashes.append(hash_value)

        print("Finish")

示例#31

0

显示文件

def _markIgnored(found):
    """Reset the parallel match lists in place to the single "ignore" row.

    ``found`` is the tuple ``(urls, dates, percentages, authors, titles)``.
    The lists are mutated rather than rebound so that every holder of
    ``found`` observes the reset.
    """
    found[0][:] = ['delete']
    for column in found[1:]:
        column[:] = [-1]


def _belongsInCategory(timePassed, subSettings, own, otherA, otherB):
    """Return True when the category whose day limit is ``subSettings[own]``
    should hold a post that is ``timePassed`` days old.

    A category without a limit always qualifies; otherwise the post must be
    younger than the limit and the limit must exceed every other configured
    limit.
    """
    limit = subSettings[own]
    if limit is None:
        return True
    return (timePassed < limit
            and (subSettings[otherA] is None or limit > subSettings[otherA])
            and (subSettings[otherB] is None or limit > subSettings[otherB]))


def _isHostedVideo(media):
    """Return True when ``media`` is present and is not a gfycat/YouTube embed."""
    if media is None:
        return False
    if 'oembed' not in media or 'provider_name' not in media['oembed']:
        return True
    return media['oembed']['provider_name'] not in ('gfycat', 'YouTube')


def _addExactMatches(cursor, content, found):
    """Record every stored post whose Content equals ``content`` as a 100% match."""
    cursor.execute(
        'SELECT Url, Date, Author, Title FROM Posts WHERE Content = ?;',
        (str(content), ),
    )
    for row in cursor.fetchall():
        addToFound(row, 100, *found)


def _addNearMatches(cursor, found, threshold, measure):
    """Record stored posts whose content is within ``threshold`` of the new post.

    ``measure(storedContent)`` returns the difference between a stored
    content string and the new post, or ``None`` when the stored value is
    not comparable. Posts already recorded in ``found`` are skipped.
    """
    urls = found[0]
    cursor.execute('SELECT Url, Date, Author, Title, Content FROM posts;')
    for row in cursor.fetchall():
        # Content is the fifth selected column; rows without content cannot
        # be compared.
        if row[0] in urls or row[4] is None:
            continue
        difference = measure(str(row[4]))
        if difference is not None and difference < threshold:
            addToFound(
                row,
                ((threshold - difference) / threshold) * 100,
                *found
            )


def _describeAge(then, now):
    """Format the time between ``then`` and ``now`` in the coarsest unit that
    is at least 1 (months, days, hours, minutes), falling back to seconds."""
    elapsed = now - then
    months = monthDelta(then, now)
    if months >= 1:
        return '{} months ago'.format(str(months))
    days = elapsed.days
    if days >= 1:
        return '{} days ago'.format(str(days))
    hours = elapsed.total_seconds() // 3600
    if hours >= 1:
        return '{} hours ago'.format(str(hours))
    minutes = elapsed.total_seconds() // 60
    if minutes >= 1:
        return '{} minutes ago'.format(str(minutes))
    return '{} seconds ago'.format(str(elapsed.total_seconds()))


def isLogged(contentUrl, media, text, url, date, top, hot, new, subSettings,
             reddit):
    """Look a post up in the per-subreddit database and list earlier matches.

    Args:
        contentUrl: URL of the linked content; '' when there is none.
        media: media dict of the post or None; only ``media['oembed']
            ['provider_name']`` is inspected here.
        text: body text of the post ('' or '&#x200B;' count as empty).
        url: value matched against the ``Url`` column for the post itself.
        date: creation time of the post as an epoch timestamp.
        top, hot, new: truthy for the listing(s) the post was seen in.
        subSettings: indexable settings; ``[0]`` builds the database file
            name (first letter upper-cased), ``[1]``/``[2]``/``[3]`` are the
            maximum ages in days for top/hot/new (None = unlimited) and
            ``[7]`` is the maximum difference for a near match.
        reddit: client exposing ``submission(url=...)``; used to drop
            matches whose ``selftext`` is '[deleted]'.

    Returns:
        A list of ``[url, ageText, postDate, percentMatched, author, title]``
        rows, one per matching stored post, or
        ``[['delete', -1, -1, -1, -1, -1]]`` when the post must be ignored
        (too old, already stored, or its media could not be hashed).
    """
    # Parallel columns filled by addToFound (presumably one append per column
    # and call - defined elsewhere in this file).
    result = []
    originalPostDate = []
    precentageMatched = []
    author = []
    title = []
    found = (result, originalPostDate, precentageMatched, author, title)
    ignored = False

    conn = sqlite3.connect('Posts{}.db'.format(
        sub(
            '([a-zA-Z])',
            lambda m: m.groups()[0].upper(),
            subSettings[0],
            count=1,
        )))
    try:
        c = conn.cursor()

        # Both ends of the age computation are naive UTC so the difference
        # does not depend on the machine's time zone.
        now = datetime.utcnow()
        timePassed = (now - datetime.utcfromtimestamp(date)).days

        tooOld = any(
            subSettings[index] is not None
            and timePassed > subSettings[index] and seen
            for index, seen in ((1, top), (2, hot), (3, new)))

        if tooOld:
            _markIgnored(found)
            ignored = True
        else:
            c.execute(
                'SELECT Location FROM Posts WHERE Url = ?;',
                (str(url), ),
            )
            locations = c.fetchall()

            if locations:
                # Already stored: make sure it is filed under the right
                # category, then ignore it.
                categories = (
                    ('top', top, (1, 2, 3)),
                    ('hot', hot, (2, 1, 3)),
                    ('new', new, (3, 2, 1)),
                )
                for row in locations:
                    for name, seen, (own, otherA, otherB) in categories:
                        if (row[0] != name and seen
                                and _belongsInCategory(timePassed, subSettings,
                                                       own, otherA, otherB)):
                            updateDatabase(conn, url, name)
                _markIgnored(found)
                ignored = True

            # text post
            elif text != '&#x200B;' and text != '':
                _addExactMatches(c, text, found)
                _addNearMatches(c, found, subSettings[7],
                                lambda stored: distance(stored, text))

            # video that is not a gfycat/YouTube embed
            elif _isHostedVideo(media):
                vidHash = hashVid(conn, media, url)
                if vidHash == 'invalid':
                    _markIgnored(found)
                    ignored = True
                if isInt(vidHash.replace(' ', '')):
                    _addExactMatches(c, vidHash, found)
                    _addNearMatches(
                        c, found, subSettings[7],
                        lambda stored: hashVidDifference(stored, vidHash)
                        if isInt(stored.replace(' ', '')) else None)

            # link, image or gif
            elif contentUrl != '':
                _addExactMatches(
                    c,
                    str(contentUrl).replace('&feature=youtu.be', ''),
                    found,
                )

                if 'gif' in contentUrl and not (contentUrl.endswith('gifv')
                                                or 'gifs' in contentUrl):
                    gifHash = hashGif(conn, contentUrl, url)
                    if gifHash == 'invalid':
                        _markIgnored(found)
                        ignored = True
                    if isInt(gifHash.replace(' ', '')):
                        _addExactMatches(c, gifHash, found)
                        _addNearMatches(
                            c, found, subSettings[7],
                            lambda stored: hashVidDifference(stored, gifHash)
                            if isInt(stored.replace(' ', '')) else None)
                elif 'png' in contentUrl or 'jpg' in contentUrl:
                    imgHash = hashImg(conn, contentUrl, url)
                    if imgHash == 'invalid':
                        _markIgnored(found)
                        ignored = True
                    if isInt(imgHash):
                        _addExactMatches(c, imgHash, found)
                        _addNearMatches(
                            c, found, subSettings[7],
                            lambda stored: dhash.get_num_bits_different(
                                imgHash, int(stored))
                            if isInt(stored) else None)

        # Forget stored posts that have been deleted on reddit.
        deletedIndexes = set()
        for index, postUrl in enumerate(result):
            if postUrl == '' or postUrl == 'delete':
                continue
            submission = reddit.submission(
                url='https://reddit.com{}'.format(postUrl))
            if submission.selftext == '[deleted]':
                c.execute(
                    'DELETE FROM Posts WHERE Url = ?;',
                    (str(postUrl), ),
                )
                deletedIndexes.add(index)
                print('deleted {}'.format(postUrl))

        if deletedIndexes:
            # Persist the deletions; closing without a commit discards them.
            conn.commit()
            # Drop by position so the parallel columns stay aligned even
            # when several entries share a value.
            for column in found:
                column[:] = [
                    value for index, value in enumerate(column)
                    if index not in deletedIndexes
                ]

        c.close()
    finally:
        conn.close()

    if ignored:
        # The sentinel row carries -1 instead of an age label.
        finalTimePassed = [-1]
    else:
        finalTimePassed = [
            _describeAge(datetime.utcfromtimestamp(stamp), now)
            for stamp in originalPostDate
        ]

    returnResult = [
        [
            postUrl,
            finalTimePassed[index],
            originalPostDate[index],
            precentageMatched[index],
            author[index],
            title[index],
        ]
        for index, postUrl in enumerate(result)
    ]

    if returnResult != [['delete', -1, -1, -1, -1, -1]]:
        print('Found? {}'.format(returnResult))

    return returnResult