示例#1
0
def search_by_image(image_content: bytes) -> List[Tuple[float, model.Post]]:
    """
    Find stored posts whose image signature is close to the given image.

    A signature and its words are generated from ``image_content``. The
    database is asked for at most 100 ``post_signature`` rows, ranked by how
    many words they have in common with the query, and those candidates are
    then filtered by their normalized signature distance.

    :param image_content: raw bytes of the image to search with.
    :return: ``(distance, post)`` pairs, in the order the candidates came
        back from the database, for every candidate whose distance is below
        ``image_hash.DISTANCE_CUTOFF``; an empty list when the query yields
        no candidates at all.
    """
    query_signature = image_hash.generate_signature(image_content)
    query_words = image_hash.generate_words(query_signature)

    # The unnest function is used here to expand the stored 'words' array and
    # the query words array side by side into multiple rows, each holding a
    # single (word, query) pair. The WHERE clause keeps the pairs that are
    # equal and count() turns them into the candidate's score.
    #
    # Documentation of the unnest function can be found here:
    # https://www.postgresql.org/docs/9.2/functions-array.html
    dbquery = """
    SELECT s.post_id, s.signature, count(a.query) AS score
    FROM post_signature AS s, unnest(s.words, :q) AS a(word, query)
    WHERE a.word = a.query
    GROUP BY s.post_id
    ORDER BY score DESC LIMIT 100;
    """

    rows = db.session.execute(dbquery, {"q": query_words})

    # The score is only needed by the database for ranking; it is dropped
    # here and the packed signature is decoded for the distance computation.
    unpacked_rows = [
        (post_id, image_hash.unpack_signature(packed_signature))
        for post_id, packed_signature, _score in rows
    ]
    if not unpacked_rows:
        return []

    # Transpose the (id, signature) pairs into two parallel tuples.
    candidate_post_ids, signatures = zip(*unpacked_rows)
    distances = image_hash.normalized_distance(signatures, query_signature)

    matches = []
    for candidate_post_id, distance in zip(candidate_post_ids, distances):
        if distance < image_hash.DISTANCE_CUTOFF:
            matches.append((distance, try_get_post_by_id(candidate_post_id)))
    return matches
示例#2
0
def test_signature_functions(read_asset, config_injector):
    """
    Exercise the image_hash signature helpers on two JPEG assets.

    Uses the 'jpeg.jpg' and 'jpeg-similar.jpg' assets to check the
    pack/unpack round trip, the normalized distance between the two
    signatures (and of a signature to itself), and the number of words that
    match position by position.

    ``config_injector`` is requested as a fixture but not referenced in the
    body.
    """
    tolerance = 1e-8

    first_sig = image_hash.generate_signature(read_asset('jpeg.jpg'))
    second_sig = image_hash.generate_signature(read_asset('jpeg-similar.jpg'))

    # Packing followed by unpacking must give back an equal signature.
    first_roundtrip = image_hash.unpack_signature(
        image_hash.pack_signature(first_sig))
    second_roundtrip = image_hash.unpack_signature(
        image_hash.pack_signature(second_sig))
    assert array_equal(first_sig, first_roundtrip)
    assert array_equal(second_sig, second_roundtrip)

    # Distance between the two different images is pinned to a known value.
    cross_distance = image_hash.normalized_distance([first_sig], second_sig)
    assert abs(cross_distance[0] - 0.20599895341812172) < tolerance

    # A signature compared with itself must be at distance zero.
    self_distance = image_hash.normalized_distance([second_sig], second_sig)
    assert abs(self_distance[0]) < tolerance

    first_words = image_hash.generate_words(first_sig)
    second_words = image_hash.generate_words(second_sig)

    # Count the positions at which both word lists hold the same word.
    matching_words = 0
    for left_word, right_word in zip(first_words, second_words):
        matching_words = matching_words + (left_word == right_word)
    assert matching_words == 17
示例#3
0
def generate_post_signature(post: model.Post, content: bytes) -> None:
    """
    Compute image hash data for a post and add it to the database session.

    A signature is generated from ``content``, packed for storage, and split
    into words; the result is added to ``db.session`` as a
    ``model.PostSignature`` linked to ``post``.

    :param post: the post the signature belongs to.
    :param content: raw bytes of the post's content.
    :raises InvalidPostContentError: when hashing fails with
        ``errors.ProcessingError`` and the ``allow_broken_uploads`` config
        option is falsy. The original error is attached as the cause.
    """
    try:
        unpacked_signature = image_hash.generate_signature(content)
        packed_signature = image_hash.pack_signature(unpacked_signature)
        words = image_hash.generate_words(unpacked_signature)

        db.session.add(model.PostSignature(
            post=post, signature=packed_signature, words=words))
    except errors.ProcessingError as ex:
        # When broken uploads are allowed the failure is deliberately
        # ignored and the post is simply left without a signature.
        if not config.config['allow_broken_uploads']:
            # Chain explicitly so the underlying processing error shows up
            # as the direct cause in tracebacks and logs.
            raise InvalidPostContentError(
                'Unable to generate image hash data.') from ex
示例#4
0
def test_signature_avif(read_asset, config_injector):
    """
    Exercise the image_hash signature helpers on two AVIF assets.

    Uses the "avif.avif" and "avif-similar.avif" assets to check the
    pack/unpack round trip, the normalized distance between the two
    signatures (and of a signature to itself), and the number of words that
    match position by position.

    ``config_injector`` is requested as a fixture but not referenced in the
    body.
    """
    tolerance = 1e-8

    first_sig = image_hash.generate_signature(read_asset("avif.avif"))
    second_sig = image_hash.generate_signature(
        read_asset("avif-similar.avif"))

    # Packing followed by unpacking must give back an equal signature.
    first_roundtrip = image_hash.unpack_signature(
        image_hash.pack_signature(first_sig))
    second_roundtrip = image_hash.unpack_signature(
        image_hash.pack_signature(second_sig))
    assert array_equal(first_sig, first_roundtrip)
    assert array_equal(second_sig, second_roundtrip)

    # Distance between the two different images is pinned to a known value.
    cross_distance = image_hash.normalized_distance([first_sig], second_sig)
    assert abs(cross_distance[0] - 0.22628712858355998) < tolerance

    # A signature compared with itself must be at distance zero.
    self_distance = image_hash.normalized_distance([second_sig], second_sig)
    assert abs(self_distance[0]) < tolerance

    first_words = image_hash.generate_words(first_sig)
    second_words = image_hash.generate_words(second_sig)

    # Count the positions at which both word lists hold the same word.
    matching_words = 0
    for left_word, right_word in zip(first_words, second_words):
        matching_words = matching_words + (left_word == right_word)
    assert matching_words == 12