Пример #1
0
def createVPTree(images, hashfile, treefile):
    """Hash every image under ``images`` and persist a VP-Tree and hash index.

    Args:
        images: Directory handed to ``paths.list_images``.
        hashfile: Output path for the pickled ``{hash: [image paths]}`` dict.
        treefile: Output path for the pickled VP-Tree.
    """
    hashes = {}
    for imagePath in paths.list_images(images):
        image = cv2.imread(imagePath)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning
            # None; skip it instead of crashing inside the hash function.
            print("[WARN] skipping unreadable image {}".format(imagePath))
            continue

        # compute the hash for the image and convert it
        h = convert_hash(dhash(image))

        # group image paths that share the same hash
        hashes.setdefault(h, []).append(imagePath)

    # build the VP-Tree
    print("[INFO] building VP-Tree...")
    points = list(hashes.keys())
    tree = vptree.VPTree(points, hammingDistance)

    # serialize the VP-Tree to disk; `with` closes the file even on error
    print("[INFO] serializing VP-Tree...")
    with open(treefile, "wb") as f:
        pickle.dump(tree, f)

    # serialize the hashes dictionary
    print("[INFO] serializing hashes...")
    with open(hashfile, "wb") as f:
        pickle.dump(hashes, f)
Пример #2
0
def trackPerson(image, hashes, tree, distance=15):
    """Assign a person id to ``image`` and rebuild the VP-Tree.

    Looks up the closest previously seen hash within ``distance``; when one
    is found its first recorded id is reused, otherwise the module-level
    ``PERSON_COUNT`` is incremented and used as a new id.

    Args:
        image: Image passed to ``dhash``.
        hashes: Dict mapping hash -> list of person ids; mutated in place.
        tree: Existing VP-Tree over ``hashes`` keys, or None on first call.
        distance: Maximum distance passed to ``tree.get_all_in_range``.

    Returns:
        Tuple ``(tree, hashes, person_id)`` where ``tree`` is freshly rebuilt
        from all keys of ``hashes``.
    """
    global PERSON_COUNT
    # compute the hash for the image and convert it
    h = convert_hash(dhash(image))

    person_id = -1
    # search for similar person; identity check avoids invoking any custom
    # __eq__ on the tree object
    if tree is not None:
        sim = sorted(tree.get_all_in_range(h, distance))
        if sim:
            # sim[0] is the nearest match; its second element is the hash
            p_ids = hashes.get(sim[0][1], [])
            if p_ids:
                person_id = p_ids[0]

    if person_id == -1:
        PERSON_COUNT += 1
        person_id = PERSON_COUNT

    # update the hashes dictionary
    hashes.setdefault(h, []).append(person_id)

    # rebuild the tree so the new hash is searchable on the next call
    tree = vptree.VPTree(list(hashes.keys()), hammingDistance)

    return tree, hashes, person_id
Пример #3
0
    def index_person(self, image, distance=15):
        """Assign a person id to ``image`` and rebuild ``self.tree``.

        Reuses the first id recorded for the nearest hash within ``distance``
        when ``self.tree`` has a match; otherwise allocates a new id from the
        ``PERSON_COUNT`` counter.

        Args:
            image: Image passed to ``dhash``.
            distance: Maximum distance passed to ``get_all_in_range``.

        Returns:
            The person id assigned to ``image``.
        """
        # PERSON_COUNT is rebound below, so it must be declared global or the
        # increment raises UnboundLocalError.
        # NOTE(review): assumes PERSON_COUNT is a module-level counter, as in
        # the function-based variant of this code; confirm.
        global PERSON_COUNT

        # compute the hash for the image and convert it
        h = convert_hash(dhash(image))

        person_id = -1
        # search for similar person (the tree lives on the instance)
        if self.tree is not None:
            sim = sorted(self.tree.get_all_in_range(h, distance))
            if sim:
                p_ids = self.hashes.get(sim[0][1], [])
                if p_ids:
                    person_id = p_ids[0]

        if person_id == -1:
            PERSON_COUNT += 1
            person_id = PERSON_COUNT

        # update the hashes dictionary
        self.hashes.setdefault(h, []).append(person_id)

        # rebuild the tree from the instance's hashes so the new entry is
        # searchable on the next call
        points = list(self.hashes.keys())
        self.tree = vptree.VPTree(points, hammingDistance)

        return person_id
Пример #4
0
def compute_hashes(img_paths, hashes=None):
    """Group image paths by their converted hash.

    Args:
        img_paths: Iterable of image paths passed to ``hash_distance``.
        hashes: Optional existing ``{hash: [paths]}`` dict to extend in
            place. A fresh dict is created when omitted, so results no
            longer leak between calls through a shared default.

    Returns:
        The ``{hash: [paths]}`` dict.
    """
    if hashes is None:
        hashes = {}
    for img_path in img_paths:
        idx = convert_hash(hash_distance(img_path))
        hashes.setdefault(idx, []).append(img_path)
    return hashes
Пример #5
0
                default=100,
                help="maximum hamming distance")
args = vars(ap.parse_args())

# load the VP-Tree and hashes dictionary
# NOTE: pickle can execute arbitrary code while loading; only point these
# arguments at files you created yourself.
print("[INFO] loading VP-Tree and hashes...")
with open(args["tree"], "rb") as f:
    tree = pickle.load(f)
with open(args["hashes"], "rb") as f:
    hashes = pickle.load(f)

# load the input query image
image = cv2.imread(args["query"])
if image is None:
    # cv2.imread returns None rather than raising for unreadable files
    raise SystemExit("[ERROR] could not read query image: {}".format(
        args["query"]))
cv2.imshow("Query", image)

# compute the hash for the query image, then convert it
queryHash = dhash(image)
queryHash = convert_hash(queryHash)


def searchSimilarImages(distance):
    """Return the sorted VP-Tree matches for the query hash.

    Queries the module-level ``tree`` for every entry within ``distance`` of
    the module-level ``queryHash`` and reports how long the lookup took.
    """
    print("[INFO] performing search...")
    started_at = time.time()
    matches = sorted(tree.get_all_in_range(queryHash, distance))
    finished_at = time.time()
    print("[INFO] search took {} seconds".format(finished_at - started_at))

    return matches


def main():
Пример #6
0
                help="maximum hamming distance")
# ^^^^ ADJUST THIS METRIC FOR QUERY THRESHOLD (larger distance=more images to compare [longer runtime])
# --size is optional; default to "8" (matching the help text) so the int()
# conversion below does not fail with TypeError when the flag is omitted.
ap.add_argument("-s",
                "--size",
                required=False,
                type=str,
                default="8",
                help="image resize (default is 8x8)")
args = vars(ap.parse_args())

# load the input query image
image = cv2.imread(args["query"])
if image is None:
    # cv2.imread returns None rather than raising for unreadable files
    raise SystemExit("[ERROR] could not read query image: {}".format(
        args["query"]))

# compute the hash for the query image, then convert it
queryHash = hs.dhash(image, int(args["size"]))
queryHash = hs.convert_hash(queryHash)
print("the query image hash value is", queryHash)

# load the hashes dictionary (the VP-Trees are loaded shard by shard below)
# NOTE: pickle can execute arbitrary code while loading; only use trusted
# files.
print("[INFO] loading VP-Tree and hashes...")
with open(args["hashes"], "rb") as hashesFile:
    hashes = pickle.load(hashesFile)

start = time.time()
resultsList = []  # accumulates the query results from every tree
# Visit every VP-Tree shard matching vptree_*.pickle under the --tree path.
for pickleTree in glob.glob(args["tree"] + "/vptree_*.pickle"):
    print("[INFO] loading VP-Tree: {pickle}".format(pickle=pickleTree))
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # shards from a trusted location.
    with open(pickleTree, 'rb') as f:
        tree = pickle.load(f)
    # Earlier one-liner kept for reference (it left the file handle open):
    #tree=pickle.loads(open(pickleTree, "rb").read())
Пример #7
0
folders = './'

# Import the list of image URLs as a Spark RDD
urlsRDD = sc.textFile("s3a://" + bucket + "/urls.txt")

# Download and acquire image vectors: (url, image)
img_vectors = urlsRDD.map(
    lambda url: (url, impg.read_image_from_s3(bucket, url)))

# dHash function: (url, dHash)
# Cached because img_hash feeds two separate lineages below (the pickled
# dictionary and the sparse vectors); without it every action re-runs the
# S3 download and hashing.
img_hash = img_vectors.map(
    lambda img: (img[0], hs.convert_hash(hs.dhash(img[1], 32)))).cache()

# Swap to (dHash, url) pairs so the hash becomes the dictionary key
dHash_dict = img_hash.map(lambda url_dHash: (url_dHash[1], url_dHash[0]))

# Pickle the python hash dictionary
# NOTE: collectAsMap keeps a single URL per dHash, so URLs whose images hash
# identically collapse into one entry.
hs.pickleHash(dHash_dict.collectAsMap())

# Convert each image dHash into a sparse vector (required input for LSH)
img_sparse = img_hash.map(
    lambda img: (img[0], str(img[1]), hs.sparse_vectorize(img[1])))

#Converts array of sparse img vectors into dataframe
Пример #8
0
import pickle

# S3 Bucket/Folder
bucket = 'vasco-imagenet-db'
folders = 'test_small'

# Import the list of image URLs as a Spark RDD
urlsRDD = sc.textFile("s3a://" + bucket + "/urls.txt")

# Download and acquire image vectors: (url, image)
img_vectors = urlsRDD.map(lambda url:
                          (url, impg.read_image_from_s3(bucket, url)))

# dHash function: (url, dHash)
# Cached because img_hash feeds two separate lineages below (the pickled
# dictionary and the DataFrame); without it every action re-runs the S3
# download and hashing.
img_hash = img_vectors.map(
    lambda img: (img[0], hs.convert_hash(hs.dhash(img[1], 32)))).cache()

# Swap to (dHash, url) pairs so the hash becomes the dictionary key.
# Tuple-parameter lambdas (`lambda (url, dHash): ...`) are a SyntaxError on
# Python 3, so index into the pair instead; this form also runs on Python 2.
dHash_dict = img_hash.map(lambda url_dHash: (url_dHash[1], url_dHash[0]))

# Pickle the python hash dictionary
hs.pickleHash(dHash_dict.collectAsMap())

# Convert each image dHash into a sparse vector (required input for LSH)
img_sparse = img_hash.map(lambda img:
                          (img[0], str(img[1]), hs.sparse_vectorize(img[1])))

# Convert the RDD of sparse image vectors into a DataFrame
df = spark.createDataFrame(img_sparse, ["url", "dHash", "sparseHash"])

#MinHashLSH
Пример #9
0
            print(f" {i}")
    cv2.waitKey(0)


def check(img_path):
    """Validate an uploaded image path and return it unchanged.

    Exits the process with status 1 (after printing the reason to stderr)
    when ``img_path`` is not an existing regular file or its extension is not
    one of .jpg, .jpeg, .png or .bmp. The extension is compared
    case-insensitively, so ``photo.JPG`` is accepted.

    Args:
        img_path: Path to the candidate image file.

    Returns:
        ``img_path`` itself when it passes both checks.
    """
    if not os.path.isfile(img_path):
        print(f"error: not a file: {img_path}", file=sys.stderr)
        sys.exit(1)

    ext = os.path.splitext(img_path)[1].lower()
    if ext not in {".jpg", ".jpeg", ".png", ".bmp"}:
        print(f"error: unsupported image extension: {ext!r}", file=sys.stderr)
        sys.exit(1)

    return img_path


if __name__ == "__main__":
    # Parse the single required argument: the path of the uploaded image.
    ap = argparse.ArgumentParser()
    ap.add_argument("-u", "--upload", required=True, type=str)
    args = vars(ap.parse_args())
    img_path = check(args["upload"])

    # Load the prebuilt index from the working directory; `with` closes the
    # files promptly instead of leaving the handles to the garbage collector.
    # NOTE: pickle can execute arbitrary code while loading; only use
    # index files you generated yourself.
    with open("vptree.pickle", "rb") as tree_file:
        tree = pickle.load(tree_file)
    with open("hashes.pickle", "rb") as hashes_file:
        hashes = pickle.load(hashes_file)

    # Hash the query image the same way the index was built.
    query_hash = hash_distance(img_path)
    query_idx = convert_hash(query_hash)

    # Search the tree, map the hits back to image paths and show them.
    search_results = search(tree, query_idx)
    img_paths = get_imgs(search_results, hashes)
    display(img_paths)

##
Пример #10
0
def search():
    """Handle an image-similarity query posted from the web form.

    On POST, reads the ``img`` form field, loads the image (downloaded when
    the value contains ``http``, otherwise read from the webserver folder),
    hashes it, queries every ``static/pickles/vptree_*.pickle`` tree and
    responds with JSON ``{"results": [...]}`` holding at most four
    ``{"image": url, "score": distance}`` entries.

    Exceptions propagate to the caller unchanged; the original bare
    ``except: raise`` wrapper was a no-op and has been dropped.

    Raises:
        ValueError: If the ``img`` field is missing/empty or the image
            cannot be decoded.
    """
    if request.method == "POST":

        RESULTS_ARRAY = []

        # get url
        image_url = request.form.get('img')
        print(image_url)
        if not image_url:
            raise ValueError("missing 'img' form field")

        # SECURITY NOTE(review): image_url is client-supplied. Fetching it
        # with urlopen allows server-side request forgery, and the local
        # branch builds a filesystem path from it. Restrict both to known
        # hosts/directories before exposing this endpoint publicly.
        if 'http' in image_url:
            # download image into array (from url); `with` closes the
            # HTTP response once the bytes are read
            with urllib.request.urlopen(image_url) as resp:
                raw = np.asarray(bytearray(resp.read()), dtype="uint8")
            image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        else:
            # load the input query image (from webserver folder)
            image = cv2.imread('.' + image_url.split('..')[-1])

        if image is None:
            # cv2.imread / cv2.imdecode return None instead of raising
            raise ValueError(
                "could not load query image: {}".format(image_url))

        # compute the hash for the query image, then convert it
        # (hash size must be changed manually to match the indexed images)
        queryHash = hs.convert_hash(hs.dhash(image, 32))

        # load the hashes dictionary
        # NOTE: pickle can execute arbitrary code while loading; these
        # files must come from a trusted build step.
        print("[INFO] loading VP-Tree and hashes...")
        with open('static/pickles/img_hash_dictionary.pickle', "rb") as f:
            hashes = pickle.load(f)

        start = time.time()
        resultsList = []  # accumulates the best hits from every tree
        for pickleTree in glob.glob("static/pickles/vptree_*.pickle"):
            with open(pickleTree, 'rb') as f:
                tree = pickle.load(f)

            # Perform search in the VP-Tree. The range (50) can be tuned
            # down to lower computational time, but should still yield at
            # least four results.
            results = sorted(tree.get_all_in_range(queryHash, 50))

            # Keep the two closest hits per tree. The original loop also
            # kept two, although its comment spoke of "first result".
            resultsList.extend(results[:2])

        # Sort final list of all results
        resultsList = sorted(resultsList)
        end = time.time()
        print("[INFO] search took {} seconds".format(end - start))

        # loop over the ten best results
        for (score, h) in resultsList[:10]:
            # look up the image stored for this hash; skip hashes missing
            # from the dictionary instead of emitting a bogus ".../[]" URL
            entry = hashes.get(int(h))
            if entry is None:
                continue
            resultPaths = [entry]
            print("[INFO] {} total images(s) with d: {}, h:{}".format(
                len(resultPaths), score, h))
            for resultID in resultPaths:
                # Change the bucket to match what is being queried and
                # make sure its objects are publicly viewable.
                RESULTS_ARRAY.append({
                    "image":
                    'http://vasco-imagenet-db.s3-us-west-2.amazonaws.com/'
                    + str(resultID),
                    "score":
                    str(score)
                })

        # return success
        print(RESULTS_ARRAY)
        return jsonify(results=(RESULTS_ARRAY[:4]))