def createVPTree(images, hashfile, treefile):
    imagePaths = list(paths.list_images(images))
    hashes = {}
    for (i, imagePath) in enumerate(imagePaths):
        # print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
        image = cv2.imread(imagePath)
        # compute the hash for the image and convert it
        h = dhash(image)
        h = convert_hash(h)
        # update the hashes dictionary
        l = hashes.get(h, [])
        l.append(imagePath)
        hashes[h] = l
    # build the VP-Tree
    print("[INFO] building VP-Tree...")
    points = list(hashes.keys())
    tree = vptree.VPTree(points, hammingDistance)
    # serialize the VP-Tree to disk
    print("[INFO] serializing VP-Tree...")
    f = open(treefile, "wb")
    f.write(pickle.dumps(tree))
    f.close()
    # serialize the hashes dictionary to disk
    print("[INFO] serializing hashes...")
    f = open(hashfile, "wb")
    f.write(pickle.dumps(hashes))
    f.close()
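# createVPTree above (and the tracking snippets below) rely on dhash,
# convert_hash and hammingDistance helpers that are not shown in this code.
# What follows is a minimal sketch of what they might look like, assuming the
# usual difference-hash recipe; treat it as an illustration, not the original
# implementations.
import cv2

def dhash(image, hashSize=8):
    # convert to grayscale and resize, adding one extra column so that
    # horizontally adjacent pixels can be compared
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (hashSize + 1, hashSize))
    # mark where each pixel is brighter than its left neighbour
    diff = resized[:, 1:] > resized[:, :-1]
    # pack the boolean difference image into a single integer hash
    return sum([2 ** i for (i, v) in enumerate(diff.flatten()) if v])

def convert_hash(h):
    # force the hash into a plain Python int so it can serve as a dict key
    # and as a point in the VP-Tree
    return int(h)

def hammingDistance(a, b):
    # number of differing bits between the two integer hashes
    return bin(int(a) ^ int(b)).count("1")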
def trackPerson(image, hashes, tree, distance=15):
    global PERSON_COUNT
    # compute the hash for the image and convert it
    h = dhash(image)
    h = convert_hash(h)
    person_id = -1
    # search for a similar, already-seen person
    if tree is not None:
        sim = sorted(tree.get_all_in_range(h, distance))
        if len(sim):
            p_ids = hashes.get(sim[0][1], [])
            if len(p_ids):
                person_id = p_ids[0]
    # no match found, so assign a new person id
    if person_id == -1:
        PERSON_COUNT += 1
        person_id = PERSON_COUNT
    # update the hashes dictionary
    l = hashes.get(h, [])
    l.append(person_id)
    hashes[h] = l
    # rebuild the VP-Tree over the updated set of hashes
    points = list(hashes.keys())
    tree = vptree.VPTree(points, hammingDistance)
    return tree, hashes, person_id
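# A hypothetical usage sketch for trackPerson above: assign a stable person id
# to each incoming crop by matching its hash against previously seen crops.
# person_crops (a list of BGR person detections) and the distance threshold of
# 15 bits are assumptions used only for illustration.
PERSON_COUNT = 0
hashes = {}
tree = None
for crop in person_crops:
    tree, hashes, person_id = trackPerson(crop, hashes, tree, distance=15)
    print("[INFO] crop assigned to person id {}".format(person_id))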
def index_person(self, image, distance=15):
    global PERSON_COUNT
    # compute the hash for the image and convert it
    h = dhash(image)
    h = convert_hash(h)
    person_id = -1
    # search for a similar, already-seen person
    if self.tree is not None:
        sim = sorted(self.tree.get_all_in_range(h, distance))
        if len(sim):
            p_ids = self.hashes.get(sim[0][1], [])
            if len(p_ids):
                person_id = p_ids[0]
    # no match found, so assign a new person id
    if person_id == -1:
        PERSON_COUNT += 1
        person_id = PERSON_COUNT
    # update the hashes dictionary
    l = self.hashes.get(h, [])
    l.append(person_id)
    self.hashes[h] = l
    # rebuild the VP-Tree over the updated set of hashes
    points = list(self.hashes.keys())
    self.tree = vptree.VPTree(points, hammingDistance)
    return person_id
def compute_hashes(img_paths, hashes=None):
    # avoid a shared mutable default argument
    if hashes is None:
        hashes = {}
    for img_path in img_paths:
        # compute and convert the hash for this image
        hashed = hash_distance(img_path)
        idx = convert_hash(hashed)
        # group image paths that share the same hash
        loc = hashes.get(idx, [])
        loc.append(img_path)
        hashes[idx] = loc
    return hashes
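# A small usage sketch for compute_hashes above: hash a folder of images and
# build a VP-Tree over the resulting hash keys. The hamming() metric, the
# "images/" folder and the pickle filenames are assumptions for illustration;
# hash_distance() is the helper compute_hashes already expects.
import glob
import pickle
import vptree

def hamming(a, b):
    return bin(int(a) ^ int(b)).count("1")

hashes = compute_hashes(glob.glob("images/*.jpg"))
tree = vptree.VPTree(list(hashes.keys()), hamming)
with open("vptree.pickle", "wb") as f:
    pickle.dump(tree, f)
with open("hashes.pickle", "wb") as f:
    pickle.dump(hashes, f)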
    default=100, help="maximum hamming distance")
args = vars(ap.parse_args())

# load the VP-Tree and hashes dictionary
print("[INFO] loading VP-Tree and hashes...")
tree = pickle.loads(open(args["tree"], "rb").read())
hashes = pickle.loads(open(args["hashes"], "rb").read())

# load the input query image
image = cv2.imread(args["query"])
cv2.imshow("Query", image)

# compute the hash for the query image, then convert it
queryHash = dhash(image)
queryHash = convert_hash(queryHash)

def searchSimilarImages(distance):
    # perform the search
    print("[INFO] performing search...")
    start = time.time()
    results = tree.get_all_in_range(queryHash, distance)
    results = sorted(results)
    end = time.time()
    print("[INFO] search took {} seconds".format(end - start))
    return results

def main():
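    # The original snippet is truncated at this point. What follows is an
    # assumed sketch of the body, not the original code: it runs the range
    # query and maps each matching hash back to image paths through the
    # hashes dictionary. args["distance"] and the display loop are guesses.
    results = searchSimilarImages(args["distance"])
    for (d, h) in results:
        # every indexed image that shares this hash is a match
        resultPaths = hashes.get(h, [])
        print("[INFO] {} matching image(s), hamming distance {}".format(
            len(resultPaths), d))
        for resultPath in resultPaths:
            result = cv2.imread(resultPath)
            cv2.imshow("Result", result)
            cv2.waitKey(0)

# assumed entry point
if __name__ == "__main__":
    main()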
    help="maximum hamming distance")
    # ^^^^ adjust this value to set the query threshold (a larger distance
    # means more images to compare and a longer runtime)
ap.add_argument("-s", "--size", required=False, type=str, default="8",
    help="image resize (default is 8x8)")
args = vars(ap.parse_args())

# load the input query image
image = cv2.imread(args["query"])
#cv2.imshow("Query", image)

# compute the hash for the query image, then convert it
queryHash = hs.dhash(image, int(args["size"]))
queryHash = hs.convert_hash(queryHash)
print("the query image hash value is", queryHash)

# load the VP-Tree and hashes dictionary
print("[INFO] loading VP-Tree and hashes...")
#tree = pickle.loads(open(args["tree"], "rb").read())
hashes = pickle.loads(open(args["hashes"], "rb").read())

start = time.time()
resultsList = []  # results of the image query are collected in this list
for pickleTree in glob.glob(args["tree"] + "/vptree_*.pickle"):
    print("[INFO] loading VP-Tree: {pickle}".format(pickle=pickleTree))
    with open(pickleTree, 'rb') as f:
        tree = pickle.load(f)
    #tree = pickle.loads(open(pickleTree, "rb").read())
folders = './'

# Import as Spark RDD
urlsRDD = sc.textFile("s3a://" + bucket + "/urls.txt")
#llist = urlsRDD.collect()
#urlsRDD.take(100).foreach(println)
#print(urlsRDD)
#impg.read_image_from_s3(bucket, url)

# Download and acquire image vectors
img_vectors = urlsRDD.map(lambda url: (url, impg.read_image_from_s3(bucket, url)))
#img_vectors.take(5)

# dHash function
img_hash = img_vectors.map(lambda img: (img[0], hs.convert_hash(hs.dhash(img[1], 32))))

# Makes a dictionary from the RDD containing dHash (key) and URL (value)
#dHash_dict = img_hash.map(lambda (url, dHash): (dHash, url))  # Python 2 code
dHash_dict = img_hash.map(lambda url_dHash: (url_dHash[1], url_dHash[0]))  # Python 3 code
#dHash_dict.take(5).foreach(println)

# Pickles the Python hash dictionary
hs.pickleHash(dHash_dict.collectAsMap())

# Converts each image dHash into a Sparse Vector (required input for LSH)
img_sparse = img_hash.map(lambda img: (img[0], str(img[1]), hs.sparse_vectorize(img[1])))

# Converts the array of sparse image vectors into a dataframe
import pickle

# S3 Bucket/Folder
bucket = 'vasco-imagenet-db'
folders = 'test_small'

# Import as Spark RDD
urlsRDD = sc.textFile("s3a://" + bucket + "/urls.txt")

# Download and acquire image vectors
img_vectors = urlsRDD.map(lambda url: (url, impg.read_image_from_s3(bucket, url)))

# dHash function
img_hash = img_vectors.map(lambda img: (img[0], hs.convert_hash(hs.dhash(img[1], 32))))

# Makes a dictionary from the RDD containing dHash (key) and URL (value)
# (tuple-unpacking lambdas are Python 2 only, so index the tuple instead)
dHash_dict = img_hash.map(lambda url_dHash: (url_dHash[1], url_dHash[0]))

# Pickles the Python hash dictionary
hs.pickleHash(dHash_dict.collectAsMap())

# Converts each image dHash into a Sparse Vector (required input for LSH)
img_sparse = img_hash.map(lambda img: (img[0], str(img[1]), hs.sparse_vectorize(img[1])))

# Converts the array of sparse image vectors into a dataframe
df = spark.createDataFrame(img_sparse, ["url", "dHash", "sparseHash"])

# MinHashLSH
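# The snippet ends where the MinHashLSH stage would begin. A minimal sketch of
# that stage with Spark ML, assuming the sparseHash column produced above; the
# output column name, the 5 hash tables and the 0.6 distance threshold are
# illustrative choices, not taken from the original code.
from pyspark.ml.feature import MinHashLSH

mh = MinHashLSH(inputCol="sparseHash", outputCol="lshHashes", numHashTables=5)
model = mh.fit(df)

# approximate self-join to surface near-duplicate images within the chosen
# Jaccard distance threshold
dupes = model.approxSimilarityJoin(df, df, 0.6, distCol="JaccardDistance")
dupes.select("datasetA.url", "datasetB.url", "JaccardDistance").show()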
        print(f" {i}")
    cv2.waitKey(0)

def check(img_path):
    # only accept existing files with a supported image extension
    if not os.path.isfile(img_path):
        sys.exit(1)
    filename, ext = os.path.splitext(img_path)
    if ext not in [".jpg", ".jpeg", ".png", ".bmp"]:
        sys.exit(1)
    return img_path

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("-u", "--upload", required=True, type=str)
    args = vars(ap.parse_args())
    img_path = check(args["upload"])
    # load the serialized VP-Tree and hashes dictionary
    tree = pickle.loads(open("vptree.pickle", "rb").read())
    hashes = pickle.loads(open("hashes.pickle", "rb").read())
    # hash the query image and search the tree
    query_hash = hash_distance(img_path)
    query_idx = convert_hash(query_hash)
    search_results = search(tree, query_idx)
    img_paths = get_imgs(search_results, hashes)
    display(img_paths)
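# The __main__ block above calls search() and get_imgs(), which are not shown
# in this snippet. A plausible sketch of both, assuming the vptree package's
# get_all_in_range() and an illustrative default radius of 10 bits:
def search(tree, query_idx, distance=10):
    # return (hamming distance, hash) pairs sorted by distance
    return sorted(tree.get_all_in_range(query_idx, distance))

def get_imgs(search_results, hashes):
    # map each matching hash back to the image paths indexed under it
    img_paths = []
    for (_, h) in search_results:
        img_paths.extend(hashes.get(h, []))
    return img_paths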
def search():
    if request.method == "POST":
        RESULTS_ARRAY = []
        # get the query image url
        image_url = request.form.get('img')
        print(image_url)
        try:
            if 'http' in image_url:
                # download the image into an array (from url)
                resp = urllib.request.urlopen(image_url)
                image = np.asarray(bytearray(resp.read()), dtype="uint8")
                image = cv2.imdecode(image, cv2.IMREAD_COLOR)
            else:
                # load the input query image (from the webserver folder)
                image = cv2.imread('.' + image_url.split('..')[-1])

            # compute the hash for the query image, then convert it
            queryHash = hs.dhash(image, 32)  # manually change to match the indexed images
            queryHash = hs.convert_hash(queryHash)

            # load the hashes dictionary
            print("[INFO] loading VP-Tree and hashes...")
            hashes = pickle.loads(
                open('static/pickles/img_hash_dictionary.pickle', "rb").read())

            start = time.time()
            resultsList = []  # results of the image query are collected in this list
            for pickleTree in glob.glob("static/pickles/vptree_*.pickle"):
                #print("[INFO] loading VP-Tree: {pickle}".format(pickle=pickleTree))
                with open(pickleTree, 'rb') as f:
                    tree = pickle.load(f)

                # perform the search in this VP-Tree; tune the radius to lower
                # computational time while still yielding at least four results
                #print("[INFO] performing search on {pickle}".format(pickle=pickleTree))
                results = tree.get_all_in_range(queryHash, 50)
                results = sorted(results)

                # keep only the first couple of results from this tree
                # (the final list is capped at 10 below), then move on
                for i, result in enumerate(results):
                    resultsList.append(result)
                    if i >= 1:
                        break

            # sort the final list of all results
            resultsList = sorted(resultsList)
            end = time.time()
            print("[INFO] search took {} seconds".format(end - start))

            # loop over the results
            for (score, h) in resultsList[:10]:
                # grab the image path indexed under this hash (the dictionary
                # maps each hash to a single URL, so wrap it in a list)
                resultPaths = [hashes.get(int(h), [])]
                print("[INFO] {} total image(s) with d: {}, h: {}".format(
                    len(resultPaths), score, h))

                # loop over the result paths
                for resultID in resultPaths:
                    # optionally strip the URL path prefix (the prefix is
                    # already included in the output):
                    #resultID = str(resultID).split('/')[-1]
                    RESULTS_ARRAY.append({
                        "image": 'http://vasco-imagenet-db.s3-us-west-2.amazonaws.com/'
                                 + str(resultID),
                        "score": str(score)
                    })  # change the bucket to match what is being queried (with view permissions)

            # return success
            print(RESULTS_ARRAY)
            return jsonify(results=(RESULTS_ARRAY[:4]))

        except:
            # return error
            #return jsonify({"sorry": "Sorry, no results! Please try again."}), 500
            raise
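# search() above reads like a Flask view function (it uses request.form and
# jsonify). A hedged sketch of how it might be registered and exercised; the
# app object, route path, port and example curl call are assumptions, not
# taken from the snippet.
from flask import Flask, request, jsonify

app = Flask(__name__)
app.add_url_rule("/search", view_func=search, methods=["POST"])

if __name__ == "__main__":
    # POST a form field named "img" containing an image URL, e.g.:
    #   curl -X POST -F img=http://example.com/query.jpg http://localhost:5000/search
    app.run(host="0.0.0.0", port=5000, debug=True)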