def get_most_similar(im):
    """
    Rank every indexed image by its similarity to the given image.

    The query image's features are computed with FeatureExtractor, the same
    MinMax scaler fitted at indexing time is applied, the neural portion of
    the vector is reduced with the fitted PCA, and the cosine distance to
    every indexed feature vector is computed.

    :param im: a np array 256x256x3 representing an image
    :return: an np array (sorted) with the most similar images that have been
             indexed. The first column contains scores, the second one
             contains image names
    """
    query = im.copy()

    # extract all handcrafted + neural features from the query image
    (lbp, rgb_hist, ycbcr_hist, ycbcr_statistics, rgb_statistics,
     sift_kp, neural) = FeatureExtractor.extract_all_features(query)
    bow = FeatureExtractor.extract_bow(Indexer.kmeans_model, sift_kp)
    feature_vec = (list(lbp) + list(rgb_hist) + list(ycbcr_hist)
                   + list(ycbcr_statistics) + list(rgb_statistics)
                   + list(bow) + list(neural))

    # MinMax normalization with the train scaler, then PCA on the neural part
    feature_vec = np.expand_dims(np.array(feature_vec), axis=0)
    feature_vec, _ = Matcher.__normalize_data(feature_vec, Matcher.__scaler)
    # NOTE(review): columns from 1016 on are assumed to hold the neural
    # features — confirm against the indexing-time layout
    reduced, _ = Matcher.__apply_pca(feature_vec[:, 1016:], Matcher.__pca)
    feature_vec = np.concatenate((reduced, feature_vec[:, :1016]), axis=1)

    # cosine distance between the query and every indexed feature vector
    distances = cdist(feature_vec, Matcher.__data_structure["features"],
                      metric="cosine")
    names = np.array([Matcher.__data_structure["im_names"]],
                     dtype="object").transpose()
    ranked = np.append(distances.transpose(), names, axis=1)

    # ascending cosine distance: the most similar images come first
    return ranked[ranked[:, 0].argsort()]
def build_data_structure(index_folder):
    """
    Build the data structure that has to be used by Matcher.

    If a serialized data structure and KMeans model already exist (paths taken
    from config), they are loaded from disk and reused instead of re-indexing.

    :param index_folder: the path to the images to be indexed
                         (note: assumed to end with a path separator, since it
                         is concatenated directly with the file name — TODO
                         confirm against callers)
    :return: the data structure that has to be used by Matcher: a dict with
             keys "im_names", "labels" and "features" (a matrix whose rows
             correspond to images and columns to features)
    """
    # if a predefined data structure and KMeans model exist, load and use them
    if os.path.isfile(config.INDEXING_DATA_STRUCTURE_PATH) and os.path.isfile(
            config.INDEXING_KMEANS_PATH):
        print("Loading KMeans model and data_structure...")
        # use context managers so the file handles are always closed
        # (pickle.load(open(...)) previously leaked the descriptors)
        with open(config.INDEXING_KMEANS_PATH, "rb") as kmeans_file:
            Indexer.kmeans_model = pickle.load(kmeans_file)
        with open(config.INDEXING_DATA_STRUCTURE_PATH, "rb") as ds_file:
            data_structure = pickle.load(ds_file)
        print("Done.")
        return data_structure

    # build the data structure from scratch
    data_structure = {
        "im_names": [],
        "lbp": [],
        "rgb_hist": [],
        "ycbcr_hist": [],
        "rgb_statistics": [],
        "ycbcr_statistics": [],
        "neural": [],
        "sift_kp": [],
        "labels": []
    }
    files = sorted(os.listdir(index_folder))
    image_classes_dict = Indexer.extract_classes()

    # from each image, extract the features using FeatureExtractor
    for file in tqdm(files):
        im = Indexer.load_image(index_folder + file)
        try:
            (lbp, rgb_hist, ycbcr_hist, ycbcr_statistics, rgb_statistics,
             sift_kp, neural) = FeatureExtractor.extract_all_features(im)
        except FeatureExtractionException:
            # skip images whose features cannot be extracted
            continue
        # the BOW encoding needs at least one SIFT descriptor per image
        if sift_kp is None or len(sift_kp.shape) == 0:
            print("Found image without any sift descriptors: ", file)
            continue
        data_structure["rgb_hist"].append(rgb_hist)
        data_structure["ycbcr_hist"].append(ycbcr_hist)
        data_structure["lbp"].append(lbp)
        data_structure["ycbcr_statistics"].append(ycbcr_statistics)
        data_structure["rgb_statistics"].append(rgb_statistics)
        data_structure["sift_kp"].append(sift_kp)
        data_structure["neural"].append(neural)
        data_structure["im_names"].append(file)
        data_structure["labels"].append(image_classes_dict[file])

    # build the KMeans model for the BOW of SIFT descriptors
    kp_set = np.concatenate(data_structure["sift_kp"], axis=0)
    Indexer.kmeans_model = KMeans(n_clusters=100, random_state=123).fit(kp_set)

    # build the attribute containing the features for each image;
    # data_structure["features"] is a matrix whose rows correspond to images
    # and columns to features
    data_structure["features"] = []
    for i in range(len(data_structure["im_names"])):
        # NOTE: the concatenation order must match the one used at query time
        tmp = list(data_structure["lbp"][i])
        tmp += list(data_structure["rgb_hist"][i])
        tmp += list(data_structure["ycbcr_hist"][i])
        tmp += list(data_structure["ycbcr_statistics"][i])
        tmp += list(data_structure["rgb_statistics"][i])
        tmp += list(FeatureExtractor.extract_bow(Indexer.kmeans_model,
                                                 data_structure["sift_kp"][i]))
        tmp += list(data_structure["neural"][i])
        data_structure["features"].append(tmp)
    data_structure["features"] = np.array(data_structure["features"])

    # remove the per-feature lists that are no longer needed
    for key in ("lbp", "rgb_hist", "ycbcr_hist", "neural",
                "ycbcr_statistics", "rgb_statistics", "sift_kp"):
        data_structure.pop(key, None)
    return data_structure