def __init__(self, extractor_model, dimension_reduction_model, k_value, label=None, image_metadata=False, subject_subject=False, folder_metadata=None, metadata_collection=None, matrix=None, conversion=True, save_model=False):
    """Store the reduction configuration and open the Mongo connection.

    Note: the ``image_metadata`` argument is kept on the instance under
    the name ``binary_image_metadata``.
    """
    # Project-wide constants and the DB handle used by the other methods.
    self.constants = GlobalConstants()
    self.mongo_wrapper = MongoWrapper(self.constants.Mongo().DB_NAME)
    # What to reduce and how.
    self.extractor_model = extractor_model
    self.dimension_reduction_model = dimension_reduction_model
    self.k_value = k_value
    self.matrix = matrix
    # Filtering / metadata options.
    self.label = label
    self.binary_image_metadata = image_metadata
    self.subject_subject = subject_subject
    self.folder_metadata = folder_metadata
    self.metadata_collection = metadata_collection
    # Output behaviour toggles.
    self.conversion = conversion
    self.save_model = save_model
def img_ids():
    """Return the imageIds of every document in the CM feature collection."""
    constants = GlobalConstants()
    cursor = MongoWrapper(constants.Mongo().DB_NAME).find(constants.CM.lower(), {}, {
        "_id": 0,
        "imageId": 1
    })
    return [doc['imageId'] for doc in cursor]
def save_model_file():
    """Persist the CM feature vectors from Mongo as the 'cm_np' model file."""
    constants = GlobalConstants()
    cursor = MongoWrapper(constants.Mongo().DB_NAME).find(constants.CM.lower(), {}, {
        "_id": 0,
        "featureVector": 1
    })
    feature_matrix = np.array([doc['featureVector'] for doc in cursor])
    Model().save_model(feature_matrix, 'cm_np')
def task5b(query, top, visualize=False, combine_models=False):
    """Run an LSH similarity query and optionally visualize the ranked matches.

    :param query: filename of the query image
    :param top: number of matches to report
    :param visualize: when True, render the ranked results with show_images
    :param combine_models: affects only the title shown in visualization
    :return: (image ids, their feature vectors, the query's feature vector)
    """
    constants = GlobalConstants()
    lsh = Model().load_model(constants.LSH_OBJECT)
    imageids, feat_vectors, query_vector = lsh.query(query, top)
    print(imageids[:top])
    if visualize:
        # One record per ranked match, in the shape the image viewer expects.
        result = [{
            'path': os.path.join("Hands", image),
            'imageId': image,
            'rank': rank + 1
        } for rank, image in enumerate(imageids[:top])]
        extract = "HOG + CM" if combine_models else "HOG"
        title = {
            "Search": "Locality Sensitive Hashing (LSH)",
            "Feature Extraction": extract,
            "L": lsh.get_l(),
            "K": lsh.get_k(),
            "Dimensionality Reduction": "NMF",
            "t": 20,
            "Distance": "Euclidean"
        }
        query_path = os.path.abspath(os.path.join("Hands", query))
        print(query_path)
        show_images(query_path, result, title, rank=True)
    return imageids, feat_vectors, query_vector
def dimension_reduction():
    """Load the saved 'cm_np' feature matrix and run PCA (k=500) on it."""
    constants = GlobalConstants()
    features = Model().load_model('cm_np')
    reducer = DimensionReduction(dimension_reduction_model=constants.PCA,
                                 extractor_model=constants.CM,
                                 matrix=features,
                                 conversion=True,
                                 k_value=500)
    reducer.execute()
def query(self, query_id, top):
    """Look up LSH candidates for *query_id* and rank them by distance.

    Loads the per-layer hash tables and buckets, intersects the buckets
    the query falls into within each layer, unions the candidates across
    layers, then ranks them by Euclidean distance to the query vector.

    :param query_id: image id of the query (must be in self.image_ids)
    :param top: number of nearest candidates to return
    :return: (ranked image ids, {id: feature vector} for them, query vector)
    """
    store = Model()
    constants = GlobalConstants()
    hashes_per_layer = store.load_model(constants.LSH_L_HASHES)
    buckets_per_layer = store.load_model(constants.LSH_L_BUCKETS)
    query_vector = self.data[self.image_ids.index(query_id)]
    candidate_total = 0
    layer_candidates = []
    for layer_idx in range(self.layers):
        layer_hash = hashes_per_layer["L{}".format(layer_idx)]
        layer_bucket = buckets_per_layer["L{}".format(layer_idx)]
        # Bucket contents for each of the K hashes the query lands in.
        per_hash = [
            layer_bucket["K{}".format(k_idx)].get(layer_hash["K{}".format(k_idx)].get(query_id))
            for k_idx in range(self.khash_count)
        ]
        # A candidate must appear in every hash's bucket within this layer.
        common = set.intersection(*map(set, per_hash))
        candidate_total += len(common)
        layer_candidates.append(common)
    # Any layer voting for an image makes it a candidate.
    candidates = set.union(*map(set, layer_candidates))
    ranked = sorted(
        ((image_id, euclidean(query_vector, self.data[self.image_ids.index(image_id)]))
         for image_id in candidates if image_id != query_id),
        key=operator.itemgetter(1))
    choices = [image_id for image_id, _ in ranked]
    if len(choices) < top:
        # new_choices = search_neighbors()
        pass
    print("Overall images: {}".format(candidate_total))
    print("Unique images: {}".format(len(choices)))
    feat_vectors = {
        image_id: self.data[self.image_ids.index(image_id)]
        for image_id in choices[:top]
    }
    return choices[:top], feat_vectors, query_vector
def task5a(layers=10, k=10, combine_models=False):
    """Build the LSH index over the saved feature models and persist it.

    :param layers: number of LSH layers (L)
    :param k: hashes per layer (K)
    :param combine_models: when True, concatenate HOG-NMF and CM-PCA features
    """
    constants = GlobalConstants()
    started = time.time()
    model = Model()
    if combine_models:
        hog_features = model.load_model(constants.HOG_NMF_MODEL_FILE)
        cm_features = model.load_model(constants.CM_PCA_MODEL_FILE)
        data = np.concatenate((hog_features, cm_features), axis=1)
    else:
        data = model.load_model(constants.HOG_NMF_MODEL_FILE)
    lsh = LSH(layers=layers,
              khash_count=k,
              w=constants.LSH_W,
              image_ids=img_ids(),
              data=data)
    l_hashes, l_buckets = lsh.create_index()
    # Persist the index and its tables so task5b can query without rebuilding.
    model.save_model(lsh, constants.LSH_OBJECT)
    model.save_model(l_hashes, constants.LSH_L_HASHES)
    model.save_model(l_buckets, constants.LSH_L_BUCKETS)
    print(time.time() - started)
class DimensionReduction:
    """Dimensionality reduction over image feature matrices.

    Wraps PCA, SVD, NMF and LDA (dispatched by name via :meth:`execute`)
    on feature vectors fetched from Mongo, with optional filtering by
    folder or label and optional persistence of the learned factors.
    """

    def __init__(self, extractor_model, dimension_reduction_model, k_value, label=None, image_metadata=False, subject_subject=False, folder_metadata=None, metadata_collection=None, matrix=None, conversion=True, save_model=False):
        # Project-wide constants and the Mongo handle used by every method.
        self.constants = GlobalConstants()
        self.mongo_wrapper = MongoWrapper(self.constants.Mongo().DB_NAME)
        self.extractor_model = extractor_model
        self.dimension_reduction_model = dimension_reduction_model
        self.label = label
        self.k_value = k_value
        # Note the rename: the image_metadata flag is stored as
        # binary_image_metadata on the instance.
        self.binary_image_metadata = image_metadata
        self.subject_subject = subject_subject
        self.folder_metadata = folder_metadata
        self.metadata_collection = metadata_collection
        self.matrix = matrix
        self.conversion = conversion
        self.save_model = save_model

    def get_object_feature_matrix(self):
        """
        Returns the object-feature matrix as a DataFrame.

        Fetches feature documents from the extractor's Mongo collection,
        optionally restricted to images in ``self.folder_metadata`` and/or
        ``self.label``. For CM features fed to LDA/NMF the vectors are
        re-bucketed into fixed-bin histograms.
        :return: DataFrame with (at least) 'imageId' and 'featureVector'
        """
        if self.folder_metadata:
            filter_images_list = self.filter_images_by_dir()
            condition = {
                "imageId": {
                    "$in": filter_images_list
                },
                "path": {
                    "$exists": True
                }
            }
        else:
            condition = {"path": {"$exists": True}}
        cursor = self.mongo_wrapper.find(self.extractor_model.lower(), condition, {'_id': 0})
        # NOTE(review): Cursor.count() is deprecated in newer PyMongo —
        # confirm the pinned driver version still supports it.
        if cursor.count() > 0:
            df = pd.DataFrame(list(cursor))
            # CM vectors can be negative; LDA/NMF require non-negative input,
            # so histogram the values over the global min/max range instead.
            if self.extractor_model == self.constants.CM and \
                    self.dimension_reduction_model in [self.constants.LDA, self.constants.NMF]:
                histogram_matrix = []
                feature_vector_list = df['featureVector'].tolist()
                min_val = np.min(feature_vector_list)
                max_val = np.max(feature_vector_list)
                for featureVector in df['featureVector'].tolist():
                    value, value_range = np.histogram(featureVector, bins=self.constants.CM_BIN_COUNT, range=(min_val, max_val + 1))
                    histogram_matrix.append(value)
                df['featureVector'] = histogram_matrix
            if self.label:
                # Keep only the rows whose image carries the requested label.
                filter_images_list = self.filter_images_by_label(df['imageId'].tolist())
                df = df[df.imageId.isin(filter_images_list)]
            return df
        else:
            print("No records found in Mongo collection: {}. Please run Task2 from Phase1".format(self.extractor_model.lower()))
            sys.exit(1)

    def get_binary_image_metadata_matrix(self):
        """
        Gets the Binary Image Metadata Matrix: one-hot/binary columns for
        gender, aspect of hand, handedness and accessories, plus a combined
        'featureVector' column, for the images in ``self.folder_metadata``.
        :return: DataFrame keyed by 'imageId'
        """
        images_list = [i for i in os.listdir(self.folder_metadata) if i.endswith(self.constants.JPG_EXTENSION)]
        metadata = self.get_metadata("imageName", images_list, {"_id": 0})
        metadata['male'] = [1 if i == "male" else 0 for i in metadata['gender'].tolist()]
        metadata['female'] = [1 if i == "female" else 0 for i in metadata['gender'].tolist()]
        # Complement of the 0/1 'accessories' column.
        metadata['without accessories'] = np.array([1] * len(metadata['accessories'])) - np.array(metadata['accessories'])
        metadata['dorsal'] = [1 if "dorsal" in i else 0 for i in metadata['aspectOfHand']]
        metadata['palmar'] = [1 if "palmar" in i else 0 for i in metadata['aspectOfHand']]
        metadata['left'] = [1 if "left" in i else 0 for i in metadata['aspectOfHand']]
        metadata['right'] = [1 if "right" in i else 0 for i in metadata['aspectOfHand']]
        metadata['featureVector'] = metadata[['male', 'female', 'dorsal', 'palmar', 'accessories', 'without accessories', 'left', 'right']].values.tolist()
        binary_image_metadata = metadata[['imageName', 'featureVector', 'male', 'female', 'dorsal', 'palmar', 'accessories', 'without accessories', 'left', 'right']]
        binary_image_metadata = binary_image_metadata.rename(columns={"imageName": "imageId"})
        return binary_image_metadata

    def filter_images_by_dir(self):
        """Fetches the list of image names in ``self.folder_metadata`` that
        also exist in ``self.metadata_collection``."""
        images_list = [i for i in os.listdir(self.folder_metadata) if i.endswith(self.constants.JPG_EXTENSION)]
        images_list = sorted(images_list)
        query = {"imageName": {"$in": images_list}}
        filter_images_list = [
            d['imageName'] for d in list(
                self.mongo_wrapper.find(self.metadata_collection, query, {
                    "imageName": 1,
                    "_id": 0
                }))
        ]
        return filter_images_list

    def filter_images_by_label(self, images_list):
        """Fetches the subset of *images_list* matching ``self.label``.

        :raises Exception: if the label is not one of the supported values
        """
        query = {"imageName": {"$in": images_list}}
        if self.label == "left-hand" or self.label == "right-hand" or self.label == "dorsal" or self.label == "palmar":
            # "left-hand"/"right-hand" match metadata that only says left/right.
            query['aspectOfHand'] = {
                "$regex": re.sub('-hand$', '', self.label)
            }
        elif self.label == "male" or self.label == "female":
            query['gender'] = self.label
        elif self.label == "with accessories":
            query['accessories'] = 1
        elif self.label == "without accessories":
            query['accessories'] = 0
        else:
            raise Exception("Incorrect Label")
        filter_images_list = [
            d['imageName'] for d in list(
                self.mongo_wrapper.find(self.constants.METADATA, query, {
                    "imageName": 1,
                    "_id": 0
                }))
        ]
        return filter_images_list

    def execute(self):
        """Performs dimensionality reduction by dispatching to the method
        named by ``self.dimension_reduction_model`` (e.g. 'PCA' -> pca)."""
        return getattr(DimensionReduction, self.dimension_reduction_model.lower())(self)

    def pca(self):
        # Principal Component Analysis on the n-dimensional features.
        # Returns (imageId/reducedDimensions DataFrame, feature components,
        # fitted PCA) — or self when self.save_model is set.
        if not self.matrix:
            data = self.get_object_feature_matrix()
            data_feature_matrix = np.array(data['featureVector'].tolist())
        else:
            data = self.matrix
            data_feature_matrix = data
        # get object-feature vectors matrix
        k = self.k_value
        if not data_feature_matrix.size == 0:
            # NOTE(review): sklearn normalize() returns a new array by default;
            # the return value is discarded here, so this call looks like a
            # no-op on data_feature_matrix — confirm whether in-place
            # normalization was intended.
            normalize(data_feature_matrix)
            # apply PCA to features
            features_pca_decomposition = PCA(n_components=k, copy=False)
            features_pca_decomposition.fit_transform(data_feature_matrix)
            # get latent feature components
            feature_components = features_pca_decomposition.components_
            data_pca_decomposition = PCA(n_components=k, copy=False)
            # transpose matrix to feature-data matrix
            feature_data_matrix = np.transpose(data_feature_matrix)
            # NOTE(review): same discarded-return pattern as above.
            normalize(feature_data_matrix)
            # apply PCA to the transposed matrix; `fit` itself is unused.
            fit = data_pca_decomposition.fit_transform(feature_data_matrix)
            # get latent data components
            data_components = np.transpose(data_pca_decomposition.components_)
            if self.save_model:
                # Persist W/H factor files instead of returning the mapping.
                model = Model()
                model.save_model(data_components, "{}_{}_w".format(self.extractor_model.lower(), self.dimension_reduction_model.lower()))
                model.save_model(feature_components, "{}_{}_h".format(self.extractor_model.lower(), self.dimension_reduction_model.lower()))
                return self
            # map imageID with principal components
            img_dim_mapping = pd.DataFrame({
                "imageId": data['imageId'],
                "reducedDimensions": data_components.tolist()
            })
            return img_dim_mapping, feature_components, features_pca_decomposition
        raise Exception("Data is empty in database, run Task 2 of Phase 1 (Insert feature extracted records in db )\n\n")

    def svd(self):
        """Singular-value decomposition of the object-feature matrix.
        :return: (imageId/reducedDimensions DataFrame, V^T, fitted model)"""
        data = self.get_object_feature_matrix()
        obj_feature = np.array(data['featureVector'].tolist())
        k = self.k_value
        # NOTE(review): np.array is never None — this guard always passes;
        # an emptiness check was probably intended.
        if obj_feature is not None:
            # Singular-value decomposition
            svd_model = TruncatedSVD(n_components=k)
            U = svd_model.fit_transform(obj_feature)
            U = pd.DataFrame({
                "imageId": data['imageId'],
                "reducedDimensions": U.tolist()
            })
            VT = svd_model.components_
            return U, VT, svd_model

    def nmf(self):
        """
        Performs NMF dimensionality reduction.
        :return: (imageId/reducedDimensions DataFrame, H matrix, fitted model);
                 (W, H) when conversion is disabled; self when save_model is set;
                 None if the input contains negative values
        """
        constants = self.constants.Nmf()
        # Pick the input matrix according to the configured mode.
        if self.binary_image_metadata:
            data = self.get_binary_image_metadata_matrix()
        elif self.subject_subject:
            data = self.matrix
        else:
            if not self.matrix:
                data = self.get_object_feature_matrix()
            else:
                data = self.matrix
        if not data.size == 0:
            obj_feature = np.array(data['featureVector'].tolist())
            if (obj_feature < 0).any():
                print("NMF does not accept negative values")
                return
            model = NMF(n_components=self.k_value,
                        beta_loss=constants.BETA_LOSS_KL,
                        init=constants.INIT_MATRIX,
                        random_state=0,
                        solver='mu',
                        max_iter=1000)
            w = model.fit_transform(obj_feature)
            h = model.components_
            if self.save_model:
                # Persist W/H factor files instead of returning them.
                model = Model()
                model.save_model(w, "{}_{}_w".format(self.extractor_model.lower(), self.dimension_reduction_model.lower()))
                model.save_model(h, "{}_{}_h".format(self.extractor_model.lower(), self.dimension_reduction_model.lower()))
                return self
            if not self.conversion:
                return w, h
            tt1 = time.time()
            data_lat = pd.DataFrame({
                "imageId": data['imageId'],
                "reducedDimensions": w.tolist()
            })
            # for i in range(h.shape[0]):
            #     print("Latent Feature: {}\n{}".format(i + 1, sorted(((i, v) for i, v in enumerate(h[i])),
            #                                           key=lambda x: x[1], reverse=True)))
            # print("\n\nTime Taken for NMF {}\n".format(time.time() - tt1))
            return data_lat, h, model
        raise Exception("Data in database is empty, Run Task 2 of Phase 1 (Insert feature extracted records in db )\n\n")

    def lda(self):
        """
        Performs LDA dimensionality reduction.
        :return: (imageId/reducedDimensions DataFrame, topic components,
                  fitted model); None if the input contains negative values
        """
        data = self.get_object_feature_matrix()
        obj_feature = np.array(data['featureVector'].tolist())
        if (obj_feature < 0).any():
            print("LDA does not accept negative values")
            return
        model = LatentDirichletAllocation(n_components=self.k_value,
                                          learning_method='batch',
                                          n_jobs=-1)
        # topic_word_prior=0.05, doc_topic_prior=0.01)#learning_method='online')
        lda_transformed = model.fit_transform(obj_feature)
        data_lat = pd.DataFrame({
            "imageId": data['imageId'],
            "reducedDimensions": lda_transformed.tolist()
        })
        # Compute model_component in terms of probabilities.
        # NOTE(review): model_comp is computed but the raw components_ are
        # returned — confirm which was intended.
        model_comp = model.components_ / model.components_.sum(axis=1)[:, np.newaxis]
        return data_lat, model.components_, model

    def compute_query_image(self, model, folder, image):
        """
        Computes the reduced dimensions for the new query image.
        :param model: Learned model
        :param folder: Folder in which the query image is
        :param image: Filename of the query image
        :return: Reduced Dimensions for the new vector
        """
        feature_extractor = ExtractFeatures(folder, self.extractor_model)
        result = feature_extractor.execute(image)
        # Mirror the CM->histogram preprocessing applied in
        # get_object_feature_matrix so the query is in the same space.
        if self.extractor_model == self.constants.CM and \
                self.dimension_reduction_model in [self.constants.LDA, self.constants.NMF]:
            cursor = self.mongo_wrapper.find(self.extractor_model.lower(), {"path": {
                "$exists": True
            }}, {'_id': 0})
            df = pd.DataFrame(list(cursor))
            feature_vector_lsit = df['featureVector'].tolist()
            min_val = np.min(feature_vector_lsit)
            max_val = np.max(feature_vector_lsit)
            result, value_range = np.histogram(result, bins=self.constants.CM_BIN_COUNT, range=(min_val, max_val + 1))
        return model.transform([result])

    def find_m_similar_images(self, model, m, folder, image, dist_func):
        """
        Finds m similar images to the given query image.
        :param model: The learned model which is saved
        :param m: The integer value of m
        :param folder: Folder in which the given query image is present
        :param image: Filename of the query image
        :param dist_func: Distance function to be used
        :return: m similar images with their scores
        """
        obj_feature = self.get_object_feature_matrix()
        cursor = self.mongo_wrapper.find(self.constants.METADATA, {})
        if cursor.count() > 0:
            metadata = pd.DataFrame(list(cursor))
        else:
            metadata = pd.DataFrame()
        query_reduced_dim = self.compute_query_image(model, folder, image)
        dist = []
        score = []
        for index, row in obj_feature.iterrows():
            dist.append(getattr(utils.distancemeasure, dist_func)(query_reduced_dim[0], model.transform([row['featureVector']])[0]))
        for d in dist:
            # nvsc1 is treated as a similarity already; others are distances
            # converted to a 0-100 similarity score.
            if dist_func == "nvsc1":
                score.append(d * 100)
            else:
                score.append((1 - d / max(dist)) * 100)
        obj_feature['dist'] = dist
        obj_feature['score'] = score
        obj_feature = obj_feature.sort_values(by="score", ascending=False)
        result = []
        for index, row in islice(obj_feature.iterrows(), m):
            rec = dict()
            rec['imageId'] = row['imageId']
            rec['score'] = row['score']
            rec['path'] = row['path']
            if not metadata.empty:
                rec['subject'] = ((metadata.loc[metadata['imageName'] == row['imageId']])['id']).tolist()[0]
            result.append(rec)
        return result

    def get_metadata(self, column, values, filter_query=None):
        """Fetch Metadata rows where *column* is in *values*.
        :return: DataFrame of matches (empty DataFrame when none)"""
        query = {column: {"$in": values}}
        cursor = self.mongo_wrapper.find(self.constants.METADATA, query, filter_query)
        if cursor.count() > 0:
            df = pd.DataFrame(list(cursor))
            return df
        else:
            return pd.DataFrame()

    def get_metadata_collection(self, column, values, collection_name, filter_query=None):
        """Like get_metadata, but prefers *collection_name* when it has
        matching rows, falling back to the Metadata collection otherwise.
        Rows with a null aspectOfHand are excluded."""
        query = {column: {"$in": values}, "aspectOfHand": {"$ne": None}}
        result = self.mongo_wrapper.find(collection_name, query)
        if result.count() > 0:
            collection = collection_name
        else:
            collection = self.constants.METADATA
        cursor = self.mongo_wrapper.find(collection, query, filter_query)
        if cursor.count() > 0:
            df = pd.DataFrame(list(cursor))
            return df
        else:
            return pd.DataFrame()

    def get_metadata_unique_values(self, column):
        """Return the distinct values of *column* in the Metadata collection
        (empty list when the collection is empty or has no values)."""
        cursor = self.mongo_wrapper.find(self.constants.METADATA, '')
        if cursor.count() > 0:
            distinct_values = cursor.distinct(column)
            if len(distinct_values) > 0:
                return distinct_values
        return list()
import os

from classes.globalconstants import GlobalConstants
from classes.mongo import MongoWrapper

# Module-level singletons shared by the interactive input helpers below.
global_constants = GlobalConstants()
# NOTE(review): MongoWrapper() is constructed here without a DB name,
# unlike other modules which pass Mongo().DB_NAME — confirm intended.
mongo_wrapper = MongoWrapper()


def get_input_folder(folder_type):
    """Prompt the user (labelled with *folder_type*) for a folder path,
    re-prompting recursively until an existing directory is entered."""
    folder = str(input("{}: ".format(folder_type)))
    if not folder or not os.path.isdir(folder):
        print("Please enter a valid folder path")
        return get_input_folder(folder_type)
    return folder


def get_input_image(folder):
    """Prompt for an image filename, re-prompting until the file exists in
    *folder* and carries the expected JPG extension."""
    image = str(input("Enter the Image filename: "))
    if not image or not os.path.isfile(os.path.join(folder, image)):
        print("The image does not exist in the folder")
        return get_input_image(folder)
    elif not image.endswith(global_constants.JPG_EXTENSION):
        print("Please enter a valid Image filename")
        return get_input_image(folder)
    return image


# NOTE(review): this definition is truncated — its body continues beyond
# the visible chunk.
def get_input_image_list(folder):
import pandas as pd
import numpy as np
import random as random
import operator
import utils.imageviewer as imgvwr
from utils.excelcsv import CSVReader
import phase2.task1 as p1task1
import phase2.task5 as p2task5
import phase2.task6 as p2task6
import time
import warnings
from sklearn.linear_model import LogisticRegression

warnings.filterwarnings("ignore")

# Module-level singletons shared by the tasks in this module.
# NOTE(review): Model, GlobalConstants and MongoWrapper are used here but
# their imports are not visible in this chunk — presumably imported above.
model_interact = Model()
global_constants = GlobalConstants()
mongo_wrapper = MongoWrapper(global_constants.Mongo().DB_NAME)
csv_reader = CSVReader()


def compute_latent_semantic_for_label(fea_ext_mod, dim_red_mod, label, k_value, folder):
    """Build a DimensionReduction for the given extractor/reduction/label
    over the 'labelled' metadata collection.

    NOTE(review): this definition is truncated — it continues beyond the
    visible chunk.
    """
    # p2task5.run_task3(fea_ext_mod, dim_red_mod, label, k_value)
    dim_reduction = DimensionReduction(fea_ext_mod,
                                       dim_red_mod,
                                       k_value,
                                       label,
                                       folder_metadata=folder,
                                       metadata_collection="labelled")
def __init__(self):
    """Set up the shared constants and a Mongo connection for this helper."""
    self.constants = GlobalConstants()
    self.mongo_wrapper = MongoWrapper(self.constants.Mongo().DB_NAME)
class CSVReader:
    """Reads CSV metadata into Mongo and writes latent semantics out to CSV."""

    def __init__(self):
        # Shared constants and the Mongo connection used by all methods.
        self.constants = GlobalConstants()
        self.mongo_wrapper = MongoWrapper(self.constants.Mongo().DB_NAME)
        pass

    def save_hand_csv_mongo(self, filename):
        """Reads the HandsInfo CSV and saves it to Mongo collection Metadata"""
        data = pandas.read_csv(filename)
        data_json = json.loads(data.to_json(orient='records'))
        self.mongo_wrapper.drop_collection(self.constants.METADATA)  # Drop Metadata Collection
        self.mongo_wrapper.bulk_insert(self.constants.METADATA, data_json)  # Insert new Metadata

    def save_csv_multiple(self, input_data):
        """
        Reads the csv files and saves it to the collection
        :param input_data: a json of the form {"collectionName": ["filename1.csv", "filename2.csv"]}
        :return:
        """
        for inp in input_data:
            self.mongo_wrapper.drop_collection(inp)  # Drop Collection
            for filename in input_data[inp]:
                data = pandas.read_csv(filename)
                data_json = json.loads(data.to_json(orient='records'))
                self.mongo_wrapper.bulk_insert(inp, data_json)  # Insert new Metadata

    # method to format rows to output to csv
    def prepare_rows(self, latent_semantics):
        """Format each latent semantic as a CSV row: 'LSn' followed by the
        rounded term/weight pairs (relies on str(dict) formatting)."""
        round_digits = 6
        result = []
        for i, ls in enumerate(latent_semantics):
            term_weight = {}
            for x in ls:
                term_weight[x[0]] = round(x[1], round_digits)
            # Strip the surrounding braces of the dict repr and split on
            # commas to yield one CSV cell per term:weight pair.
            result.append(("LS" + str(i + 1) + ", " + str(term_weight)[1:-1]).split(','))
        return result

    # method to save latent semantics to csv
    def save_to_csv(self, data_latent_semantics, feature_latent_semantics, filename, subject_subject=False, image_metadata=False):
        """Write data (and, unless subject_subject, feature) latent semantics
        to <project>/output/<filename>.csv, creating the folder if needed.
        The section headers depend on the subject_subject/image_metadata flags."""
        current_path = path.dirname(path.dirname(path.realpath(__file__)))
        _finalPath = path.join(current_path, "output")
        if not path.exists(_finalPath):
            makedirs(_finalPath)
        images = data_latent_semantics['imageId'].tolist()
        data_latent_semantics = np.array(data_latent_semantics['reducedDimensions'].tolist())
        data_tw = tw.get_data_latent_semantics(data_latent_semantics, data_latent_semantics.shape[1], images)
        with open(path.join(_finalPath, filename + ".csv"), mode='w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=",")
            if subject_subject:
                csv_writer.writerow(["Top-k Latent Semantics"])
            elif image_metadata:
                csv_writer.writerow(["LS in Image Space"])
            else:
                csv_writer.writerow(["Data Latent Semantics"])
            csv_writer.writerows(self.prepare_rows(data_tw))
            if not subject_subject:
                feature_tw = tw.get_feature_latent_semantics(feature_latent_semantics, feature_latent_semantics.shape[0], image_metadata=image_metadata)
                if image_metadata:
                    csv_writer.writerow(["LS in Metadata Space"])
                else:
                    csv_writer.writerow(["Feature Latent Semantics"])
                csv_writer.writerows(self.prepare_rows(feature_tw))