def dimensionality_reduction(self):
    """Decompose the database matrix and pickle the reduced feature vectors.

    Builds the decomposition model selected by ``self.decomposition_name``
    ('PCA', 'SVD', 'NMF' or 'LDA') from ``self.database_matrix``,
    ``self.k_components`` and ``self.database_image_id``, runs it, pairs each
    image id with its reduced feature vector and saves that mapping with
    ``misc.save2pickle`` into ``self.reduced_pickle_file_folder``.

    The pickle is named ``<feature model>_<decomposition>`` with
    ``_<metadata label>`` appended when ``self.metadata_label`` is not empty.

    Raises:
        ValueError: if ``self.decomposition_name`` is not a supported name.
    """
    # NOTE (from the original author): with 20 or fewer images or features the
    # reduction models raised an error, because n_components must not exceed
    # either dimension of the database matrix. The suggested cap was
    # min(len(self.database_matrix), len(self.database_matrix[0]), 20).
    # This method does not apply that cap itself.
    name = self.decomposition_name
    if name == 'PCA':
        model_class = PCAModel
    elif name == 'SVD':
        model_class = SVD
    elif name == 'NMF':
        model_class = NMFModel
    elif name == 'LDA':
        model_class = LDAModel
    else:
        # Previously an unknown name fell through and decompose() was called
        # on whatever model (if any) happened to be set already.
        raise ValueError(
            "Unsupported decomposition {!r}; expected one of "
            "'PCA', 'SVD', 'NMF', 'LDA'".format(name))

    self.decomposition_model = model_class(
        self.database_matrix, self.k_components, self.database_image_id)
    self.decomposition_model.decompose()
    print('Decomposition Complete')

    decomposed_database_matrix = self.decomposition_model.get_decomposed_data_matrix()
    reduced_dimension_folder_images_dict = dict(
        zip(self.database_image_id, decomposed_database_matrix))

    feature_name = self.feature_extraction_model_name + '_' + self.decomposition_name
    if self.metadata_label != '':
        feature_name = feature_name + '_' + self.metadata_label
    misc.save2pickle(reduced_dimension_folder_images_dict,
                     self.reduced_pickle_file_folder,
                     feature=feature_name)
def save_label_decomposed_features(self, label, decomposed_label):
    """Pickle the reduced features of the images whose hand aspect matches ``label``.

    Loads the feature mapping stored under ``decomposed_label`` in
    ``self.reduced_dimension_pickle_path``, keeps the images whose
    ``aspectOfHand`` metadata contains ``label`` (restricted to
    ``self.test_images_list`` when that list is set) and saves the selection
    under ``decomposed_label + '_' + label`` in the same folder.
    """
    all_features = misc.load_from_pickle(self.reduced_dimension_pickle_path,
                                         decomposed_label)

    # Metadata is loaded on first use.
    if self.images_metadata is None:
        self.set_images_metadata()

    metadata = self.images_metadata
    if self.test_images_list is not None:
        in_test_set = metadata['imageName'].isin(self.test_images_list)
        metadata = metadata[(in_test_set)]

    # NOTE(review): the label filter is assumed to apply whether or not
    # test_images_list is set; the original indentation was lost - confirm.
    matches_label = metadata['aspectOfHand'].str.contains(label)
    metadata = metadata[(matches_label)]

    selected_features = {
        image_id: all_features[image_id]
        for image_id in metadata['imageName'].tolist()
    }
    misc.save2pickle(selected_features, self.reduced_dimension_pickle_path,
                     feature=(decomposed_label + '_' + label))
def compute_features_images_folder(self):
    """Compute the features of every image in the folder and pickle them.

    The folder is ``self.folder_path`` resolved against the parent of this
    file's directory. Each image returned by ``misc.get_images_in_directory``
    is passed to ``self.compute_image_features``; the resulting
    ``{image name: features}`` mapping is saved next to this file with
    ``misc.save2pickle`` under the name ``self.model_name``.

    Raises:
        Exception: if ``self.model`` is None.
    """
    if self.model is None:
        raise Exception("No model is defined")

    folder = os.path.join(Path(os.path.dirname(__file__)).parent, self.folder_path)
    files_in_directory = misc.get_images_in_directory(folder)

    # One pass: tqdm only reports progress over the (name, path) pairs.
    folder_images_features_dict = {
        image_name: self.compute_image_features(image_path, print_arr=False)
        for image_name, image_path in tqdm(files_in_directory.items())
    }

    misc.save2pickle(folder_images_features_dict, os.path.dirname(__file__),
                     feature=self.model_name)
def get_sift_features():
    """Turn variable-length SIFT descriptors into fixed-length histograms.

    Loads the per-image SIFT features pickled under 'SIFT' in the sibling
    'Phase1' folder, clusters all keypoint descriptors of all images with
    k-means, and represents every image by the number of its descriptors
    assigned to each cluster. The cluster count is the average number of
    descriptors per image (at least 1). The resulting
    ``{image id: histogram}`` mapping is saved as 'SIFT_NEW' in the
    'pickle_files' folder next to this file.

    Raises:
        ValueError: if the loaded dataset contains no descriptors at all.
    """
    parent_directory_path = Path(os.path.dirname(__file__)).parent
    pickle_file_directory = os.path.join(parent_directory_path, 'Phase1')
    dataset_images_features = misc.load_from_pickle(pickle_file_directory, 'SIFT')

    # Stack the keypoint descriptors of all images into one 2-d list.
    # The first four entries of each descriptor are skipped (per the original
    # note: x, y, scale and orientation are not used).
    all_descriptors = []
    for feature_vector in dataset_images_features.values():
        all_descriptors.extend(descriptor[4:] for descriptor in feature_vector)

    if not all_descriptors:
        # Previously this surfaced as a ZeroDivisionError or a k-means failure.
        raise ValueError('No SIFT descriptors found; cannot cluster keypoints')

    # Average descriptors per image; never let it truncate to zero clusters.
    n_clusters = max(1, len(all_descriptors) // len(dataset_images_features))
    kmeans = KMeans(n_clusters)
    print('Applying k-means algorithm on all the keypoint descriptors of all images')
    kmeans.fit(all_descriptors)

    image_features = {}
    print('Equating the number of features for all the images : ')
    row_start = 0
    for image_id, feature_vector in tqdm(dataset_images_features.items()):
        histogram = [0] * n_clusters
        descriptor_count = len(feature_vector)
        # An image without keypoints keeps an all-zero histogram instead of
        # sending an empty slice to predict().
        if descriptor_count:
            row_end = row_start + descriptor_count
            for cluster_num in kmeans.predict(all_descriptors[row_start:row_end]):
                histogram[cluster_num] += 1
            row_start = row_end
        image_features[image_id] = histogram

    print(len(image_features))
    reduced_pickle_file_folder = os.path.join(os.path.dirname(__file__), 'pickle_files')
    misc.save2pickle(image_features, reduced_pickle_file_folder, 'SIFT_NEW')
def svm():
    """Interactively train or test the SVM dorsal/palmar classifier on HOG features.

    Train: computes features for ``labelled_set_path``, derives binary labels
    from the metadata file (0 when the label contains "dorsal", else 1), fits
    an ``svmc`` model and pickles it as 'HOG_svm' in
    ``reduced_pickle_file_folder``.

    Test: computes features for ``unlabelled_set_path``, loads the pickled
    model, and prints a ``{image: "dorsal" | "palmar"}`` mapping.
    """
    model = "HOG"
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    # The prompt lists numbered options, so accept "1"/"2" as well as the
    # words themselves (previously only the exact words matched).
    choice = phase.strip().lower()
    phase = {"1": "train", "2": "test"}.get(choice, choice)

    if phase == "train":
        training_features, images_list = test.compute_features_folder(labelled_set_path, phase)
        metadata_filepath = metadata_file_path + "/" + metadata_file_name
        csv_labels = test.make_labels(metadata_filepath)
        binary_labels = [0 if "dorsal" in label else 1 for label in csv_labels]
        my_model = svmc(.001, .01, 1000)
        my_model.fit(training_features, binary_labels)
        misc.save2pickle(my_model, reduced_pickle_file_folder, feature=(model + '_svm'))
    elif phase == "test":
        testing_features, images_list = test.compute_features_folder(unlabelled_set_path, phase)
        my_model = misc.load_from_pickle(reduced_pickle_file_folder, feature=(model + '_svm'))
        predicted_values = my_model.predict(testing_features)
        # NOTE(review): training uses 0 for dorsal but prediction checks -1;
        # presumably svmc maps labels to -1/+1 internally - confirm.
        svm_dict = {
            img: ("dorsal" if value == -1 else "palmar")
            for img, value in zip(images_list, predicted_values)
        }
        print(svm_dict)
    else:
        print("Unrecognised choice; expected 1/train or 2/test")
def set_features(self):
    """Prepare the unlabelled, dorsal and palmar feature sets.

    When ``self.decomposition_name`` is empty the raw model features are
    computed for the labelled dataset folder; otherwise the labelled dataset
    is reduced with a ``Decomposition`` using 100 components. Afterwards the
    unlabelled features are fetched and pickled, and the dorsal and palmar
    features are stored on the instance.
    """
    if self.decomposition_name == '':
        parent_of_cwd = Path(os.getcwd()).parent
        test_dataset_folder_path = os.path.abspath(
            os.path.join(parent_of_cwd, self.labelled_dataset_path))
        print('Getting the Model Features from Phase1')
        features_obj = FeaturesImages(self.feature_name, test_dataset_folder_path)
        features_obj.compute_features_images_folder()
    else:
        self.decomposition = Decomposition(
            self.decomposition_name, 100, self.feature_name,
            self.labelled_dataset_path)
        self.decomposition.dimensionality_reduction()

    # NOTE(review): the steps below are assumed to run for both branches;
    # the original indentation was lost - confirm.
    unlabelled_features = self.get_unlabelled_images_decomposed_features()
    self.unlabelled_dataset_features = unlabelled_features
    misc.save2pickle(self.unlabelled_dataset_features,
                     self.reduced_pickle_file_folder,
                     feature='unlabelled_' + self.decomposed_feature)

    print("Getting features for dorsal_images ")
    self.dorsal_features = self.get_features('dorsal')
    print("Getting features for palmar images")
    self.palmar_features = self.get_features('palmar')
def save_result(self, result):
    """Pickle ``result`` as 'Task_5_Result' in the Phase2 pickle folder.

    The target folder is ``Phase2/pickle_files`` under the parent of this
    file's directory.
    """
    project_root = Path(os.path.dirname(__file__)).parent
    target_folder = os.path.join(project_root, 'Phase2', 'pickle_files')
    misc.save2pickle(result, target_folder, 'Task_5_Result')
def _ppr_euclidean_distance(features1, features2):
    """Return the Euclidean distance between two equal-length feature vectors."""
    squared_sum = 0.00
    for i in range(len(features1)):
        squared_sum += abs(features1[i] - features2[i]) ** 2
    return squared_sum ** (1 / float(2))


def _ppr_hand_aspect(frame, image_name):
    """Return the first word of ``aspectOfHand`` for ``image_name`` in ``frame``."""
    matches = frame.loc[frame['imageName'] == image_name, 'aspectOfHand'].dropna()
    return matches.iloc[0].split()[0]


def ppr_classifier():
    """Interactively train or test the personalised-PageRank hand classifier.

    Train: reduces the labelled set (CM features, NMF, k=8), builds the
    image-image distance frame with ``norm_distance`` and pickles it.

    Test: reduces the unlabelled set and, for each unlabelled image, extends
    the pickled image-image frame with that image's distances to every
    labelled image. It then builds the neighbour graph, takes the dominant
    images returned by ``steady_state`` and predicts "dorsal" or "palmar" by
    majority vote of their metadata labels (ties go to "dorsal"). Finally it
    prints the predictions and the fraction that match
    ``unlabelled_metadata_df``.
    """
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    # Raw string: the original plain literal only worked because "\M", "\P"
    # and "\p" are not recognised escapes; the runtime value is unchanged.
    decomposed_pickle_folder = r"D:\MS_1\MWDB-project\Phase2\pickle_files"
    graph_feature = model + '_' + decomposition_model + '_image_image_df'

    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    # The prompt lists numbered options, so accept "1"/"2" as well as the words.
    choice = phase.strip().lower()
    phase = {"1": "train", "2": "test"}.get(choice, choice)

    if phase == "train":
        decomposition = Decomposition(decomposition_model, k, model, labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        reduced_dim_folder_images_dict = misc.load_from_pickle(
            decomposed_pickle_folder,
            feature=(model + '_' + decomposition_model + '_' + phase))
        image_ids = list(reduced_dim_folder_images_dict.keys())
        image_image_graph_keys = list(it.combinations(image_ids, 2))
        image_image_df = pd.DataFrame(0.00, columns=image_ids, index=image_ids)
        image_image_df = norm_distance(image_image_df, reduced_dim_folder_images_dict,
                                       image_image_graph_keys, 2)
        misc.save2pickle(image_image_df, reduced_pickle_file_folder, feature=graph_feature)
    elif phase == "test":
        decomposition = Decomposition(decomposition_model, k, model, unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle(
            decomposed_pickle_folder,
            feature=(model + '_' + decomposition_model + '_train'))
        unlabelled_images_feature_dict = misc.load_from_pickle(
            decomposed_pickle_folder,
            feature=(model + '_' + decomposition_model + '_test'))
        K = 9  # number of dominant images requested from steady_state

        # Loaded once: every iteration starts from this frame and pd.concat
        # returns a new frame, so the base is never modified.
        base_image_image_df = misc.load_from_pickle(reduced_pickle_file_folder,
                                                    feature=graph_feature)
        prediction = {}
        for unlabelled_img in unlabelled_images_list:
            features1 = unlabelled_images_feature_dict.get(unlabelled_img)
            # NOTE(review): assumes the labelled dict iterates in the same
            # order as the frame's columns - confirm.
            new_col = [
                _ppr_euclidean_distance(features1, labelled_images_feature_dict.get(labelled_img))
                for labelled_img in labelled_images_feature_dict.keys()
            ]

            # DataFrame.append was removed in pandas 2.0; concat a one-row frame.
            new_row = pd.DataFrame([new_col], columns=base_image_image_df.columns,
                                   index=[unlabelled_img])
            image_image_df = pd.concat([base_image_image_df, new_row])

            # Mirror the new row as a column (distance to itself is 0). assign()
            # creates a column literally named 'unlabelled_img', renamed below.
            new_col.append(0)
            image_image_df = image_image_df.assign(unlabelled_img=new_col)
            image_image_df = image_image_df.rename({'unlabelled_img': unlabelled_img}, axis=1)
            image_image_df = image_image_df.loc[:, ~image_image_df.columns.duplicated()]
            image_image_df = image_image_df[~image_image_df.index.duplicated(keep='first')]

            image_image_features_df = k_neighbour_graph(image_image_df, image_image_df.columns, 8)
            dominant_img_list = steady_state([unlabelled_img], image_image_features_df,
                                             image_image_features_df.columns, K)

            palmar = 0
            dorsal = 0
            for img in dominant_img_list:
                # Skip the query image itself (was a substring test, `not in`).
                if img != unlabelled_img:
                    aspect = _ppr_hand_aspect(metadata_df, img)
                    if aspect == "palmar":
                        palmar += 1
                    if aspect == "dorsal":
                        dorsal += 1
            prediction[unlabelled_img] = "dorsal" if dorsal >= palmar else "palmar"

        print(prediction)
        correct = 0
        for image_name, predicted in prediction.items():
            if _ppr_hand_aspect(unlabelled_metadata_df, image_name) == predicted:
                correct += 1
        if prediction:
            print(correct / len(prediction))
        else:
            print("No unlabelled images were classified")
    else:
        print("Unrecognised choice; expected 1/train or 2/test")
def _dt_hand_aspect(frame, image_name):
    """Return the first word of ``aspectOfHand`` for ``image_name`` in ``frame``."""
    matches = frame.loc[frame['imageName'] == image_name, 'aspectOfHand'].dropna()
    return matches.iloc[0].split()[0]


def decision_tree_input():
    """Interactively train or test the decision-tree hand classifier.

    Train: reduces the labelled set (CM features, NMF, k=8), appends the
    labels from ``get_labels`` as the last column, builds a tree with
    ``build_tree(dataset, 6, 1)`` and pickles it.

    Test: loads the pickled tree, reduces the unlabelled set, predicts
    "palmar" when ``predict`` returns 1 and "dorsal" otherwise, then prints
    the predictions and the fraction that match ``unlabelled_metadata_df``.
    """
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    # One raw-string path for both phases; the test phase previously used a
    # plain literal with invalid escapes ("\M", "\P", "\p") for the same value.
    decomposed_pickle_folder = r"D:\MS_1\MWDB-project\Phase2\pickle_files"
    tree_feature = model + '_' + decomposition_model + '_tree'

    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    # The prompt lists numbered options, so accept "1"/"2" as well as the words.
    choice = phase.strip().lower()
    phase = {"1": "train", "2": "test"}.get(choice, choice)

    if phase == "train":
        decomposition = Decomposition(decomposition_model, k, model, labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle(
            decomposed_pickle_folder,
            feature=(model + '_' + decomposition_model + '_train'))
        y_train = get_labels(labelled_images_feature_dict.keys(), metadata_df)
        # np.vstack needs a real sequence; a dict view is deprecated/rejected.
        X_train = np.vstack(list(labelled_images_feature_dict.values()))
        y_train = np.asarray(y_train).reshape(len(y_train), 1)
        # Labels become the last column of the training matrix.
        dataset = np.concatenate((X_train, y_train), axis=1)
        tree = build_tree(dataset, 6, 1)
        misc.save2pickle(tree, reduced_pickle_file_folder, feature=tree_feature)
    elif phase == "test":
        tree = misc.load_from_pickle(reduced_pickle_file_folder, feature=tree_feature)
        decomposition = Decomposition(decomposition_model, k, model, unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        unlabelled_images_feature_dict = misc.load_from_pickle(
            decomposed_pickle_folder,
            feature=(model + '_' + decomposition_model + '_test'))

        prediction = {}
        for key, features in unlabelled_images_feature_dict.items():
            est_val = predict(tree, features)
            prediction[key] = "palmar" if est_val == 1 else "dorsal"
        print(prediction)

        correct = 0
        for image_name, predicted in prediction.items():
            if _dt_hand_aspect(unlabelled_metadata_df, image_name) == predicted:
                correct += 1
        if prediction:
            print(correct / len(prediction))
        else:
            print("No unlabelled images were classified")
    else:
        print("Unrecognised choice; expected 1/train or 2/test")