def main():
    """Drive Task 8: NMF with the image-metadata option enabled.

    Asks for k until the entered value is at most 8, asks for a folder,
    runs the reduction, prints the term-weight pairs and writes them to a
    CSV file under ``output/``.
    """
    while True:
        k_value = get_input_k()
        if not k_value > 8:
            break
        print("Please enter a value of k within 8")
    folder = get_input_folder()

    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print("k-value: {}\nFolder: {}".format(k_value, folder))
    print(separator)

    reducer = DimensionReduction(None,
                                 "NMF",
                                 k_value,
                                 image_metadata=True,
                                 folder_metadata=folder)
    # The third element returned by execute() is not used by this task.
    w, h, _ = reducer.execute()

    # Show the term-weight pairs on the console.
    print_tw(w, h, image_metadata=True)

    # Persist the same data as CSV.
    filename = "task8" + "_" + str(k_value)
    CSVReader().save_to_csv(w, h, filename, image_metadata=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
def main():
    """Drive Task 4: list the m images most similar to a query image.

    Collects the user inputs, loads the model stored under
    ``<feature>_<reduction>_<label-without-spaces>_<k>`` (exiting with
    status 1 when nothing is loaded), ranks the images against the query
    image, prints the ranking and hands it to ``show_images``.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()
    label = get_input_image_label()
    folder = get_input_folder()
    image_name = get_input_image(folder)
    m_value = get_input_m()

    # "nvsc1" is selected only for NMF combined with CM or LBP features;
    # every other combination uses the euclidean distance.
    wants_nvsc1 = (dimension_reduction_model == "NMF"
                   and feature_extraction_model in ["CM", "LBP"])
    dist_func = "nvsc1" if wants_nvsc1 else "euclidean"

    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print("Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}\nLabel: {}\nFolder: {}\n"
          "Image: {}\nm-value: {}".format(feature_extraction_model,
                                         dimension_reduction_model, k_value,
                                         label, folder, image_name, m_value))
    print(separator)

    # Load the model that Task 3 is expected to have stored.
    filename = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                        dimension_reduction_model,
                                        label.replace(" ", ''), str(k_value))
    model = model_interact.load_model(filename=filename)
    if not model:
        print("Please run Task 3 for {}, {}, {} and {}".format(
            feature_extraction_model, dimension_reduction_model, label,
            k_value))
        sys.exit(1)

    # Project the query image and rank the stored images against it.
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value,
                                       label)
    result = dim_reduction.find_m_similar_images(model, m_value, folder,
                                                 image_name, dist_func)

    print(separator)
    print("Similar Images")
    print(separator)
    for rec in result:
        print(rec)
    print(separator)

    title = {
        "Feature Extraction": feature_extraction_model,
        "Dimension Reduction": dimension_reduction_model,
        "k": k_value,
        "Label": label,
        "Distance": dist_func
    }
    query_path = os.path.abspath(os.path.join(folder, image_name))
    show_images(query_path, result, title)
def dimension_reduction():
    """Run a reduction with k=500 over the stored 'cm_np' feature matrix.

    The reduction and extractor models are taken from ``GlobalConstants``
    (``PCA`` and ``CM``). The result of ``execute()`` is discarded.
    """
    consts = GlobalConstants()
    features = Model().load_model('cm_np')
    reducer = DimensionReduction(dimension_reduction_model=consts.PCA,
                                 extractor_model=consts.CM,
                                 matrix=features,
                                 conversion=True,
                                 k_value=500)
    reducer.execute()
def get_x_train(fea_ext_mod, dim_red_mod, k_value, train_set):
    """Reduce the labelled training folder and store the fitted model.

    The model is saved as ``<feature>_<reduction>_<k>_<folder basename>``.

    Returns:
        The first element returned by ``DimensionReduction.execute()``
        (the object/latent-semantics data).
    """
    store = Model()
    reducer = DimensionReduction(fea_ext_mod,
                                 dim_red_mod,
                                 k_value,
                                 folder_metadata=train_set,
                                 metadata_collection="labelled")
    # The feature/latent data (second element) is not needed here.
    obj_lat, _, fitted = reducer.execute()

    model_name = "{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                          str(k_value),
                                          os.path.basename(train_set))
    store.save_model(model=fitted, filename=model_name)
    return obj_lat
def get_y(fea_ext_mod, dim_red_mod, k_value, collection_name, red_dim=None, obj_lat=None):
    """Return the first word of ``aspectOfHand`` for each image.

    For the ``'unlabelled'`` collection the image ids are read from
    ``red_dim``; for any other collection they are read from ``obj_lat``
    and looked up in ``collection_name``.

    Returns:
        list of str: text before the first space of each ``aspectOfHand``.
    """
    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)

    if collection_name != 'unlabelled':
        image_ids = obj_lat['imageId'].tolist()
        metadata = dim_red.get_metadata_collection("imageName", image_ids,
                                                   collection_name)
    else:
        image_ids = red_dim['imageId'].tolist()
        metadata = dim_red.get_metadata("imageName", image_ids)

    labels = []
    for aspect in metadata['aspectOfHand'].tolist():
        labels.append(aspect.split(' ')[0])
    return labels
def run_task3(feature_extraction_model, dimension_reduction_model, label, k_value):
    """Run the Task 3 reduction for ``label`` and store the fitted model.

    The model is saved as
    ``<feature>_<reduction>_<label-without-spaces>_<k>``. Nothing is returned.
    """
    reducer = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model, k_value, label)
    # The matrix itself is not used here; the call is kept because the
    # original performed it before execute().
    reducer.get_object_feature_matrix()
    _, _, fitted = reducer.execute()

    model_name = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                          dimension_reduction_model,
                                          label.replace(" ", ''),
                                          str(k_value))
    model_interact.save_model(model=fitted, filename=model_name)
def get_x_test(fea_ext_mod, dim_red_mod, k_value, train_set, test_set):
    """Project every file in ``test_set`` with the model saved for ``train_set``.

    The model is loaded from
    ``<feature>_<reduction>_<k>_<train folder basename>``.

    Returns:
        pandas.DataFrame with columns ``imageId`` (file names from
        ``os.listdir(test_set)``) and ``reducedDimensions`` (element 0 of
        each ``compute_query_image`` result).
    """
    store = Model()
    reducer = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    model_name = "{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                          str(k_value),
                                          os.path.basename(train_set))
    model = store.load_model(filename=model_name)

    image_names = os.listdir(test_set)
    projections = [
        reducer.compute_query_image(model, test_set, name)[0]
        for name in image_names
    ]
    return pd.DataFrame({
        "imageId": image_names,
        "reducedDimensions": projections
    })
def compute_latent_semantic_for_label(fea_ext_mod, dim_red_mod, label, k_value, folder):
    """Reduce the labelled images of ``folder`` for ``label`` and save the model.

    The model is stored as
    ``<feature>_<reduction>_<label>_<k>_<folder basename>``.

    Returns:
        The three values returned by ``DimensionReduction.execute()``,
        unchanged: ``(obj_lat, feat_lat, model)``.
    """
    reducer = DimensionReduction(fea_ext_mod,
                                 dim_red_mod,
                                 k_value,
                                 label,
                                 folder_metadata=folder,
                                 metadata_collection="labelled")
    outcome = reducer.execute()
    obj_lat, feat_lat, model = outcome

    model_name = "{0}_{1}_{2}_{3}_{4}".format(fea_ext_mod, dim_red_mod, label,
                                              str(k_value),
                                              os.path.basename(folder))
    model_interact.save_model(model=model, filename=model_name)
    return obj_lat, feat_lat, model
def run_task4(feature_extraction_model, dimension_reduction_model, folder, image_name, dist_func, label, k_value, m_value):
    """Return the m images most similar to ``image_name`` (Task 4).

    Loads the model stored as
    ``<feature>_<reduction>_<label-without-spaces>_<k>`` and passes it to
    ``DimensionReduction.find_m_similar_images``.

    Returns:
        Whatever ``find_m_similar_images`` returns.
    """
    model_name = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                          dimension_reduction_model,
                                          label.replace(" ", ''),
                                          str(k_value))
    model = model_interact.load_model(filename=model_name)

    reducer = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model, k_value, label)
    # The matrix itself is not used; the call is kept because the original
    # performed it before the similarity search.
    reducer.get_object_feature_matrix()
    return reducer.find_m_similar_images(model, m_value, folder, image_name,
                                         dist_func)
def main():
    """Drive Task 3: label-specific dimensionality reduction.

    Reads the user inputs, stores ``HandInfo.csv`` in MongoDB, runs the
    reduction for the chosen label, saves the fitted model, prints the
    term-weight pairs and writes them to a CSV file under ``output/``.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()
    label = get_input_image_label()

    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print(
        "Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}"
        .format(feature_extraction_model, dimension_reduction_model, k_value))
    print(separator)

    print(separator)
    print("Saving the metadata to MongoDB")
    print(separator)
    csv_reader.save_hand_csv_mongo("HandInfo.csv")
    print(separator)

    # Reduce the feature space for the chosen label.
    reducer = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model, k_value, label)
    obj_lat, feat_lat, model = reducer.execute()

    # Store the fitted model; spaces are stripped from the label here.
    model_name = "{0}_{1}_{2}_{3}".format(feature_extraction_model,
                                          dimension_reduction_model,
                                          label.replace(" ", ''),
                                          str(k_value))
    model_interact.save_model(model=model, filename=model_name)

    # Console output of the term-weight pairs.
    print_tw(obj_lat, feat_lat)

    # CSV output; the label is used as entered (spaces kept).
    csv_name = "task3_{}_{}_{}_{}".format(feature_extraction_model,
                                          dimension_reduction_model, label,
                                          k_value)
    csv_reader.save_to_csv(obj_lat, feat_lat, csv_name)
    print("Please check the CSV file: output/{}.csv".format(csv_name))
def main():
    """Drive Task 1: dimensionality reduction over the whole data set.

    Reads the user inputs, runs and stores the reduction through
    ``save_model``, prints and saves the term-weight pairs, then shows the
    visualizations. The data latent-semantics view is shown only when
    ``k <= 20``.
    """
    feature_extraction_model = get_input_feature_extractor_model()
    dimension_reduction_model = get_input_dimensionality_reduction_model()
    k_value = get_input_k()

    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print(
        "Feature Extraction Model: {}\nDimensionality Reduction Model: {}\nk-value: {}"
        .format(feature_extraction_model, dimension_reduction_model, k_value))
    print(separator)

    # Reduce the feature space and store the fitted model.
    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    obj_lat, feat_lat = save_model(dim_reduction, feature_extraction_model,
                                   dimension_reduction_model, k_value)

    # Term-weight pairs: console first, then CSV.
    data_tw = print_tw(obj_lat, feat_lat)
    filename = ("task1" + '_' + feature_extraction_model + '_' +
                dimension_reduction_model + '_' + str(k_value))
    CSVReader().save_to_csv(obj_lat, feat_lat, filename)
    print("Please check the CSV file: output/{}.csv".format(filename))

    data = dim_reduction.get_object_feature_matrix()
    title = {
        "Feature Extraction": feature_extraction_model,
        "Dimensionality Reduction": dimension_reduction_model,
        "k": k_value,
    }

    # NOTE(review): the source indentation was lost; the feature view is
    # assumed to run for every k, only the data view is limited to k <= 20.
    show_data_view = k_value <= 20
    if show_data_view:
        print("Generating Visualization ...")
        show_data_ls(data, data_tw, title)
    print("Generating Visualization ...")
    show_feature_ls(data, feat_lat, title)
def main():
    """Drive Task 7: NMF over a subject-subject similarity matrix.

    Builds one row of similarity values per subject with
    ``task6.find_similar_subjects``, reduces the resulting matrix with NMF
    using the k entered by the user, prints the term-weight pairs and saves
    them to a CSV file under ``output/``.
    """
    feature_extraction_model = task6.feature_extraction_model
    dimension_reduction_model = task6.dimension_reduction_model
    # k used for the image-level model behind the similarity scores.
    k_value_for_ss_similarity = 10
    given_k_value = get_input_k()

    separator = global_constants.LINE_SEPARATOR
    print(separator)
    print("User Inputs summary")
    print(separator)
    print("k-value: {}".format(given_k_value))
    print(separator)

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model,
                                       k_value_for_ss_similarity)
    # Original feature vectors and the ids of the images they belong to.
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})

    # Metadata of those images gives the set of subject ids in the data set.
    unique_image_ids = list(set(img_set["imageId"].tolist()))
    subject_data = dim_reduction.get_metadata("imageName", unique_image_ids)
    dataset_subject_ids = set(subject_data["id"])

    subject_subject_matrix = []
    m_value = len(img_set)
    starttime = time.time()
    model = task6.load_model(dim_reduction, feature_extraction_model,
                             dimension_reduction_model,
                             k_value_for_ss_similarity)
    folder = path.basename(path.dirname(obj_feat_matrix['path'][0]))

    for subjectid in dataset_subject_ids:
        subject_images = dim_reduction.get_metadata(
            "id", list([subjectid]))["imageName"].tolist()
        # Keep only the subject's images that are part of the fitted data.
        known_images = set(img_set["imageId"].tolist())
        image_list_for_subject = list(
            set(subject_images).intersection(known_images))
        similar_subjects = task6.find_similar_subjects(
            subjectid, image_list_for_subject, model, img_set, dim_reduction,
            m_value, folder)
        row = np.asarray(list(similar_subjects.values()))
        subject_subject_matrix.append(row)

    print("\nTime taken to create subject subject matrix: {}\n".format(
        time.time() - starttime))

    # NMF on the subject-subject matrix.
    matrix = pd.DataFrame(data={
        'imageId': list(dataset_subject_ids),
        'featureVector': subject_subject_matrix
    })
    dim_red = DimensionReduction(None,
                                 "NMF",
                                 given_k_value,
                                 subject_subject=True,
                                 matrix=matrix)
    w, h, _ = dim_red.execute()

    # Console output of the latent semantics.
    print_tw(w, h, subject_subject=True)

    # CSV output.
    filename = "task7" + '_' + str(given_k_value)
    CSVReader().save_to_csv(w, None, filename, subject_subject=True)
    print("Please check the CSV file: output/{}.csv".format(filename))
def main():
    """Rank images with Personalized PageRank from user-chosen query images.

    Reads k (passed to ``sim_graph_from_sim_max``), K (number of results
    kept), a folder and the query images, builds a cosine-similarity graph
    over the folder's HOG features, runs ``ppr`` and shows the top K
    results.
    """
    feature_extraction_model = "HOG"
    dimension_reduction_model = "PCA"
    k_value = get_input_k("k")
    K_value = get_input_k("K")
    folder = get_input_folder("Folder")
    dim_k_value = 40
    query_images = get_input_image_list(folder)

    start = time.time()
    dim_red = DimensionReduction(feature_extraction_model,
                                 dimension_reduction_model,
                                 dim_k_value,
                                 folder_metadata=folder,
                                 metadata_collection="labelled")
    obj_feat = dim_red.get_object_feature_matrix()
    feature_matrix = np.array(obj_feat['featureVector'].tolist())
    image_ids = np.array(obj_feat['imageId'])

    similarity = cosine_similarity(feature_matrix)
    sim_graph = sim_graph_from_sim_max(similarity, image_ids, k_value)
    top_results = ppr(sim_graph, image_ids, query_images)[:K_value]

    print("Top {} images from Personalized page Rank are:".format(K_value))
    for entry in top_results:
        # Attach the absolute file path before printing and plotting.
        entry["path"] = os.path.abspath(os.path.join(folder, entry['imageId']))
        print(entry)

    query_paths = []
    for img in query_images:
        query_paths.append(os.path.abspath(os.path.join(folder, img)))

    title = {"Model": "Personalized Page Rank", "k": k_value, "K": K_value}
    show_images_ppr(query_paths, title, top_results)
    print("Execution time: {} seconds".format(time.time() - start))
def main():
    """Classify images as dorsal/palmar with a Gaussian-kernel SVM.

    Training data are the reduced dimensions computed separately for the
    "dorsal" and "palmar" labels of the labelled folder (HOG + PCA, k=30);
    the test data are the images of the folder to classify. Prints the
    accuracy and one line per classified image.
    """
    fea_ext_mod = "HOG"
    dim_red_mod = "PCA"
    k_value = 30
    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))

    # Latent semantics per label, each also written to its own CSV file.
    latent_by_label = {}
    for aspect in ("dorsal", "palmar"):
        lat, feat_lat, _ = compute_latent_semantic_for_label(
            fea_ext_mod, dim_red_mod, aspect, k_value, training_set)
        csv_name = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                    aspect, str(k_value))
        csv_reader.save_to_csv(lat, feat_lat, csv_name)
        latent_by_label[aspect] = lat
    obj_lat = latent_by_label["dorsal"]
    obj_lat_p = latent_by_label["palmar"]

    x_train = (obj_lat['reducedDimensions'].tolist() +
               obj_lat_p['reducedDimensions'].tolist())

    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, "dorsal", training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)

    def aspect_names(frame, collection):
        """Return the first word of aspectOfHand for the frame's images."""
        aspects = dim_red.get_metadata_collection(
            "imageName", frame['imageId'].tolist(),
            collection)['aspectOfHand'].tolist()
        return [text.split(' ')[0] for text in aspects]

    y_train = aspect_names(obj_lat, "labelled")
    y_train += aspect_names(obj_lat_p, "labelled")
    y_test = aspect_names(red_dim_unlabelled_images, "unlabelled")

    # Encode the labels: 1 for "dorsal", -1 for everything else.
    x_train = np.array(x_train)
    y_train = np.array([1 if name == "dorsal" else -1 for name in y_train])

    # Shuffle the training samples and labels with the same permutation.
    indices = np.arange(x_train.shape[0])
    np.random.shuffle(indices)
    x_train = x_train[indices]
    y_train = y_train[indices]
    x_test = np.array(x_test)

    # Train the SVM and classify the test images.
    clf = SupportVectorMachine(gaussian_kernel, C=500)
    clf.fit(x_train, y_train)
    predictions = clf.predict(x_test)

    y_test = [1 if name == "dorsal" else -1 for name in y_test]

    correct = np.sum(predictions == y_test)
    print("---------------------------")
    accuracy = (correct / len(predictions)) * 100
    print("Accuracy: " + str(accuracy) + "%")

    unlabelled_images = red_dim_unlabelled_images['imageId']
    predicted_labels = [
        "dorsal" if value == 1 else "palmar" for value in predictions
    ]
    actual_labels = ["dorsal" if value == 1 else "palmar" for value in y_test]

    print("---------------------------")
    print("Results:")
    print("Image ID, Prediction, Actual")
    for image_id, p, a in zip(unlabelled_images, predicted_labels,
                              actual_labels):
        print("(" + image_id + ", " + p + ", " + a + ")")
def main():
    """Classify images as dorsal/palmar with logistic regression.

    Training data are the reduced dimensions computed separately for the
    "dorsal" and "palmar" labels of the labelled folder (HOG + SVD, k from
    the user); the test data are the images of the folder to classify.
    Prints the accuracy and one line per classified image.
    """
    fea_ext_mod = "HOG"
    dim_red_mod = "SVD"
    k_value = get_input_k("k-value")
    training_set = os.path.abspath(get_input_folder("Labelled"))
    test_set = os.path.abspath(get_input_folder("Classify"))

    def first_words(frame, collection):
        """Return the first word of aspectOfHand for the frame's images."""
        aspects = dim_red.get_metadata_collection(
            "imageName", frame['imageId'].tolist(),
            collection)['aspectOfHand'].tolist()
        return [text.split(' ')[0] for text in aspects]

    # Dorsal part of the training set.
    label = "dorsal"
    obj_lat, feat_lat, _ = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label, k_value, training_set)
    csv_name = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label, str(k_value))
    csv_reader.save_to_csv(obj_lat, feat_lat, csv_name)
    x_train = obj_lat['reducedDimensions'].tolist()

    # Test images, reduced with the "dorsal" label argument.
    red_dim_unlabelled_images = reduced_dimensions_for_unlabelled_folder(
        fea_ext_mod, dim_red_mod, k_value, label, training_set, test_set)
    x_test = red_dim_unlabelled_images['reducedDimensions'].tolist()

    dim_red = DimensionReduction(fea_ext_mod, dim_red_mod, k_value)
    y_train = first_words(obj_lat, "labelled")

    # Palmar part of the training set.
    label_p = 'palmar'
    obj_lat_p, feat_lat_p, _ = compute_latent_semantic_for_label(
        fea_ext_mod, dim_red_mod, label_p, k_value, training_set)
    csv_name = "p3task1_{0}_{1}_{2}_{3}".format(fea_ext_mod, dim_red_mod,
                                                label_p, str(k_value))
    csv_reader.save_to_csv(obj_lat_p, feat_lat_p, csv_name)
    x_train += obj_lat_p['reducedDimensions'].tolist()
    y_train += first_words(obj_lat_p, "labelled")

    # Shuffle samples and labels together.
    paired = list(zip(x_train, y_train))
    random.shuffle(paired)
    x_train, y_train = zip(*paired)

    y_test = first_words(red_dim_unlabelled_images, "unlabelled")

    lr = LogisticRegression(penalty='l2',
                            random_state=np.random.RandomState(42),
                            solver='lbfgs',
                            max_iter=300,
                            multi_class='ovr',
                            class_weight='balanced',
                            n_jobs=-1,
                            l1_ratio=0)
    lr.fit(x_train, y_train)
    predictions = lr.predict(x_test)

    unlabelled_images = red_dim_unlabelled_images['imageId'].tolist()
    rows = zip(unlabelled_images, list(predictions), list(y_test))

    print("---------------------------")
    print(" Results:")
    print("---------------------------")
    print(" Accuracy:", lr.score(x_test, y_test))
    print("---------------------------")
    print(" Image ID | Prediction | Actual")
    for image_id, p, a in rows:
        print(" " + image_id + " | " + p + " | " + a)
def main():
    """Drive Task 6: show the subjects most similar to a given subject.

    Reads a subject id, samples up to five of that subject's images, scores
    every subject with ``find_similar_subjects``, prints the three best
    scoring other subjects and plots their images next to the query
    subject's images. The figure is written to
    ``output/task6_<feature>_<reduction>_<k>_<dist>_<subject>.png``.

    ``feature_extraction_model``, ``dimension_reduction_model`` and
    ``dist_func`` are read from the enclosing module.
    """
    given_subject_id = get_input_subject_id()
    k_value = 40
    master_folder = "Hands"
    # Number of similar subjects to report and images sampled for the query.
    similar_subject_count = 3
    sample_size = 5

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)
    # Original feature vectors.
    obj_feat_matrix = dim_reduction.get_object_feature_matrix()
    # Model saved by task 1.
    model = load_model(dim_reduction, feature_extraction_model,
                       dimension_reduction_model, k_value)
    # Ids of the images that are part of the fitted data.
    img_set = pd.DataFrame({"imageId": obj_feat_matrix['imageId']})
    # Every image is ranked against the query images.
    m_value = len(img_set)

    print(global_constants.LINE_SEPARATOR)
    print("User Inputs summary")
    print(global_constants.LINE_SEPARATOR)
    print("Query Subject Id: {}".format(given_subject_id))
    print(global_constants.LINE_SEPARATOR)

    # Sample the query subject's images. random.sample raises ValueError
    # when asked for more items than exist, so the size is capped.
    metadata = dim_reduction.get_metadata("id", list([given_subject_id]))
    subject_images = list(set(metadata["imageName"].tolist()))
    image_list_for_given_subject = random.sample(
        subject_images, min(sample_size, len(subject_images)))

    starttime = time.time()
    subject_similarity = find_similar_subjects(given_subject_id,
                                               image_list_for_given_subject,
                                               model, img_set, dim_reduction,
                                               m_value, master_folder)
    # Highest similarity first.
    sorted_subject_similarity = sorted(subject_similarity.items(),
                                       key=operator.itemgetter(1),
                                       reverse=True)

    # Best other subjects; fewer than requested when the data set is small
    # (the former index-based loop raised IndexError in that case).
    top_subjects = [
        entry for entry in sorted_subject_similarity
        if entry[0] != given_subject_id
    ][:similar_subject_count]

    print()
    print("Subject : Score")
    list_subjects = []
    for subject_id, score in top_subjects:
        print(subject_id, " : ", score)
        list_subjects.append([subject_id, score])
    print()

    # Image paths per similar subject, limited to images in the fitted data.
    folder_path = os.path.dirname(obj_feat_matrix['path'][0])
    known_images = set(img_set["imageId"].tolist())
    image_list_for_similar_subjects_abs_path = []
    for subject_id, _ in top_subjects:
        metadata = dim_reduction.get_metadata("id", list([subject_id]))
        subject_image_names = list(
            set(metadata["imageName"].tolist()).intersection(known_images))
        image_list_for_similar_subjects_abs_path.append(
            [os.path.join(folder_path, image) for image in subject_image_names])

    # Paths of the sampled query images inside the master folder.
    image_list_for_given_subject_abs_path = [
        os.path.abspath(os.path.join(master_folder, image))
        for image in image_list_for_given_subject
    ]

    output_path = os.path.abspath("output")
    os.makedirs(output_path, exist_ok=True)
    fig_filename = os.path.join(
        output_path,
        "task6_{0}_{1}_{2}_{3}_{4}.png".format(feature_extraction_model,
                                               dimension_reduction_model,
                                               str(k_value), dist_func,
                                               given_subject_id))

    # Plot the query subject next to the similar subjects.
    imgvwr.show_subjectwise_images(given_subject_id,
                                   image_list_for_given_subject_abs_path,
                                   list_subjects,
                                   image_list_for_similar_subjects_abs_path,
                                   fig_filename)
    print("\nTime taken for task 6: {}\n".format(time.time() - starttime))
def main():
    """Classify images as dorsal/palmar with Personalized PageRank.

    HOG and CM feature vectors of the labelled and the to-be-classified
    folder are concatenated per image. A k-nearest-neighbour graph
    (k=10, cosine similarity) over all images is fed to ``ppr`` twice, once
    seeded with the dorsal images and once with the palmar images. An image
    is labelled "dorsal" when its dorsal result is smaller than its palmar
    result, otherwise "palmar". Prints the per-image outcome, the
    classification accuracy and the execution time.
    """
    feature_extraction_model = "HOG"
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 10
    dim_k_value = 40
    lab_folder = get_input_folder("Labelled Folder")
    unlab_folder = get_input_folder("Classify")
    start = time.time()

    def load_features(extractor, folder, collection):
        """Return the object-feature matrix for one extractor and folder."""
        reducer = DimensionReduction(extractor,
                                     dimension_reduction_model,
                                     dim_k_value,
                                     folder_metadata=folder,
                                     metadata_collection=collection)
        return reducer.get_object_feature_matrix()

    # First feature model: labelled, then unlabelled images.
    obj_feat_lab = load_features(feature_extraction_model, lab_folder,
                                 "labelled")
    features_list_lab = np.array(obj_feat_lab['featureVector'].tolist())
    images_list_lab = np.array(obj_feat_lab['imageId'])

    obj_feat_unlab = load_features(feature_extraction_model, unlab_folder,
                                   "unlabelled")
    features_list_unlab = np.array(obj_feat_unlab['featureVector'].tolist())
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # Second feature model, same folders; image ids come from the first one.
    obj_feat_lab_1 = load_features(feature_extraction_model_1, lab_folder,
                                   "labelled")
    features_list_lab_1 = np.array(obj_feat_lab_1['featureVector'].tolist())

    obj_feat_unlab_1 = load_features(feature_extraction_model_1, unlab_folder,
                                     "unlabelled")
    features_list_unlab_1 = np.array(
        obj_feat_unlab_1['featureVector'].tolist())

    # Concatenate both feature vectors per image.
    features_list_lab = np.concatenate(
        (features_list_lab, features_list_lab_1), axis=1)
    features_list_unlab = np.concatenate(
        (features_list_unlab, features_list_unlab_1), axis=1)

    # Split the labelled ids by label (previously done twice with the same
    # argument; one call is enough).
    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    features_list = np.concatenate((features_list_lab, features_list_unlab))
    images_list = list(np.concatenate((images_list_lab, images_list_unlab)))

    # Similarity graph: per row keep the similarities at positions 1..k of
    # the descending order (position 0 is skipped), zero elsewhere. The
    # array is preallocated instead of grown with np.append per row.
    cos_sim = cosine_similarity(features_list)
    node_count = len(cos_sim)
    sim_graph = np.zeros((node_count, node_count))
    for row_idx, row in enumerate(cos_sim):
        k_largest = np.argsort(-np.array(row))[1:k_value + 1]
        sim_graph[row_idx, k_largest] = row[k_largest]

    # Normalise every row to sum to one.
    row_sums = sim_graph.sum(axis=1)
    sim_graph = sim_graph / row_sums[:, np.newaxis]

    results_dorsal = ppr(sim_graph, images_list, dorsal_list)
    results_palmar = ppr(sim_graph, images_list, palmar_list)

    final_results = {}
    for img in images_list_unlab:
        if results_dorsal[img] < results_palmar[img]:
            final_results[img] = "dorsal"
        else:
            final_results[img] = "palmar"

    actual_labels = fetch_actual_labels(images_list_unlab)
    print("Classification")
    no_correct = 0
    correctly_classified = []
    incorrectly_classified = []
    print("| ImageId | Prediction | Actual |")
    for image_id, predicted in final_results.items():
        print("| {} | {} | {} |".format(image_id, predicted,
                                        actual_labels[image_id]))
        if predicted == actual_labels[image_id]:
            correctly_classified.append(image_id)
            no_correct += 1
        else:
            incorrectly_classified.append(image_id)

    print("Correctly classified: {}\n".format(correctly_classified))
    print("InCorrectly classified: {}\n".format(incorrectly_classified))
    print("Classification Accuracy: {}%".format(no_correct /
                                                len(images_list_unlab) * 100))
    print("Execution time: {} seconds".format(time.time() - start))
def main():
    """Classify images as dorsal/palmar, one PageRank run per image.

    For every image of the unlabelled folder, its HOG and CM feature vectors
    are read from MongoDB, appended to the labelled feature matrix, and a
    k-nearest-neighbour graph (k=5, cosine similarity) is built. ``ppr`` is
    run with the image as the only seed. The results are walked in order
    until five labelled images have been seen; the image is labelled
    "dorsal" when more of them are dorsal than palmar, otherwise "palmar".
    Images for which fewer than five labelled results are seen stay
    unclassified and are not part of the accuracy.
    """
    start = time.time()
    feature_extraction_model = "HOG"
    feature_extraction_model_1 = "CM"
    dimension_reduction_model = "PCA"
    k_value = 5
    dim_k_value = 40
    # Number of labelled results that take part in the vote.
    votes_needed = 5
    lab_folder = "Dataset3/Labelled/Set1"
    unlab_folder = "Dataset3/Unlabelled/Set 2"

    def load_features(extractor, folder, collection):
        """Return the object-feature matrix for one extractor and folder."""
        reducer = DimensionReduction(extractor,
                                     dimension_reduction_model,
                                     dim_k_value,
                                     folder_metadata=folder,
                                     metadata_collection=collection)
        return reducer.get_object_feature_matrix()

    # Labelled images, first feature model.
    obj_feat_lab = load_features(feature_extraction_model, lab_folder,
                                 "labelled")
    features_list_lab = np.array(obj_feat_lab['featureVector'].tolist())
    images_list_lab = np.array(obj_feat_lab['imageId'])

    # Unlabelled images: only their ids are needed from this matrix.
    obj_feat_unlab = load_features(feature_extraction_model, unlab_folder,
                                   "unlabelled")
    images_list_unlab = np.array(obj_feat_unlab['imageId'])

    # Labelled images, second feature model; concatenated per image.
    obj_feat_lab_1 = load_features(feature_extraction_model_1, lab_folder,
                                   "labelled")
    features_list_lab_1 = np.array(obj_feat_lab_1['featureVector'].tolist())
    feature_list = np.concatenate((features_list_lab, features_list_lab_1),
                                  axis=1)

    # Split the labelled ids by label (previously done twice with the same
    # argument; one call is enough).
    dorsal_list, palmar_list = filter_images_by_label(images_list_lab)

    mongo_wrap = MongoWrapper()
    final_results = {}
    for img in images_list_unlab:
        images_list = np.concatenate((images_list_lab, [img]))

        # Feature vector of the query image: both models, concatenated.
        res = mongo_wrap.find(feature_extraction_model.lower(),
                              {"imageId": img})[0]
        res1 = mongo_wrap.find(feature_extraction_model_1.lower(),
                               {"imageId": img})[0]
        feature_query = np.concatenate(
            (np.array(res["featureVector"]), np.array(res1["featureVector"])))
        features_list = np.vstack((feature_list, feature_query))

        # Similarity graph: per row keep the similarities at positions 1..k
        # of the descending order (position 0 is skipped), zero elsewhere.
        # The array is preallocated instead of grown with np.append per row.
        cos_sim = cosine_similarity(features_list)
        node_count = len(cos_sim)
        sim_graph = np.zeros((node_count, node_count))
        for row_idx, row in enumerate(cos_sim):
            k_largest = np.argsort(-np.array(row))[1:k_value + 1]
            sim_graph[row_idx, k_largest] = row[k_largest]

        # Normalise every row to sum to one.
        row_sums = sim_graph.sum(axis=1)
        sim_graph = sim_graph / row_sums[:, np.newaxis]

        results = ppr(sim_graph, images_list, [img])

        # Vote among the first labelled images in the result order.
        dorsal_count = 0
        palmar_count = 0
        for r in results:
            if r == img:
                continue
            if r in dorsal_list:
                dorsal_count += 1
            elif r in palmar_list:
                palmar_count += 1
            if dorsal_count + palmar_count >= votes_needed:
                if dorsal_count > palmar_count:
                    final_results[img] = "dorsal"
                else:
                    final_results[img] = "palmar"
                break

    actual_labels = fetch_actual_labels(final_results.keys())
    print("Classification")
    no_correct = 0
    print(len(final_results))
    for image_id, predicted in final_results.items():
        print("Image Id: {}, Label:{} Actual Label: {}".format(
            image_id, predicted, actual_labels[image_id]))
        if predicted == actual_labels[image_id]:
            no_correct += 1

    # Avoid ZeroDivisionError when no image could be classified.
    if final_results:
        accuracy = (no_correct / len(final_results)) * 100
    else:
        accuracy = 0.0
    print("Classification Accuracy: {}".format(accuracy))
    print("Execution time: {} seconds".format(time.time() - start))
def _kmeans(rows, k, max_iter=500):
    """Cluster ``rows`` into ``k`` groups with plain k-means.

    Each round assigns every row to its nearest centroid (Euclidean distance)
    and then recomputes every centroid as the mean of its members. Iteration
    stops when no centroid changed or after ``max_iter`` rounds.

    Args:
        rows: Sequence of records whose first element (``row[0]``) is the
            feature vector. The whole record is stored in the result.
        k: Number of clusters.
        max_iter: Upper bound on the number of assign/update rounds.

    Returns:
        ``(clusters, centroids)`` where ``clusters`` maps a cluster index to
        the list of rows assigned to it and ``centroids`` maps a cluster index
        to its centre vector.

    Raises:
        ValueError: Raised by ``np.random.choice`` when ``k`` is larger than
            ``len(rows)`` (seeds are drawn without replacement).
    """
    seeds = np.random.choice(len(rows), size=k, replace=False)
    centroids = {i: np.asarray(rows[seeds[i]][0]) for i in range(k)}
    clusters = {i: [] for i in range(k)}

    for _ in range(max_iter):
        clusters = {i: [] for i in range(k)}
        for row in rows:
            vector = np.asarray(row[0])
            distances = [
                np.linalg.norm(vector - centroids[i]) for i in range(k)
            ]
            # Ties go to the lowest cluster index.
            clusters[distances.index(min(distances))].append(row)

        previous = dict(centroids)
        for i, members in clusters.items():
            # An empty cluster keeps its previous centre: averaging an empty
            # list would give NaN, which breaks every later distance
            # comparison and the convergence test below.
            if members:
                centroids[i] = np.average([m[0] for m in members], axis=0)

        if all(np.array_equal(centroids[i], previous[i]) for i in range(k)):
            break

    return clusters, centroids


def _nearest_centroid_distance(vector, centroids):
    """Return the Euclidean distance from ``vector`` to its closest centroid.

    Args:
        vector: Feature vector (list or array).
        centroids: Mapping of cluster index to centre vector.
    """
    vector = np.asarray(vector)
    return min(
        np.linalg.norm(vector - np.asarray(center))
        for center in centroids.values()
    )


def main():
    """Classify a folder of images as dorsal/palmar via per-label k-means.

    Steps:
      1. Prompt for the number of clusters and for the labelled and
         to-be-classified folders.
      2. Compute the latent semantics of the 'dorsal' and 'palmar' images of
         the labelled folder and the reduced representation of the test
         folder (all through ``p3task1``, with ``k_value`` fixed to 30).
      3. Run k-means separately on the dorsal and on the palmar vectors.
      4. Label every test image with the class owning the closest centroid,
         compare with the 'aspectOfHand' metadata and print the number of
         correct and wrong predictions.
      5. Register the gallery routes on ``app`` and start it on ``port_g``.

    NOTE(review): the test folder is reduced with the 'dorsal' label only,
    yet the same vectors are compared with the palmar centroids - confirm
    that both label models share one latent space.
    """
    k = get_input_k("C")
    training_set = get_input_folder("Labelled")
    test_set = get_input_folder("Classify")
    k_value = 30

    dim_reduction = DimensionReduction(feature_extraction_model,
                                       dimension_reduction_model, k_value)

    label = 'dorsal'
    obj_lat, _, _ = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label, k_value,
        training_set)
    label_p = 'palmar'
    obj_lat_p, _, _ = p3task1.compute_latent_semantic_for_label(
        feature_extraction_model, dimension_reduction_model, label_p,
        k_value, training_set)
    red_dim = p3task1.reduced_dimensions_for_unlabelled_folder(
        feature_extraction_model, dimension_reduction_model, k_value, label,
        training_set, test_set)

    # Rows of [reducedDimensions, imageId]; the image id travels with the
    # vector so the gallery templates receive the whole record per cluster.
    X = obj_lat[['reducedDimensions', 'imageId']].values
    Y = obj_lat_p[['reducedDimensions', 'imageId']].values

    image_ids = red_dim['imageId'].tolist()
    vectors = red_dim['reducedDimensions'].tolist()

    unlabelled_aspect = dim_reduction.get_metadata_collection(
        "imageName", image_ids, "unlabelled")['aspectOfHand'].tolist()
    # Keep only the first word of the aspect string for the comparison with
    # the 'dorsal' / 'palmar' labels.
    y_test = [aspect.split(' ')[0] for aspect in unlabelled_aspect]

    classes, centroid = _kmeans(X, k)
    classes_p, centroid_p = _kmeans(Y, k)

    correct = 0
    wrong = 0
    dorsal_images = []
    palmar_images = []
    for ind, (image_name, vector) in enumerate(zip(image_ids, vectors)):
        dorsal_distance = _nearest_centroid_distance(vector, centroid)
        # Measured against the palmar centroids (the dorsal ones were used
        # here by mistake before).
        palmar_distance = _nearest_centroid_distance(vector, centroid_p)

        # A tie is decided in favour of 'palmar'.
        if dorsal_distance < palmar_distance:
            predicted = label
            dorsal_images.append(image_name)
        else:
            predicted = label_p
            palmar_images.append(image_name)

        if y_test[ind] == predicted:
            correct += 1
        else:
            wrong += 1

    print("correct" + str(correct))
    print("wrong" + str(wrong))
    print("\nClick here: http://localhost:{0}/result\n".format(port_g))
    print("\nClick here: http://localhost:{0}/dorsal\n".format(port_g))
    print("\nClick here: http://localhost:{0}/palmar\n".format(port_g))

    # The routes are defined here because they close over the folders and
    # results computed above. Function names are kept as they are also the
    # Flask endpoint names.
    @app.route('/Dataset2/<filename>')
    def send_image(filename):
        """Serve an image file from the labelled folder."""
        return send_from_directory((training_set), filename)

    @app.route('/test_set/<filename>')
    def send_image_result(filename):
        """Serve an image file from the folder being classified."""
        return send_from_directory((test_set), filename)

    @app.route('/dorsal')
    def get_gallery():
        """Render the dorsal clusters."""
        image_names = [classes, k]
        return render_template("demo.html", image_names=image_names)

    @app.route('/palmar')
    def get_gallery_p():
        """Render the palmar clusters."""
        image_names_p = [classes_p, k]
        return render_template("demo_p.html", image_names_p=image_names_p)

    @app.route('/result')
    def get_gallery_result():
        """Render the predicted dorsal and palmar image lists."""
        results = [dorsal_images, palmar_images]
        return render_template("task2.html", results=results)

    app.run(port=port_g)