def main():
    relevant_images = set()
    irrelevant_images = set()
    prev_results = None

    task5_query, prev_results = get_task5_results()
    if not task5_query or not prev_results:
        print("Please run Task 5 first. Exiting.")
        sys.exit(0)

    meta = fetch_image_meta(prev_results)
    images_to_display = len(prev_results)

    while True:
        feedback_system = take_feedback_system_input()
        relevant_images = relevant_images.union(
            take_images_input("relevant", meta))
        irrelevant_images = irrelevant_images.union(
            take_images_input("irrelevant", meta))

        if not relevant_images and not irrelevant_images:
            print("No relevant or irrelevant images provided! Doing nothing.")
            continue

        new_relevant_images = feedback_systems[feedback_system](
            list(relevant_images), list(irrelevant_images),
            images_to_display, task5_query, prev_results)

        write_to_file("task6.html",
                      "task6-{}.html".format(feedback_system),
                      relevant=relevant_images,
                      irrelevant=irrelevant_images,
                      result=new_relevant_images,
                      title="TEST")
        prev_results = new_relevant_images
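
# take_images_input is not shown in this file; the sketch below is an
# assumption about its behavior (prompt text and validation included), not
# the project's actual implementation: read image ids from stdin and keep
# only those present in the metadata for the current result set.
def take_images_input(kind, meta):
    raw = input("Enter {} image ids (space-separated): ".format(kind))
    return {img for img in raw.split() if img in meta}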
# Mask all edge weights below the nth largest in each row to 0
img_img[img_img < nth[:, None]] = 0
# Normalize each row to sum to 1 so that edge weights can be read as
# transition probabilities
img_img = (img_img.T / img_img.sum(axis=1)).T

seed_vector = np.array(
    [1 / len(query_images) if img in query_images else 0 for img in images])

if args.math:
    steady_state = math_method(img_img, alpha, seed_vector)
else:
    steady_state, num_iter = power_iteration(img_img, alpha, seed_vector)
    print("Converged after {} iterations".format(num_iter))

# Take k_dominant + len(query_images) candidates so the query images
# themselves can be filtered out of the final ranking
image_indices = np.flip(
    steady_state.argsort())[:args.k_dominant + len(query_images)]
result = [(images[i], steady_state[i]) for i in image_indices
          if images[i] not in query_images][:args.k_dominant]

output.write_to_file("task3.html",
                     "task3-{}-{}-{}.html".format(args.k_edges,
                                                  args.k_dominant,
                                                  '-'.join(args.image_ids)),
                     ranks=result,
                     keys=query_images,
                     title="TEST")
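
# Neither power_iteration nor math_method appears in this file. Below is a
# minimal sketch of both, assuming a Personalized PageRank formulation with
# alpha as the restart probability and img_img as a row-stochastic
# transition matrix; the actual project code may differ.
import numpy as np


def power_iteration(transition, alpha, seed, tol=1e-8, max_iter=1000):
    """Iterate s <- (1 - alpha) * T^t s + alpha * seed until convergence."""
    state = seed.copy()
    for iteration in range(1, max_iter + 1):
        new_state = (1 - alpha) * transition.T @ state + alpha * seed
        if np.abs(new_state - state).sum() < tol:
            return new_state, iteration
        state = new_state
    return state, max_iter


def math_method(transition, alpha, seed):
    """Closed form: s = alpha * (I - (1 - alpha) * T^t)^-1 seed."""
    n = transition.shape[0]
    return alpha * np.linalg.solve(
        np.eye(n) - (1 - alpha) * transition.T, seed)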
        # cos_similarity(a, b) = dot(a, b) / (norm(a) * norm(b))
        dorsalI = dorsalI + cos_similarity(row, row1)
    dorsal.append(dorsalI)

# Calculate the cosine similarity of every test image with every palmar
# image and store the sums in the 'palmar' list
for row2 in reduced_test_data:
    row2 = row2 * test_variance_ratio
    palmarI = 0
    for row3 in reduced_palmar_vectors:
        row3 = row3 * palmar_variance_ratio
        palmarI = palmarI + cos_similarity(row2, row3)
    palmar.append(palmarI)

# Assign labels: 1 (dorsal) when the cumulative dorsal similarity is
# higher, else 0 (palmar). Note the original appended 0 when
# dorsal[i] > palmar[i], which inverted the labels.
p_label = [1 if dorsal[i] > palmar[i] else 0
           for i in range(len(reduced_test_data))]

# Change 1 to "Dorsal" and 0 to "Palmar" for visualization
final_list = ["Dorsal" if i == 1 else "Palmar" for i in p_label]

# Visualize the output; check the browser or the output folder
write_to_file("task4.html",
              "task1-{}.html".format(k_each),
              predictions=zip(test_data_paths, final_list),
              title="Task1")
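
# cos_similarity is defined elsewhere in the project; the inline comment
# above gives its formula, so a minimal sketch under that assumption:
import numpy as np


def cos_similarity(a, b):
    """Cosine similarity: dot(a, b) / (||a|| * ||b||)."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))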
    return zip(u_images, prediction)


def svm_driver(args, evaluate=False):
    model = settings.SVM.CLASSIFIER.MODEL
    k = settings.SVM.CLASSIFIER.K
    frt = settings.SVM.CLASSIFIER.FRT
    image_paths, pred = run_svm(evaluate, model, k, frt)
    return zip(image_paths, pred)


classifiers = {
    'ppr': ppr_driver,
    'decision': decision_tree_driver,
    'svm': svm_driver
}

if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    predictions = classifiers[args.classifier](args, args.evaluate)
    output.write_to_file("task4.html",
                         "task4-{}.html".format(args.classifier),
                         predictions=[(
                             item[0],
                             "palmar" if item[1] == 1.0 else "dorsal",
                         ) for item in predictions],
                         title="TEST")
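
# prepare_parser is defined elsewhere; a minimal sketch of what it likely
# contains, given the attributes used above (args.classifier and
# args.evaluate). Whether classifier is positional or a flag is an
# assumption made here for illustration.
import argparse


def prepare_parser():
    parser = argparse.ArgumentParser(description="Task 4: label test images")
    parser.add_argument("classifier", choices=["ppr", "decision", "svm"],
                        help="which classifier driver to run")
    parser.add_argument("--evaluate", action="store_true",
                        help="score predictions against known labels")
    return parser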
# Get centroids and centroid labels for the dorsal and palmar vectors
print("Clustering dorsal vectors")
dorsal_kmeans = Kmeans(dorsal_vectors, n_clusters)
dorsal_kmeans.cluster()
print("Clustering palmar vectors")
palmar_kmeans = Kmeans(palmar_vectors, n_clusters)
palmar_kmeans.cluster()

# Label each test vector by comparing its distance to the nearest dorsal
# and palmar centroids
labels = [
    predict_label(each, dorsal_kmeans, palmar_kmeans) for each in test_data
]
write_to_file("task4.html",
              "task2-{}.html".format(n_clusters),
              predictions=zip(test_data_paths, labels),
              title="TEST")

# Write cluster image paths to HTML
temp = pd.DataFrame(list(zip(dorsal_paths, dorsal_kmeans.closest)),
                    columns=['path', 'cluster'])
dorsal_clusters = list(
    temp.groupby(['cluster'])['path'].apply(lambda x: x.values.tolist()))
temp = pd.DataFrame(list(zip(palmar_paths, palmar_kmeans.closest)),
                    columns=['path', 'cluster'])
palmar_clusters = list(
    temp.groupby(['cluster'])['path'].apply(lambda x: x.values.tolist()))
write_to_file("clusters.html",
              "cluster.html",
              dorsal_clusters=dorsal_clusters,
              palmar_clusters=palmar_clusters,
              title="TEST")
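
# predict_label is not shown here; the sketch below assumes the Kmeans
# objects expose a `centroids` array, and labels a test vector by whichever
# side owns the closest centroid. Names and return values are assumptions.
import numpy as np


def predict_label(vector, dorsal_kmeans, palmar_kmeans):
    dorsal_dist = np.linalg.norm(dorsal_kmeans.centroids - vector,
                                 axis=1).min()
    palmar_dist = np.linalg.norm(palmar_kmeans.centroids - vector,
                                 axis=1).min()
    return "dorsal" if dorsal_dist < palmar_dist else "palmar"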
sub_to_idx = {sub: idx for idx, sub in enumerate(subs)}
# Index to subject id
idx_to_sub = [0] * len(sub_to_idx)
for sub in sub_to_idx:
    idx_to_sub[sub_to_idx[sub]] = sub

# A subject-subject similarity matrix: average image-image similarity
# between the two subjects' images
sub_sub = np.zeros((len(subs), len(subs),))
for sub1 in sub_to_idx:
    for sub2 in sub_to_idx:
        sub_sub[sub_to_idx[sub1], sub_to_idx[sub2]] = img_img[
            subs[sub1], :].take(subs[sub2], axis=1).mean()

w, _, h = reducer(sub_sub, args.k_latent_semantics, "nmf")

# Print term-weight pairs
get_term_weight_pairs(w, "task7_{}.csv".format(args.k_latent_semantics))

sub_weight = [
    sorted([("z{}".format(idx), weight,) for idx, weight in enumerate(row)],
           key=lambda x: x[1])
    for row in w
]

output.write_to_file("visualize_task7.html",
                     "task7-{}.html".format(args.k_latent_semantics),
                     vectors=sub_weight,
                     subs=subs,
                     idx_to_sub=idx_to_sub,
                     images=images,
                     sub_meta=sub_meta,
                     title="TEST")
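
# get_term_weight_pairs is shared with Task 8 below; a minimal sketch of
# what it is assumed to do: write each row's (term, weight) pairs, sorted
# by decreasing weight, into a CSV file. The exact format is an assumption.
import csv


def get_term_weight_pairs(matrix, filename):
    with open(filename, "w", newline="") as handle:
        writer = csv.writer(handle)
        for row in matrix:
            pairs = sorted(enumerate(row), key=lambda p: p[1], reverse=True)
            writer.writerow(["{}:{:.4f}".format(term, weight)
                             for term, weight in pairs])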
query = str(data_path / query)
index = images.index(query)
query_vec = data_matrix[index]

results = query_relevant_images(query_vec, t, layers, planes_per_layer,
                                data_matrix, images)
images, member_count, unique_member_count = results

image_paths = []
all_images = []
for img in images:
    image_paths.append([img[0].split('/')[-1], img[0]])
    all_images.append(img[0])

print("Total no. of unique images considered: ", unique_member_count)
print("Total no. of overall images: ", member_count)

# Store output for Task 5 (consumed by Task 6's relevance feedback)
store_output(query, all_images)
output.write_to_file("task5.html",
                     f"task5-{to_output}.html",
                     key=query,
                     items=image_paths,
                     title="Task5")
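
# query_relevant_images is implemented elsewhere; below is a minimal sketch
# of the assumed LSH lookup: hash the query with each layer's random
# hyperplanes, pool the bucket members, and rank them by Euclidean
# distance. Treating `layers` as bucket dicts keyed by sign patterns and
# `planes_per_layer` as the hyperplane matrices is an assumption for
# illustration; the real code may pass sizes and build these internally.
import numpy as np


def query_relevant_images(query_vec, t, layers, planes_per_layer,
                          data_matrix, images):
    candidates = set()
    total = 0
    for planes, buckets in zip(planes_per_layer, layers):
        key = tuple((planes @ query_vec) > 0)  # sign pattern = bucket id
        bucket = buckets.get(key, [])
        total += len(bucket)
        candidates.update(bucket)
    ranked = sorted(
        ((images[i], np.linalg.norm(data_matrix[i] - query_vec))
         for i in candidates),
        key=lambda pair: pair[1])[:t]
    return ranked, total, len(candidates)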
except KeyError:
    raise Exception("Invalid metadata detected")

vectors, eigen_values, latent_vs_old = reducer(img_meta,
                                               args.k_latent_semantics,
                                               "nmf")

# Distinct filenames so the second call does not overwrite the first
get_term_weight_pairs(vectors,
                      "task8_data_{}.csv".format(args.k_latent_semantics))
get_term_weight_pairs(latent_vs_old,
                      "task8_feat_{}.csv".format(args.k_latent_semantics))

# Extra credit
# Image path with a vector in the latent semantic space
data_z = zip(images, vectors)
# Most representative image path for each latent semantic in h
feature_z = [(idx, images[np.argmax(np.dot(img_meta, i))])
             for idx, i in enumerate(latent_vs_old)]

output.write_to_file("visualize_data_z.html",
                     "task8-data-z-{}.html".format(args.k_latent_semantics),
                     data_z=data_z,
                     title="TEST")
output.write_to_file("visualize_feat_z.html",
                     "task8-feat-z-{}.html".format(args.k_latent_semantics),
                     feature_z=feature_z,
                     title="TEST")
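
# reducer is the project's shared dimensionality-reduction helper used in
# Tasks 7 and 8; a minimal sketch of its assumed NMF branch, returning the
# transformed data (W), a placeholder for eigenvalues (NMF has none), and
# the components matrix (H) mapping latent semantics to original features.
from sklearn.decomposition import NMF


def reducer(matrix, k, method):
    if method == "nmf":
        model = NMF(n_components=k, init="nndsvd", random_state=0)
        w = model.fit_transform(matrix)  # data in the latent space
        h = model.components_            # latent vs. original features
        return w, None, h
    raise ValueError("Unsupported method: {}".format(method))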