def main (k, m="means", init_type="random"): # Starting clustering timer start_cluster = timeit.default_timer() # Initialize clusters if init_type == "random": initial_clusters = Initialize.random_centers(k) else: init_type = "kplusplus" initial_clusters = Initialize.kmeans_plusplus(k, train_images_flat,\ dist_fn=Distance.sumsq) # Run clustering algorithm final_responsibilities, final_clusters = Kmeans.kmeans(k,train_images_flat, initial_clusters, distfn = Distance.sumsq, method=m) # Find and print clustering time end_cluster = timeit.default_timer() clustering_time = end_cluster - start_cluster print "Time spent clustering : ", clustering_time # Save representative images to file. title = m + "_" + init_type + "_cluster" + str(k) File.save_images(k, train_images, final_responsibilities, final_clusters, title) ########################################################################### # Calculate Accuracy # ########################################################################### # Calculate final accuracy for clusters final, cluster_set = Accuracy.final_accuracy(final_responsibilities, train_labels, train_images_flat, final_clusters) # Now see how well we can classify the dataset start_cluster_test = timeit.default_timer() predictions = ClassifyClusters.classify(cluster_set, test_images_flat, test_labels, distfn = Distance.sumsq) finish_cluster_test = timeit.default_timer() # find time it took to test testing_time = finish_cluster_test - start_cluster_test print "Time spent testing : ", testing_time ########################################################################### # Outputs # ########################################################################### # k, prediction level, cluster_set, results = {"k" : k, "prediction_accuracy" : predictions[1], "cluster_means" : cluster_set, "cluster_stats" : final, "clustering_time" : clustering_time, "testing_time" : testing_time} with open('./results/' + title + '/' + title + '_results.json', 'w') as outfile: json.dump(results, outfile, cls=File.NumpyEncoder)