def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui

    As configured, this reads the DATA_896_URL table, groups it into 20
    hierarchical clusters, prints their distortion and their count, and
    then draws them.

    Alternatives that used to be kept as commented-out lines:
      * load DATA_3108_URL instead of DATA_896_URL
      * cluster_list = sequential_clustering(singleton_list, 15)
      * cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    (adjust the printed labels accordingly when switching method)
    """
    data_table = load_data_table(DATA_896_URL)

    # one single-member cluster per table row: row[0] becomes the only
    # element of the member set, row[1]..row[4] are passed through unchanged
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 20)
    distortion = compute_distortion(cluster_list, data_table)

    # Single-argument print(...) writes exactly what the former
    # `print a, b` statements wrote under Python 2 (items joined by one
    # space) and is also valid syntax on Python 3.
    print(" ".join(["Distortion of hierarchical clusters is ", str(distortion)]))
    print(" ".join(["Displaying", str(len(cluster_list)), "hierarchical clusters"]))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # the original notes associate the trailing True with "add cluster centers"
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def compute_distortion(cluster_list, data_table):
    """
    Return the total distortion of a clustering: the sum of
    cluster.cluster_error(data_table) over every cluster in cluster_list

    NOTE(review): only the loop and the return statement of this function
    were present in the file; the def line and the zero starting value
    were restored from the compute_distortion(cluster_list, data_table)
    calls further down -- confirm against the original source
    """
    distortion = 0
    for cluster in cluster_list:
        distortion += cluster.cluster_error(data_table)
    return distortion


def _singleton_clusters(table):
    """
    Return a fresh list holding one single-member cluster per row of table

    row[0] becomes the only element of the cluster's member set and
    row[1]..row[4] are handed to alg_cluster.Cluster unchanged
    """
    return [alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
            for row in table]


def _report(*parts):
    """
    Print parts separated by single spaces

    Produces the same text as the Python 2 statement `print a, b, c`
    while also being valid Python 3
    """
    print(" ".join([str(part) for part in parts]))


# Distortion sweep: for every cluster count from 6 to 20 (inclusive),
# cluster the DATA_896_URL table with each method and record the distortion.
data_table = load_data_table(DATA_896_URL)

# hierarchical_clustering
distortion_hc = []
for num_cluster in range(6, 21):
    # The singleton list is rebuilt for every cluster count, as before
    # (presumably the clustering routines modify the list they receive --
    # confirm before hoisting this out of the loop).
    singleton_list = _singleton_clusters(data_table)
    cluster_list = pj3.hierarchical_clustering(singleton_list, num_cluster)
    # Evaluated once and reused for both the record and the report; this
    # assumes compute_distortion gives the same value on a repeated call.
    distortion = compute_distortion(cluster_list, data_table)
    distortion_hc.append(distortion)
    _report("Distortion of hierarchical clusters is ", distortion)
    _report("Displaying", len(cluster_list), "hierarchical clusters")

# kmeans_clustering
distortion_kc = []
for num_cluster in range(6, 21):
    singleton_list = _singleton_clusters(data_table)
    # third argument is fixed at 5 (presumably the iteration count -- confirm)
    cluster_list = pj3.kmeans_clustering(singleton_list, num_cluster, 5)
    distortion = compute_distortion(cluster_list, data_table)
    distortion_kc.append(distortion)
    _report("Distortion of k-means clusters is ", distortion)
    _report("Displaying", len(cluster_list), "k-means clusters")