def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) #data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # ************** Here we have to choose the type of clustering we want to use for visualization ******************** #cluster_list = sequential_clustering(singleton_list, 15); print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9); print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" if DESKTOP: # draw the clusters using matplotlib or simplegui alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 15) cluster_list = hierarchical_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 15) #print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = closest_pairs_and_clustering_algorithms.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_question(number, data_set): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ global DESKTOP print "Loading data table ..." data_table = load_data_table(data_set) print "Data table loaded. Creating clusters ..." singleton_list = [] # set correct number of clusters if number in [2, 3]: num_clusters = 15 elif number in [5, 6]: num_clusters = 9 print "\nQuestion number: ", number print "Number of clusters to be calculated: ", num_clusters # parse data_table into cluster objects for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) print "\nCluster list created. Passing list to hierarchical_clustering ..." # calculate clusters if number == 0: cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" elif number in [2, 5]: cluster_list = cpf.hierarchical_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" elif number in [3, 6]: cluster_list = cpf.kmeans_clustering(singleton_list, num_clusters, 5) print "Displaying", len(cluster_list), "k-means clusters" else: "Please pass a valid number to run_question. Valid options are 0, 2, 3, 5, or 6." # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_kmeans_example():
    """
    Compute the k-means distortion on DATA_896_URL for 6 to 20 clusters.

    For every cluster count in range(6, 21) the table rows are turned into
    a fresh list of singleton Clusters, kmeans_clustering is run on them
    (third argument 5), and the summed cluster_error of the result is
    recorded.

    Returns a list of 15 distortion values, ordered by cluster count
    (index 0 corresponds to 6 clusters).

    Nothing is plotted: the print/plot statements that used to follow the
    return were unreachable and have been removed.
    NOTE(review): the original text had lost its indentation; the return is
    taken to follow the loop rather than sit inside it -- confirm.
    """
    data_table = load_data_table(DATA_896_URL)

    def compute_distortion(cluster_list):
        """Sum cluster_error(data_table) over the clusters in cluster_list."""
        return sum(cluster.cluster_error(data_table)
                   for cluster in cluster_list)

    error = []
    for cluster_num in range(6, 21):
        # The original rebuilt the singleton list after every run, so each
        # k-means call still receives brand-new Cluster objects.
        singleton_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4])
            for line in data_table
        ]
        cluster_list = kmeans_clustering(singleton_list, cluster_num, 5)
        error.append(compute_distortion(cluster_list))
    return error
def Question5(): data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = module3_project.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # Question 2 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_3108_URL) # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print "Displaying", len(cluster_list), "hierarchical clusters" # Question 3 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_3108_URL) # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 15, 5) # print "Displaying", len(cluster_list), "k-means clusters" # Question 5 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_111_URL) # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print "Displaying", len(cluster_list), "hierarchical clusters" # Question 6 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_111_URL) # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) # print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ #data_table = load_data_table(DATA_3108_URL) #data_table = load_data_table(DATA_111_URL) #data_table = load_data_table(DATA_290_URL) data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "sequential clusters" #start_time = time.clock() cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "hierarchical:",compute_distortion(cluster_list) #print "time taken for plot:"+ str(time.clock() - start_time) print "Displaying", len(cluster_list), "hierarchical clusters" #start_time = time.clock() #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "kmeans: ",compute_distortion(cluster_list) #print "time taken for plot:"+ str(time.clock() - start_time) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers #print "" else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" cluster_list = alg_project3_solution.hierarchical_clustering( singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) # print "Displaying", len(cluster_list), "k-means clusters" # calculate cluster_error of hierarchical_clustering and kmeans_clustering hc_error = compute_distortion( alg_project3_solution.hierarchical_clustering(singleton_list, 9), data_table) kmc_error = compute_distortion( alg_project3_solution.kmeans_clustering(singleton_list, 9, 5), data_table) print "cluster_error:\nhierarchical_clustring: ", hc_error, "\nkmeans_clustering: ", kmc_error # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_question(number, data_set): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ global DESKTOP print "Loading data table ..." data_table = load_data_table(data_set) print "Data table loaded. Creating clusters ..." singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) print "Cluster list created. Passing list to hierarchical_clustering ..." if number == 0: cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" elif number in [2, 5]: cluster_list = cpf.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" elif number in [3, 6]: cluster_list = cpf.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" else: "Please pass a valid number to run_question." # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(table_type, clustering_algorithm, num_cluster, plot=True): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(table_type) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "sequential clusters" if clustering_algorithm == "h": cluster_list = alg_project3_solution.hierarchical_clustering( singleton_list, num_cluster) print "get", len(cluster_list), "hierarchical clusters" elif clustering_algorithm == "k": cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, num_cluster, 5) print "get", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if plot: if DESKTOP: #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers return (cluster_list, data_table)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_290_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16) # distortion = application3.compute_distortion(cluster_list, data_table) # print "Displaying", len(cluster_list), "hierarchical clusters, distortion:", distortion cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 16, 5) distortion = application3.compute_distortion(cluster_list, data_table) print "Displaying", len( cluster_list), "k-means clusters, distortion:", distortion # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table('unifiedCancerData_111.csv') singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print 'hierarchical error', app3_7.compute_distortion(cluster_list, data_table) # print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 9, 5) print 'k means error', app3_7.compute_distortion(cluster_list, data_table) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(table, method):
    """
    Compute clustering distortion for every cluster count from 20 down to 6.

    table  -- passed unchanged to load_data_table.
    method -- 'h_cluster' for alg_project3_solution.hierarchical_clustering,
              'k_cluster' for alg_project3_solution.kmeans_clustering
              (called with a fixed third argument of 5).

    Returns a dict mapping each cluster count (20 .. 6) to the value of
    compute_distortion(cluster_list, data_table) for that count.

    With the module-level DESKTOP flag true nothing is drawn.  Otherwise
    the last clustering computed (6 clusters) is shown in the simplegui
    viewer; the dict is now returned in that case too, where the function
    used to return None.

    Raises ValueError for any other method.  Previously such a value
    produced an empty dict (DESKTOP) or a NameError (simplegui).
    """
    if method not in ('h_cluster', 'k_cluster'):
        raise ValueError(
            "method must be 'h_cluster' or 'k_cluster', got %r" % (method,))

    data_table = load_data_table(table)

    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    cluster_distortion_dict = {}
    start = 20
    end = 6

    # Hierarchical runs are chained: the clustering obtained for `count`
    # clusters is the input for `count - 1`, so earlier merges are not
    # recomputed.  A shallow copy keeps singleton_list itself out of the
    # chain.
    new_list = list(singleton_list)
    cluster_list = None
    for count in range(start, end - 1, -1):
        if method == 'h_cluster':
            cluster_list = alg_project3_solution.hierarchical_clustering(
                new_list, count)
            cluster_distortion_dict[count] = compute_distortion(
                cluster_list, data_table)
            new_list = cluster_list
        else:
            # k-means always starts again from the singleton list
            cluster_list = alg_project3_solution.kmeans_clustering(
                singleton_list, count, 5)
            cluster_distortion_dict[count] = compute_distortion(
                cluster_list, data_table)

    if not DESKTOP:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)

    return cluster_distortion_dict
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ algo_used = 1 # 1: sequential clusters, 2: hierarchical clusters, 3: k-means clusters data_urls = [DATA_3108_URL, DATA_896_URL, DATA_290_URL, DATA_111_URL] source = 3 # pick which data source url data_table = load_data_table(data_urls[source - 1]) def clustering(algo_used, num_clusters, num_iter = 5): """ Uses specified algorithm to cluster data input: int for specified algorithm, data_table output: cluster_list """ singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) if algo_used == 1: cluster_list = sequential_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "sequential clusters" elif algo_used == 2: cluster_list = prj3.hierarchical_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" elif algo_used == 3: cluster_list = prj3.kmeans_clustering(singleton_list, num_clusters, num_iter) print "Displaying", len(cluster_list), "k-means clusters" return cluster_list def gen_random_clusters(num_clusters): """ Creates a list of clusters where each cluster in this list corresponds to one randomly generated point in the 2 x 2 square Input: number of clusters (int) Output: list of random clusters that is num_clusters long (list) """ cluster_list = [] for cluster in xrange(num_clusters): x = random.choice([1, -1]) * random.random() y = random.choice([1, -1]) * random.random() cluster_list.append(alg_cluster.Cluster(set([]), x, y, 1, 0)) return cluster_list def question_one(): """ Function for answering first question """ xvals = range(2, 200) slow_yvals = [] fast_yvals = [] for num in xvals: cluster_list = gen_random_clusters(num) initial = time.time() answer = prj3.slow_closest_pairs(cluster_list) final = time.time() slow_yvals.append(final - 
initial) for num in xvals: cluster_list = gen_random_clusters(num) initial = time.time() answer = prj3.fast_closest_pair(cluster_list) final = time.time() fast_yvals.append(final - initial) slow_line = plt.plot(xvals, slow_yvals, color='r', label="Slow Closest Pair") fast_line = plt.plot(xvals, fast_yvals, color='b', label="Fast Closest Pair") plt.legend(loc=2) plt.title("Efficiency of Slow and Fast Closest Pairs Algorithms") plt.xlabel("Number of Clusters") plt.ylabel("Run Times in Milliseconds") plt.show() def compute_distortion(cluster_list): """ Takes a list of clusters and uses cluster_error to compute its distortion. input: list of clusters, original data table output: cluster distortion int """ distortion = 0 for cluster in cluster_list: distortion += cluster.cluster_error(data_table) return distortion def question_ten(): """ Function for answering question 10 """ xvals = xrange(6, 21) kmeans_y = [] high_y = [] for clusters in xvals: kmeans_y.append(compute_distortion(clustering(3, clusters))) for clusters in xvals: high_y.append(compute_distortion(clustering(2, clusters))) kmeans_line = plt.plot(xvals, kmeans_y, color='r', label="K-Means Clustering") high_line = plt.plot(xvals, high_y, color='b', label="Hierarchical Clustering") plt.legend() plt.title("Distortion Comparison Between Clustering Methods on 290 County Data Set") plt.xlabel("Number of Output Clusters") plt.ylabel("Distortion") plt.show() #question_one() #question_ten() # draw the clusters using matplotlib or simplegui cluster_list = clustering(1, 5) if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example():
    """
    Tabulate k-means and hierarchical distortion on DATA_896_URL and plot.

    One singleton Cluster is built per table row from the row's first five
    fields.  For every cluster count from 6 to 20, k-means (third argument
    5) and hierarchical clustering are run on the singleton list and the
    result is scored with alg_project3_solution.compute_distortion.  Both
    result lists hold [cluster_count, distortion] pairs in ascending
    cluster-count order.

    With DESKTOP true the two lists are passed to create_separate_plots;
    otherwise the last clustering computed (hierarchical, 6 clusters) is
    shown in the simplegui viewer.
    """
    data_table = load_data_table(DATA_896_URL)

    singletons = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # question 10
    kmeans_res = []
    for k in range(6, 21):
        clusters = alg_project3_solution.kmeans_clustering(singletons, k, 5)
        score = alg_project3_solution.compute_distortion(clusters, data_table)
        kmeans_res.append([k, score])

    # The hierarchical runs go from 20 down to 6 clusters; the rows are
    # inserted at the front so the list ends up in ascending order.
    hier_res = []
    for k in range(20, 5, -1):
        clusters = alg_project3_solution.hierarchical_clustering(singletons, k)
        score = alg_project3_solution.compute_distortion(clusters, data_table)
        hier_res.insert(0, [k, score])

    if not DESKTOP:
        # The simplegui viewer has its own toggle for cluster centers.
        alg_clusters_simplegui.PlotClusters(data_table, clusters)
        return

    create_separate_plots(kmeans_res, hier_res)