def run_kmeans():
    """Run K-Means once with parameters taken from the command line.

    Reads ``argv`` as: ``K``, ``num_iterations``, ``path_to_input`` and an
    optional integer ``random_seed`` (0 when omitted).  On any invalid
    input a message is printed and the process exits with status 1:

    * fewer than three arguments,
    * non-integer ``K`` / ``num_iterations`` / ``random_seed``,
    * ``K <= 1`` or ``num_iterations <= 0``,
    * the input file does not exist,
    * ``K`` is not smaller than the number of loaded points.

    Otherwise the clustering is run and its results are printed via
    ``KMeans.print_results``.
    """
    # Local import: sys.exit is always available, whereas the bare exit()
    # helper is injected by the ``site`` module and may be missing.
    import sys

    if len(argv) < 4:
        print(
            'Not enough arguments provided. Please provide 3 arguments: K, num_iterations, path_to_input'
        )
        sys.exit(1)

    try:
        k = int(argv[1])
        num_iterations = int(argv[2])
        # Honour the seed whenever one is supplied, even if extra trailing
        # arguments follow it.
        random_seed = int(argv[4]) if len(argv) >= 5 else 0
    except ValueError:
        # A non-numeric argument is a usage error, not a crash.
        print('Please provide correct parameters')
        sys.exit(1)
    input_path = argv[3]

    if k <= 1 or num_iterations <= 0:
        print('Please provide correct parameters')
        sys.exit(1)

    if not os.path.exists(input_path):
        print('Input file does not exist')
        sys.exit(1)

    points = load_data(input_path)
    if k >= len(points):
        print('Please set K less than size of dataset')
        sys.exit(1)

    runner = KMeans(k, num_iterations)
    runner.run(points, random_seed)
    runner.print_results()
def run_kmeans():
    """Run K-Means for a fixed grid of (seed, k) pairs and print one row each.

    A header line ``seed k sl1`` is printed first.  For every pair the
    dataset ``colors_dataset_ready.txt`` is loaded, clustered for 10
    iterations, and the seed and k are printed on the same line ahead of
    whatever ``KMeans.print_results`` emits.

    Prints a message and exits with status 1 if the parameters are invalid,
    the input file is missing, or k is not smaller than the dataset size.
    """
    # Local import: sys.exit is always available, whereas the bare exit()
    # helper is injected by the ``site`` module and may be missing.
    import sys

    list_seed = [1, 1, 1, 12, 12, 12]
    list_k = [3, 4, 5, 3, 4, 5]
    # Loop-invariant settings, hoisted out of the loop.
    num_iterations = 10
    input_path = "colors_dataset_ready.txt"

    print("seed k sl1")
    for random_seed, k in zip(list_seed, list_k):
        if k <= 1 or num_iterations <= 0:
            print('Please provide correct parameters')
            sys.exit(1)

        if not os.path.exists(input_path):
            print('Input file does not exist')
            sys.exit(1)

        # Reloaded for every run on purpose: whether KMeans.run mutates the
        # point list is not visible here, so each run gets a fresh copy.
        points = load_data(input_path)
        if k >= len(points):
            print('Please set K less than size of dataset')
            sys.exit(1)

        runner = KMeans(k, num_iterations)
        runner.run(points, random_seed)

        # Keep seed and k on the same output line as the results.
        print(random_seed, k, end=" ")
        runner.print_results()
def run_test(self):
    """Run K-Means once per seed and summarise the resulting SSE values.

    Seeds ``0 .. self.max_seeds - 1`` are used with a single ``KMeans``
    instance built from ``self.k`` and ``self.num_of_iters``; after each run
    ``compute_sse()`` is recorded.

    Returns:
        A list ``[seed_range, k, num_of_iters, min_sse, mean_sse, max_sse]``
        where ``seed_range`` is the string ``"0-<max_seeds - 1>"``.

    Raises:
        ValueError: if ``self.max_seeds`` is not positive, since no run
            would be executed and there is nothing to summarise.
    """
    if self.max_seeds <= 0:
        raise ValueError("max_seeds must be positive to run a test")

    test = KMeans(self.k, self.num_of_iters)
    total_sse = []
    for seed in range(self.max_seeds):
        test.run(self.points, seed)
        total_sse.append(test.compute_sse())

    # Average over the number of runs actually performed rather than a
    # hard-coded 10, so the mean stays correct for any max_seeds.
    mean_sse = sum(total_sse) / len(total_sse)

    return [
        f"0-{self.max_seeds - 1}",
        self.k,
        self.num_of_iters,
        min(total_sse),
        mean_sse,
        max(total_sse),
    ]
class ClusteringGui(QMainWindow):
    """Main window that drives a ``KMeans`` instance from a Qt interface.

    The widget tree is loaded from ``main_interface_file``; buttons and spin
    boxes are looked up by object name and wired to the slots below.  A
    matplotlib canvas is added to the layout once a dataset has been loaded.
    """

    # ui
    layout: QVBoxLayout  # Main Layout in which everything else is contained
    browse_btn: QPushButton  # Load Dataset button
    k_selector: QSpinBox  # Selector for k value
    repetitions_selector: QSpinBox  # Select how many times K-Means is ran per dataset
    run_btn: QPushButton  # Run button
    step_btn: QPushButton  # Next button
    elbow_btn: QPushButton  # Button that opens the elbow chart
    dimensions_label: QLabel  # Filled with vector dimensions found from dataset
    recommended_k_label: QLabel  # Filled with recommended K value as calculated using SSE / Elbow method
    plot_canvas: FigureCanvas  # Canvas containing matplotlib plot

    path: str  # Path of current loaded dataset
    kmeans: KMeans
    figure: plt.Figure
    elbow_chart: QWidget

    def __init__(self):
        """Load the UI, connect the widgets and show the window."""
        # Initialise the Qt base class before attaching anything to the instance.
        super().__init__()
        self.kmeans = KMeans()
        uic.loadUi(main_interface_file, self)

        self.browse_btn = self.findChild(QPushButton, 'browse_button')
        self.browse_btn.clicked.connect(self.on_browse_click)

        self.k_selector = self.findChild(QSpinBox, 'k_val_selector')
        self.k_selector.valueChanged.connect(self.on_update_k)

        self.repetitions_selector = self.findChild(QSpinBox, 'k_repetitions_selector')
        self.repetitions_selector.valueChanged.connect(self.on_set_repetitions)

        # Run / Next / Elbow stay disabled until a dataset has been loaded.
        self.run_btn = self.findChild(QPushButton, 'run_button')
        self.run_btn.clicked.connect(self.on_run_click)
        self.run_btn.setEnabled(False)

        self.step_btn = self.findChild(QPushButton, 'step_button')
        self.step_btn.clicked.connect(self.on_step_click)
        self.step_btn.setEnabled(False)

        self.elbow_btn = self.findChild(QPushButton, 'elbow_chart_button')
        self.elbow_btn.clicked.connect(self.on_show_elbow)
        self.elbow_btn.setEnabled(False)

        # NOTE(review): this attribute shadows QWidget.layout(); kept because
        # the name is part of the class's existing attribute set.
        self.layout = self.findChild(QVBoxLayout, 'layout')

        self.dimensions_label = self.findChild(QLabel, 'dimensions_label')
        self.dimensions_label.setText("")

        self.show()

    @pyqtSlot()
    def on_browse_click(self):
        """Ask for a CSV dataset, load it and enable the clustering controls."""
        path, _ = QFileDialog.getOpenFileName(self, "Open Dataset", "", "CSV Files (*.csv)")
        if not path:
            # The dialog was cancelled: keep the current dataset and path,
            # and do not report a spurious "file not found" error.
            return

        try:
            self.kmeans.open_dataset(filepath=path)
        except FileNotFoundError:
            print("Exception")
            error_dialog = QErrorMessage()
            error_dialog.showMessage("File Not Found, try again")
            error_dialog.exec()
            return

        # Only remember the path once the dataset has actually been loaded.
        self.path = path
        self.dimensions_label.setText("Dimensions: {}".format(self.kmeans.dimensions))
        self.add_matplotlib_canvas()
        self.setWindowTitle('K-Means Clustering: {}'.format(self.path))
        self.elbow_btn.setEnabled(True)
        self.run_btn.setEnabled(True)
        self.step_btn.setEnabled(True)

    @pyqtSlot()
    def on_show_elbow(self):
        """Compute the SSE for a range of K values and show them in an elbow chart."""
        sse = []
        # Calculate SSE for values of K ranging from 1 to 9.
        # NOTE(review): this leaves self.kmeans at the last K tried, which may
        # differ from the value shown in k_selector — confirm whether the
        # selector value should be re-applied afterwards.
        for i in range(1, 10):
            print("Calculating SSE for K =", i)
            self.kmeans.update_k(i)
            # Cluster and update centroids 3 times for each K value
            for _ in range(3):
                self.kmeans.cluster_points()
            sse.append(self.kmeans.calculate_sse())
        self.elbow_chart = ElbowChartGui()
        self.elbow_chart.plot(sse, self.path)

    # Handles K value being changed
    @pyqtSlot()
    def on_update_k(self):
        """Push the selector's K value to the model and refresh the plot if needed."""
        self.kmeans.update_k(self.k_selector.value())
        # If there's data being displayed, update it now
        if self.kmeans.data_displayed:
            self.update_matplotlib()

    @pyqtSlot()
    def on_run_click(self):
        """Run the clustering and redraw the plot."""
        self.kmeans.run()
        self.update_matplotlib()

    @pyqtSlot()
    def on_set_repetitions(self):
        """Copy the repetitions selector value onto the model."""
        self.kmeans.repetitions = self.repetitions_selector.value()

    # Handles 'Next' button being pressed
    @pyqtSlot()
    def on_step_click(self):
        """Advance the clustering by one step and redraw the plot."""
        self.kmeans.step()
        self.update_matplotlib()

    # Adds the matplotlib canvas to the UI
    def add_matplotlib_canvas(self):
        """Show the raw dataset, creating the figure and canvas on first use."""
        try:
            self.figure.clear()
        except AttributeError:
            # No figure exists yet: create it and attach its canvas to the layout.
            self.figure = plt.figure()
            self.plot_canvas = FigureCanvas(self.figure)
            self.layout.addWidget(self.plot_canvas)

        ax = self.figure.add_subplot(1, 1, 1)
        # Only the first two components of each vector are plotted.
        x = [point[0] for point in self.kmeans.vectors]
        y = [point[1] for point in self.kmeans.vectors]
        ax.scatter(x, y, alpha=0.8)
        self.plot_canvas.draw()

    # Updates the matplotlib UI
    def update_matplotlib(self):
        """Redraw the figure with one scatter series per cluster plus the centroids."""
        self.figure.clear()
        ax = self.figure.add_subplot(1, 1, 1)

        for cluster in self.kmeans.clusters:
            x = [point[0] for point in cluster]
            y = [point[1] for point in cluster]
            ax.scatter(x, y, alpha=0.5)

        # Centroids are drawn in black on top of the cluster points.
        for centroid in self.kmeans.centroids:
            x, y = centroid[0], centroid[1]
            ax.scatter(x, y, c='k', alpha=0.8)

        self.plot_canvas.draw()
def itirate_kmeans(k, num_iterations, random_seed, points):
    """Run one K-Means pass over *points* and print its distance report.

    A ``KMeans`` object is built from ``k`` and ``num_iterations``, run on
    ``points`` with ``random_seed``, and then asked to ``print_distance``
    for that same seed.  Nothing is returned.
    """
    model = KMeans(k, num_iterations)
    model.run(points, random_seed)
    model.print_distance(random_seed)
# Plot initial map plot = Plot() plot.plot(data_map, False) # Run simulated annealing experiments for i in range(1, 5): print 'Running simulated annealing for type %d:' % i SA = SimulatedAnnealing(i) new_map = SA.anneal(data_map) cost = SA_final.cost(new_map) print 'Cost: %d' % cost averages[i] += cost plot.plot(new_map, False, i) # Run k-means dropoff_zones = data_map.dropoff_zones.keys() K = len(dropoff_zones) locations = data_map.coordinates.keys() k_means = KMeans(K, dropoff_zones, locations, data_map) print 'Running k-means clustering:' kmeans_cost = k_means.run() print 'Cost: %d' % kmeans_cost averages[5] += kmeans_cost # Print average costs print map(lambda x: x / num_iterations, averages)