Exemplo n.º 1
0
def run_kmeans():
    """Parse the command-line arguments, validate them and run K-Means once.

    Reads the module-level ``argv``:

    * ``argv[1]`` -- K, the number of clusters (must be greater than 1)
    * ``argv[2]`` -- number of iterations (must be positive)
    * ``argv[3]`` -- path to the input file (must exist)
    * ``argv[4]`` -- optional integer random seed (defaults to 0)

    On any invalid input a message is printed and the process exits with
    status 1.  Otherwise the points are loaded with ``load_data``, clustered
    with ``KMeans(k, num_iterations).run(points, random_seed)`` and the
    results are printed via ``print_results()``.
    """
    # Local import: ``sys.exit`` is always available, whereas the bare
    # ``exit`` helper is only installed by the ``site`` module.
    import sys

    if len(argv) < 4:
        print(
            'Not enough arguments provided. Please provide 3 arguments: K, num_iterations, path_to_input'
        )
        sys.exit(1)

    try:
        k = int(argv[1])
        num_iterations = int(argv[2])
        # Honour the seed whenever a fourth argument is present, even if
        # further arguments follow it.
        random_seed = int(argv[4]) if len(argv) >= 5 else 0
    except ValueError:
        # Non-integer values are reported like any other bad parameter
        # instead of surfacing as an uncaught traceback.
        print('Please provide correct parameters')
        sys.exit(1)
    input_path = argv[3]

    if k <= 1 or num_iterations <= 0:
        print('Please provide correct parameters')
        sys.exit(1)
    if not os.path.exists(input_path):
        print('Input file does not exist')
        sys.exit(1)

    points = load_data(input_path)
    # K must be strictly smaller than the number of loaded points.
    if k >= len(points):
        print('Please set K less than size of dataset')
        sys.exit(1)

    runner = KMeans(k, num_iterations)
    runner.run(points, random_seed)
    runner.print_results()
Exemplo n.º 2
0
def run_kmeans():
    """Run K-Means over a fixed list of (seed, k) pairs and print one row each.

    A header line ``seed k sl1`` is printed first.  For every pair the
    parameters and the input file are validated, the dataset is loaded with
    ``load_data``, ``KMeans`` is run for 10 iterations, and the seed and k
    are printed on the same line ahead of whatever ``print_results()`` emits.
    Any failed validation prints a message and exits with status 1.
    """
    seeds = [1, 1, 1, 12, 12, 12]
    cluster_counts = [3, 4, 5, 3, 4, 5]
    num_iterations = 10
    input_path = "colors_dataset_ready.txt"

    print("seed k sl1")

    # The two lists are parallel: entry i of each forms one experiment.
    for random_seed, k in zip(seeds, cluster_counts):
        if not (k > 1 and num_iterations > 0):
            print('Please provide correct parameters')
            exit(1)
        if not os.path.exists(input_path):
            print('Input file does not exist')
            exit(1)

        # The dataset is re-read for every experiment.
        points = load_data(input_path)
        if len(points) <= k:
            print('Please set K less than size of dataset')
            exit(1)

        runner = KMeans(k, num_iterations)
        runner.run(points, random_seed)
        # Prefix the result row with the seed and k, space separated.
        print(random_seed, k, end=" ")
        runner.print_results()
Exemplo n.º 3
0
 def run_test(self):
     """Run K-Means once per seed and summarise the resulting SSE values.

     A single ``KMeans(self.k, self.num_of_iters)`` instance is run on
     ``self.points`` for every seed in ``range(self.max_seeds)`` and
     ``compute_sse()`` is recorded after each run.

     Returns:
         A list ``[seed_range, k, num_of_iters, min_sse, mean_sse, max_sse]``
         where ``seed_range`` is the string ``"0-<max_seeds - 1>"``.

     Raises:
         ValueError: if ``self.max_seeds`` is 0, because no SSE value was
             collected (raised by ``min`` on the empty list).
     """
     test = KMeans(self.k, self.num_of_iters)
     total_sse = []
     total_sum = 0
     for seed in range(self.max_seeds):
         test.run(self.points, seed)
         sse = test.compute_sse()
         total_sse.append(sse)
         total_sum += sse
     minimal_sse = min(total_sse)
     # Average over the runs actually performed rather than a hard-coded 10,
     # so the mean stays correct for any ``max_seeds``.
     mean_sse = total_sum / len(total_sse)
     maximal_sse = max(total_sse)
     return [
         f"0-{self.max_seeds - 1}", self.k, self.num_of_iters, minimal_sse,
         mean_sse, maximal_sse
     ]
Exemplo n.º 4
0
class ClusteringGui(QMainWindow):
    """Main window for loading a dataset and running K-Means on it.

    The widget tree is loaded from ``main_interface_file``; child widgets are
    looked up by object name and wired to the slots below.  Clustering itself
    is delegated to a ``KMeans`` instance, and the points, clusters and
    centroids are drawn on an embedded matplotlib canvas.
    """
    # ui
    # NOTE(review): the name ``layout`` shadows the inherited Qt ``layout()``
    # method on this instance; kept because other code may rely on it.
    layout: QVBoxLayout  # Main Layout in which everything else is contained
    browse_btn: QPushButton  # Load Dataset button
    k_selector: QSpinBox  # Selector for k value
    repetitions_selector: QSpinBox  # Select how many times K-Means is run per dataset
    run_btn: QPushButton  # Run button
    step_btn: QPushButton  # Next button
    elbow_btn: QPushButton  # Button that opens the elbow chart
    dimensions_label: QLabel  # Filled with vector dimensions found from dataset
    recommended_k_label: QLabel  # Filled with recommended K value as calculated using SSE / Elbow method
    plot_canvas: FigureCanvas  # Canvas containing matplotlib plot

    path: str  # Path of current loaded dataset
    kmeans: KMeans
    figure: plt.Figure
    elbow_chart: QWidget

    def __init__(self):
        """Load the UI file, connect the widgets to their slots and show the window."""
        # Initialise the Qt base class before storing anything on the instance.
        super().__init__()
        self.kmeans = KMeans()
        uic.loadUi(main_interface_file, self)

        self.browse_btn = self.findChild(QPushButton, 'browse_button')
        self.browse_btn.clicked.connect(self.on_browse_click)
        self.k_selector = self.findChild(QSpinBox, 'k_val_selector')
        self.k_selector.valueChanged.connect(self.on_update_k)
        self.repetitions_selector = self.findChild(QSpinBox, 'k_repetitions_selector')
        self.repetitions_selector.valueChanged.connect(self.on_set_repetitions)
        # Run / Next / Elbow stay disabled until a dataset has been loaded.
        self.run_btn = self.findChild(QPushButton, 'run_button')
        self.run_btn.clicked.connect(self.on_run_click)
        self.run_btn.setEnabled(False)
        self.step_btn = self.findChild(QPushButton, 'step_button')
        self.step_btn.clicked.connect(self.on_step_click)
        self.step_btn.setEnabled(False)
        self.elbow_btn = self.findChild(QPushButton, 'elbow_chart_button')
        self.elbow_btn.clicked.connect(self.on_show_elbow)
        self.elbow_btn.setEnabled(False)
        self.layout = self.findChild(QVBoxLayout, 'layout')
        self.dimensions_label = self.findChild(QLabel, 'dimensions_label')
        self.dimensions_label.setText("")

        self.show()

    @pyqtSlot()
    def on_browse_click(self):
        """Ask the user for a CSV dataset and load it into the K-Means model.

        ``self.path`` is only updated once the dataset has been opened
        successfully, so cancelling the dialog or picking a missing file
        leaves the previously loaded dataset and its path untouched.
        """
        selected_path, _ = QFileDialog.getOpenFileName(self, "Open Dataset", "", "CSV Files (*.csv)")
        if not selected_path:
            # Dialog was cancelled: nothing to load and no error to report.
            return
        try:
            self.kmeans.open_dataset(filepath=selected_path)
            self.path = selected_path
            self.dimensions_label.setText("Dimensions: {}".format(self.kmeans.dimensions))
            self.add_matplotlib_canvas()
            self.setWindowTitle('K-Means Clustering: {}'.format(self.path))
            self.elbow_btn.setEnabled(True)
            self.run_btn.setEnabled(True)
            self.step_btn.setEnabled(True)
        except FileNotFoundError:
            print("Exception")
            error_dialog = QErrorMessage()
            error_dialog.showMessage("File Not Found, try again")
            error_dialog.exec()

    @pyqtSlot()
    def on_show_elbow(self):
        """Compute the SSE for several K values and show them in an elbow chart."""
        sse = []
        # Calculate SSE for values of K ranging from 1 to 9
        # (the upper bound of range() is exclusive).
        # NOTE(review): the model's K is left at the last value tried; it is
        # not reset to the value shown in the K selector -- confirm intended.
        for i in range(1, 10):
            print("Calculating SSE for K =", i)
            self.kmeans.update_k(i)
            # Cluster and update centroids 3 times for each K value
            self.kmeans.cluster_points()
            self.kmeans.cluster_points()
            self.kmeans.cluster_points()
            sse.append(self.kmeans.calculate_sse())

        self.elbow_chart = ElbowChartGui()
        self.elbow_chart.plot(sse, self.path)

    # Handles K value being changed
    @pyqtSlot()
    def on_update_k(self):
        """Push the selector's K value to the model and redraw if data is shown."""
        self.kmeans.update_k(self.k_selector.value())
        # If there's data being displayed, update it now
        if self.kmeans.data_displayed:
            self.update_matplotlib()

    @pyqtSlot()
    def on_run_click(self):
        """Run the K-Means model and redraw the plot."""
        self.kmeans.run()
        self.update_matplotlib()

    @pyqtSlot()
    def on_set_repetitions(self):
        """Copy the repetitions selector value onto the model."""
        self.kmeans.repetitions = self.repetitions_selector.value()

    # Handles 'Next' button being pressed
    @pyqtSlot()
    def on_step_click(self):
        """Advance the model by one ``step()`` and redraw the plot."""
        self.kmeans.step()
        self.update_matplotlib()

    # Adds the matplotlib canvas to the UI
    def add_matplotlib_canvas(self):
        """Draw the raw dataset, creating the figure and canvas on first use.

        Only the first two components of each vector are plotted.
        """
        try:
            self.figure.clear()
        except AttributeError:
            # First call: ``figure`` is only annotated on the class, so the
            # attribute does not exist yet and must be created here.
            self.figure = plt.figure()
            self.plot_canvas = FigureCanvas(self.figure)
            self.layout.addWidget(self.plot_canvas)

        ax = self.figure.add_subplot(1, 1, 1)

        x = [point[0] for point in self.kmeans.vectors]
        y = [point[1] for point in self.kmeans.vectors]
        ax.scatter(x, y, alpha=0.8)

        self.plot_canvas.draw()

    # Updates the matplotlib UI
    def update_matplotlib(self):
        """Redraw the plot with one scatter series per cluster plus the centroids.

        Only the first two components of each point and centroid are plotted.
        """
        self.figure.clear()

        ax = self.figure.add_subplot(1, 1, 1)

        for cluster in self.kmeans.clusters:
            x = [point[0] for point in cluster]
            y = [point[1] for point in cluster]
            ax.scatter(x, y, alpha=0.5)

        # Centroids are drawn in black on top of the cluster points.
        for centroid in self.kmeans.centroids:
            x, y = centroid[0], centroid[1]
            ax.scatter(x, y, c='k', alpha=0.8)

        self.plot_canvas.draw()
Exemplo n.º 5
0
def itirate_kmeans(k, num_iterations, random_seed, points):
    """Run one K-Means pass over ``points`` and print its distance report.

    Builds ``KMeans(k, num_iterations)``, runs it on ``points`` with
    ``random_seed`` and then calls ``print_distance(random_seed)`` on it.
    Nothing is returned.
    """
    model = KMeans(k, num_iterations)
    model.run(points, random_seed)
    model.print_distance(random_seed)
Exemplo n.º 6
0
    # Plot the initial map.
    # NOTE(review): this is a Python 2 fragment (print statements); the
    # enclosing function/loop header is outside the visible excerpt.
    plot = Plot()
    plot.plot(data_map, False)

    # Run simulated annealing experiments, one per type 1 through 4
    for i in range(1, 5):
        print 'Running simulated annealing for type %d:' % i

        SA = SimulatedAnnealing(i)

        new_map = SA.anneal(data_map)
        # NOTE(review): the cost is taken from `SA_final`, not from the `SA`
        # instance created above; `SA_final` is not defined in this excerpt --
        # confirm it exists and that this is intended.
        cost = SA_final.cost(new_map)
        print 'Cost: %d' % cost
        # Slots 1-4 of `averages` accumulate the cost for each annealing type.
        averages[i] += cost
        plot.plot(new_map, False, i)

    # Run k-means with one cluster per dropoff zone, over all coordinate keys
    dropoff_zones = data_map.dropoff_zones.keys()
    K = len(dropoff_zones)
    locations = data_map.coordinates.keys()

    k_means = KMeans(K, dropoff_zones, locations, data_map)

    print 'Running k-means clustering:'
    kmeans_cost = k_means.run()
    print 'Cost: %d' % kmeans_cost
    # Slot 5 of `averages` accumulates the k-means cost.
    averages[5] += kmeans_cost

# Print every accumulated cost divided by num_iterations.
# NOTE(review): in Python 2 `/` floors when both operands are ints -- confirm
# the costs are floats if fractional averages are expected.
print map(lambda x: x / num_iterations, averages)