def k_means(data_points, n_centroids, tot_iteration=100):
    # pick the initial centroids, then run the iterative assign/update loop
    centroids = kmeans.create_centroids(data_points, n_centroids)
    [cluster_label, new_centroids] = kmeans.iterate_k_means(data_points, centroids, tot_iteration)
    # kmeans.print_label_data([cluster_label, new_centroids])
    return [cluster_label, new_centroids]
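# --- Hedged sketch (not part of the original sources) --------------------------
# The wrapper above delegates to a "kmeans" helper module whose implementation
# is not shown. The sketch below illustrates what create_centroids and
# iterate_k_means could look like for a plain Lloyd's-style k-means; the
# function names and return shapes are taken from the call sites above, and
# everything else is an assumption.
import random

def create_centroids(data_points, n_centroids):
    # sample n_centroids distinct points and copy them as the initial centroids
    return [list(p) for p in random.sample(list(data_points), n_centroids)]

def iterate_k_means(data_points, centroids, tot_iteration):
    cluster_label = []
    for _ in range(tot_iteration):
        # assignment step: label each point with the index of its nearest centroid
        cluster_label = [
            min(range(len(centroids)),
                key=lambda c: sum((x - y) ** 2 for x, y in zip(p, centroids[c])))
            for p in data_points
        ]
        # update step: move each centroid to the mean of the points assigned to it
        for c in range(len(centroids)):
            members = [p for p, lbl in zip(data_points, cluster_label) if lbl == c]
            if members:
                centroids[c] = [sum(dim) / len(members) for dim in zip(*members)]
    return [cluster_label, centroids]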
def kmeansModel(self):
    print("Build the model and get visualization")
    print("Number of clusters k:", self.e1.get())
    print("Number of runs:", self.e2.get())
    if self.e1.get() == "" or self.e2.get() == "":
        messagebox.showinfo("Error", "One of the parameters is missing")
    else:
        if self.bool:
            if self.e1.get().isdigit() and self.e2.get().isdigit():
                if 2 < int(self.e1.get()) < 165 and int(self.e2.get()) < 50:
                    clustering = KMeansClustering.Clustering(self.preprocess.data_frame)
                    clustering.activate_k_means_algorithm(int(self.e1.get()), int(self.e2.get()))
                    clustering.create_scatter_generosity_social_support()
                    clustering.create_country_map()
                    messagebox.showinfo("Clustering", "Clustering completed successfully!")
                    self.showImg()
                else:
                    messagebox.showinfo(
                        "Error",
                        "One of the parameters doesn't make sense.\n"
                        "k must be between 2 and 165 (the number of countries)\n"
                        "and the number of runs must be under 50.")
            else:
                messagebox.showinfo("Error", "The parameters must be numbers!")
        else:
            messagebox.showinfo("Error", "You must run pre-processing first.")
def preProcessFunc(self):
    # create preprocess
    print("Got the data")
    path = self.labelBrowse.get()
    if path == "":
        messagebox.showinfo("Error", "You have to enter a path")
    else:
        if path.endswith(".xlsx"):
            self.bool = True
            self.preprocess = KMeansClustering.Preprocess(path)
            # clean_na
            self.preprocess.clean_na()
            # normalize
            self.preprocess.standardization()
            # aggregate by country
            self.preprocess.aggregate_by_country()
            messagebox.showinfo("Pre-processing", "Preprocessing completed successfully!")
        else:
            messagebox.showinfo("Error", "The path must point to an .xlsx file!")
def execute_scheme(self):
    #model = TimeSeriesClassificationNeuralNet(self.settings)
    #model = TimeSeriesPredictionNeuralNet(self.settings)
    connection = SQLAConnection()
    query_generator = QueryGenerator(
        self.settings.sensors,
        self.settings.start_date,
        self.settings.end_date
    )
    report_generator = ReportGenerator(self.settings)
    link_generator = LinkGenerator(self.settings)
    #data = RegularityData(link_generator,connection)
    data = AnomalyData(link_generator,connection)
    #data.generate_metadata_report(ReportGenerator(self.settings))
    #data.make_df()
    #data.save_df(name=self.settings.dataset_name)
    #data.find_discontinuities()
    #data.split_at_discontinuities()
    #data.plot_data()
    #data.add_temp()
    #data.save_dfs(name=self.settings.dataset_name)
    #data.load_dfs(date='2020-11-01')
    #data.load_extend_dfs(date='2020-11-13')
    startdate = datetime.strptime('2020-11-01', config.dateformat)
    data.load_dfs(date=datetime.strftime(startdate, config.dateformat))
    dates_ahead = 4
    mode = 'while'
    if mode == 'for':
        for i in range(dates_ahead):
            data.load_extend_dfs(date=datetime.strftime(startdate + timedelta(days=i), config.dateformat))
    elif mode == 'while':
        tdate = startdate
        while tdate.date() != date.today():
            try:
                data.load_extend_dfs(date=datetime.strftime(tdate, config.dateformat))
            except FileNotFoundError:
                pass
            tdate = tdate + timedelta(days=1)
    data.purge_empty_dfs()
    data.preprocess()
    data.merge_dfs()
    #data.plot_data()
    #data.find_correlation()
    anomaly_settings = AnomalySettings()
    kmeans_settings = KMeansSettings()
    start_hour = '00:00:00'
    end_hour = '23:59:59'
    data.filter_hours(start_hour, end_hour)
    data.purge_empty_time_filtered_dfs()
    #data.plot_filtered_hours(plot_objects=False)
    data.set_object_settings(anomaly_settings)
    anomaly_name = f"{startdate}_{mode}_{start_hour}_{end_hour}_{anomaly_settings.anomaly_sensor}_anomaly"
    print(os.listdir(config.anomaly_path))
    print(anomaly_name)
    if f"{anomaly_name}.json" in os.listdir(config.anomaly_path):
        data.load_objects(name=f"{anomaly_name}.json")
        print(f"{anomaly_name} loaded")
    else:
        for feature in anomaly_settings.anomaly_sensor:
            #data.locate_anomalies_filtered_dfs(feature)
            data.locate_objects_dfs(feature)
            #data.save_plots(feature)
            #data.plot_filtered_hours(foi = feature)
        data.save_objects(name=anomaly_name)
    kmeans = KMeansClustering(data.objects, kmeans_settings)
    kmeans.fit_Kmeans()
    #sensor_prediction = SensorPrediction(data.anomalies,self.settings)
    data.plot_filtered_hours(foi='acc1_ch_x')  #,project_anomalies = 'acc1_ch_z')
    pca = PCAAnomalies(data.objects, self.settings)
    pca.fit_PCA()
    pca.save_pca(f'{anomaly_name}_pca')
    pca.set_labels(kmeans.send_labels())
    #pca.get_cov()
    #anomaly_key, df_number = pca.get_argmax(col='sigma')
    #data.plot_regularities()
    pca.plot_components_labels(n_categories=kmeans_settings.n_clusters)
    pca.scree_plot()
    pca.plot_hist_pca()
    #pca.plot_components_3d()
    pca.plot_components(features=['Duration', 'frequency'])
import EpipolarGeometry
import ImageFeaturesHomography
import KMeansClustering

# start task 1
task1 = ImageFeaturesHomography.ImageFeaturesHomography()
task1.start()

# start task 2
task2 = EpipolarGeometry.EpipolarGeometry()
task2.start()

# start task 3
task3 = KMeansClustering.KMeansClustering()
task3.start()
##################################################################
##################################################################
##################################################################

# parse the input into a list of Point objects
points = []
data = open("toy_data.txt", 'r')
for line in data:
    curr_args = list(map(lambda x: float(x), line.split()))
    curr_point = Point(*curr_args)
    points.append(curr_point)

# run k-means 20 times and keep the clustering with the lowest final WCSS
runs_info = []
best_WCSS = -1
for run in range(20):
    centers, cluster_index, WCSS_list = KMeansClustering.k_means_clustering(points, 4, True)
    runs_info.append(WCSS_list)
    curr_WCSS = WCSS_list[-1]
    if best_WCSS == -1 or curr_WCSS < best_WCSS:
        best_cluster_index = cluster_index
        best_WCSS = curr_WCSS

# make first graph - points in clusters graph
x_clusters = [[] for i in range(4)]
y_clusters = [[] for i in range(4)]
for i in range(len(best_cluster_index)):
    x_clusters[best_cluster_index[i]].append(points[i].x)
    y_clusters[best_cluster_index[i]].append(points[i].y)

colors = ["bo", "go", "ro", "yo"]
for i in range(4):
    # loop body truncated in the source; a plausible completion plots each
    # cluster in its own matplotlib color/marker style (assumes matplotlib.pyplot
    # is imported as plt at the top of the file)
    plt.plot(x_clusters[i], y_clusters[i], colors[i])
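# --- Hedged sketch (not part of the original sources) --------------------------
# The restart loop above keeps the run with the smallest final WCSS
# (within-cluster sum of squares). k_means_clustering itself is not shown; the
# helper below only illustrates the quantity it is assumed to report for 2D
# Point objects: the sum of squared distances from every point to the center
# of its assigned cluster.
def wcss(points, centers, cluster_index):
    total = 0.0
    for pnt, label in zip(points, cluster_index):
        center = centers[label]
        total += (pnt.x - center.x) ** 2 + (pnt.y - center.y) ** 2
    return total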
'''
Created on 23.02.2016

@author: Masus04
'''
import time

import KMeansClustering

# timer
startTime = time.time()

blackThreshold = 0
KMeansClustering.buildClusters(10)

# timer
print('Execution time: ' + str(int(time.time() - startTime)) + 's')
#################################################################################
#################################################################################

# 128 * 128 pixel image
image = Image.open("bird_small.tiff")
pixels = image.load()

# initialize points in 3D space with coordinates as pixel's RGB values
points = []
for x in range(128):
    for y in range(128):
        R, G, B = pixels[x, y]
        points.append(Point(R, G, B, x, y))

# pick 16 distinct random RGB triples as the initial cluster centers
centers = []
for i in range(16):
    while True:
        RGB = Point(randrange(0, 256), randrange(0, 256), randrange(0, 256), 0, 0)
        if RGB not in centers:
            centers.append(RGB)
            break

centers, cluster_index = KMeansClustering.k_means_clustering(points, 16, False, True, centers)

# recolor every pixel with the RGB value of its cluster center
for i in range(len(points)):
    pnt = points[i]
    R = centers[cluster_index[i]].R
    G = centers[cluster_index[i]].G
    B = centers[cluster_index[i]].B
    pixels[pnt.x, pnt.y] = (int(R), int(G), int(B))

image.save("output-bird.tiff")
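# --- Hedged note (not part of the original sources) ----------------------------
# Reducing the palette to 16 cluster centers is the classic k-means image
# compression setup: each pixel then needs only log2(16) = 4 bits to index its
# center instead of 24 bits of raw RGB, plus a small table of the 16 centers.
original_bits = 128 * 128 * 24                 # 24-bit color per pixel
compressed_bits = 128 * 128 * 4 + 16 * 24      # 4-bit index per pixel + center table
print("approx. compression factor:", round(original_bits / compressed_bits, 1))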