Example #1
0
def k_means(data_points, n_centroids, tot_iteration=100):
    """Cluster *data_points* with the `kmeans` helper module.

    Seeds `n_centroids` initial centroids, runs up to `tot_iteration`
    iterations, and returns ``[cluster_label, new_centroids]``.
    """
    initial_centroids = kmeans.create_centroids(data_points, n_centroids)
    cluster_label, new_centroids = kmeans.iterate_k_means(
        data_points, initial_centroids, tot_iteration)
    # kmeans.print_label_data([cluster_label, new_centroids])
    return [cluster_label, new_centroids]
    def kmeansModel(self):
        """Validate the UI parameters, run k-means, and show the results.

        Reads k (``self.e1``) and the number of runs (``self.e2``) from the
        entry widgets. Every validation failure is reported through a
        messagebox instead of raising; on success the clustering pipeline
        runs and the resulting plots are displayed.
        """
        print("build the model and Get Visualization")
        print("Number of clusters k : ", self.e1.get())
        print("Number of runs : ", self.e2.get())
        k_text = self.e1.get()
        runs_text = self.e2.get()
        # Guard clauses replace the original deeply nested if/else pyramid.
        if k_text == "" or runs_text == "":
            messagebox.showinfo("Error", "One of the parameters is missing ")
            return
        # self.bool is set to True by preProcessFunc once data was loaded.
        if not self.bool:
            messagebox.showinfo("Error", "Must first do pre-processing ")
            return
        if not (k_text.isdigit() and runs_text.isdigit()):
            messagebox.showinfo("Error",
                                "The parameters must be numbers !")
            return
        k = int(k_text)
        runs = int(runs_text)
        # Bounds now match the user-facing message: k must be *between 2
        # and 165* inclusive (the original `> 2 ... < 165` wrongly rejected
        # 2 and 165), and the number of runs must stay under 50.
        if not (2 <= k <= 165 and runs < 50):
            messagebox.showinfo(
                "Error",
                "One of the parameters doesn't make sense \n"
                "The k must be between 2 and 165 (number of countries)\n"
                "And Number of runs must be under 50 ")
            return
        clustering = KMeansClustering.Clustering(self.preprocess.data_frame)
        clustering.activate_k_means_algorithm(k, runs)
        clustering.create_scatter_generosity_social_support()
        clustering.create_country_map()
        messagebox.showinfo("clustering",
                            "Clustering completed successfully!")
        self.showImg()
    def preProcessFunc(self):
        """Load the .xlsx file named in the browse field and pre-process it.

        On success sets ``self.bool`` (pre-processing done flag) and
        ``self.preprocess``; every failure is reported via a messagebox.
        """
        # create preprocess
        print("Got The Data")

        path = self.labelBrowse.get()
        if path == "":
            messagebox.showinfo("Error", "You have to input path ")
        elif not path.endswith(".xlsx"):
            messagebox.showinfo(
                "Error", "You should Enter Path And Ends with xlsx !")
        else:
            self.bool = True
            self.preprocess = KMeansClustering.Preprocess(path)
            self.preprocess.clean_na()               # drop missing values
            self.preprocess.standardization()        # normalize
            self.preprocess.aggregate_by_country()   # aggregate by country
            messagebox.showinfo("Pre-processing",
                                "Preprocessing completed successfully!")
Example #4
0
 def execute_scheme(self):
     """Run the anomaly-analysis pipeline end to end.

     Loads per-day sensor dataframes starting at 2020-11-01, filters and
     merges them, locates anomaly objects (cached on disk as JSON),
     clusters the objects with k-means, and visualizes the clusters with
     PCA plots.

     NOTE(review): the many commented-out lines are earlier/alternative
     pipeline stages kept for reference; they are left untouched here.
     """
     #model = TimeSeriesClassificationNeuralNet(self.settings)
     #model = TimeSeriesPredictionNeuralNet(self.settings)
     # DB connection plus the settings-driven generator helpers.
     # NOTE(review): query_generator and report_generator are built but
     # never used below — possibly leftovers; confirm before removing.
     connection = SQLAConnection()
     query_generator = QueryGenerator(
         self.settings.sensors,
         self.settings.start_date,
         self.settings.end_date
         )
     report_generator = ReportGenerator(self.settings)
     link_generator = LinkGenerator(self.settings)
     #data = RegularityData(link_generator,connection)
     data = AnomalyData(link_generator,connection)
     #data.generate_metadata_report(ReportGenerator(self.settings))
     #data.make_df()
     #data.save_df(name=self.settings.dataset_name)

     #data.find_discontinuities()
     #data.split_at_discontinuities()
     #data.plot_data()
     #data.add_temp()
     #data.save_dfs(name=self.settings.dataset_name)
     #data.load_dfs(date='2020-11-01')
     #data.load_extend_dfs(date='2020-11-13')
     # Load the first day, then extend day by day according to `mode`.
     startdate = datetime.strptime('2020-11-01',config.dateformat)
     data.load_dfs(date=datetime.strftime(startdate,config.dateformat))
     dates_ahead = 4
     mode = 'while'
     if mode == 'for':
         # Fixed window: extend by `dates_ahead` consecutive days.
         for i in range(dates_ahead):

             data.load_extend_dfs(date=datetime.strftime(startdate+timedelta(days=i), config.dateformat))

     elif mode == 'while':
         # Open-ended: walk forward one day at a time until today,
         # silently skipping days whose data files are missing.
         tdate = startdate
         while tdate.date() != date.today():
             try:
                 data.load_extend_dfs(date=datetime.strftime(tdate, config.dateformat))

             except FileNotFoundError:
                 pass
             tdate = tdate+timedelta(days=1)
     # Drop empty frames, preprocess, and merge into one dataset.
     data.purge_empty_dfs()
     data.preprocess()
     data.merge_dfs()
     #data.plot_data()
     #data.find_correlation()
     anomaly_settings = AnomalySettings()
     kmeans_settings = KMeansSettings()
     # Daily time window to keep (here the full day).
     start_hour = '00:00:00'
     end_hour = '23:59:59'
     data.filter_hours(start_hour,end_hour)
     data.purge_empty_time_filtered_dfs()
     #data.plot_filtered_hours(plot_objects=False)
     data.set_object_settings(anomaly_settings)
     # Cache key for the located anomaly objects; if a matching JSON file
     # exists it is loaded, otherwise objects are located and saved.
     anomaly_name = f"{startdate}_{mode}_{start_hour}_{end_hour}_{anomaly_settings.anomaly_sensor}_anomaly"
     print(os.listdir(config.anomaly_path))
     print(anomaly_name)
     if f"{anomaly_name}.json" in os.listdir(config.anomaly_path):
         data.load_objects(name=f"{anomaly_name}.json")
         print(f"{anomaly_name} loaded")
     else:
         for feature in anomaly_settings.anomaly_sensor:
             #data.locate_anomalies_filtered_dfs(feature)
             data.locate_objects_dfs(feature)
             #data.save_plots(feature)
             #data.plot_filtered_hours(foi = feature)
         data.save_objects(name=anomaly_name)


     # Cluster the anomaly objects, then project them with PCA and plot.
     kmeans = KMeansClustering(data.objects,kmeans_settings)
     kmeans.fit_Kmeans()
     #sensor_prediction = SensorPrediction(data.anomalies,self.settings)
     data.plot_filtered_hours(foi = 'acc1_ch_x')#,project_anomalies = 'acc1_ch_z')
     pca = PCAAnomalies(data.objects,self.settings)
     pca.fit_PCA()
     pca.save_pca(f'{anomaly_name}_pca')
     pca.set_labels(kmeans.send_labels())
     #pca.get_cov()
     #anomaly_key, df_number = pca.get_argmax(col='sigma')
     #data.plot_regularities()
     pca.plot_components_labels(n_categories = kmeans_settings.n_clusters)
     pca.scree_plot()
     pca.plot_hist_pca()
     #pca.plot_components_3d()
     pca.plot_components(features = ['Duration','frequency'])
import EpipolarGeometry
import ImageFeaturesHomography
import KMeansClustering

def _launch(task):
    """Start *task* immediately and hand it back so a reference is kept."""
    task.start()
    return task

# Run the three exercises in sequence (construct + start one at a time,
# same interleaving as before).
task1 = _launch(ImageFeaturesHomography.ImageFeaturesHomography())
task2 = _launch(EpipolarGeometry.EpipolarGeometry())
task3 = _launch(KMeansClustering.KMeansClustering())
##################################################################
##################################################################
##################################################################

# Parse the input file into a list of Point objects.
points = []
# `with` guarantees the file is closed (the original leaked the handle).
with open("toy_data.txt", 'r') as data:
	for line in data:
		# map(float, ...) replaces the redundant `lambda x: float(x)`.
		points.append(Point(*map(float, line.split())))

# Run k-means 20 times and keep the clustering with the lowest final WCSS.
runs_info = []
best_WCSS = None  # None sentinel instead of the magic -1
for run in range(20):
	centers, cluster_index, WCSS_list = KMeansClustering.k_means_clustering(points, 4, True)
	runs_info.append(WCSS_list)
	curr_WCSS = WCSS_list[-1]
	if best_WCSS is None or curr_WCSS < best_WCSS:
		best_cluster_index = cluster_index
		best_WCSS = curr_WCSS

# make first graph - points in clusters graph
x_clusters = [[] for _ in range(4)]
y_clusters = [[] for _ in range(4)]
# zip the labels with the points instead of indexing via range(len(...)).
for label, point in zip(best_cluster_index, points):
	x_clusters[label].append(point.x)
	y_clusters[label].append(point.y)

colors = ["bo", "go", "ro", "yo"]
for i in range(4):
Example #7
0
'''
Created on 23.02.2016

@author: Masus04
'''
import time
import KMeansClustering

# Start the timer before kicking off the clustering run.
startTime = time.time()
blackThreshold = 0  # NOTE(review): unused here; presumably read elsewhere

KMeansClustering.buildClusters(10)

# Report wall-clock time, truncated to whole seconds.
elapsed = int(time.time() - startTime)
print('Execution time: ' + str(elapsed) + 's')
#################################################################################
#################################################################################

# 128 * 128 pixel image
image = Image.open("bird_small.tiff")
pixels = image.load()

# Every pixel becomes a point in RGB space, tagged with its (x, y) position.
points = []
for x in range(128):
	for y in range(128):
		R, G, B = pixels[x, y]
		points.append(Point(R, G, B, x, y))

# Draw 16 *distinct* random RGB centers to seed the clustering
# (same randrange call sequence as the original nested while loop).
centers = []
while len(centers) < 16:
	candidate = Point(randrange(0, 256), randrange(0, 256), randrange(0, 256), 0, 0)
	if candidate not in centers:
		centers.append(candidate)

centers, cluster_index = KMeansClustering.k_means_clustering(points, 16, False, True, centers)

# Recolor each pixel with its cluster center and save the result.
for pnt, label in zip(points, cluster_index):
	center = centers[label]
	pixels[pnt.x, pnt.y] = (int(center.R), int(center.G), int(center.B))
image.save("output-bird.tiff")