def dbscan_run_all():
    """Run DBSCAN on every file in ``c.DB_TESTS`` and print a report for each.

    For each file the epsilon / min-points pair is looked up in ``c.e_pts``,
    the data is clustered, and the following is printed: a summary table
    (the output of ``evaluate_clusters`` plus a totals row), every cluster
    with its centroid, and the outliers.  When the clustered data has two
    or three columns the clusters are also plotted.
    """
    # Fully-qualified option names: the bare 'max_rows' / 'max_columns'
    # patterns can match more than one option on newer pandas releases.
    pd.set_option('display.expand_frame_repr', True)
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_columns', 250)
    pd.set_option('display.width', 1000)
    np.set_printoptions(precision=3, floatmode='fixed')

    for fn in c.DB_TESTS:
        e, pts = c.e_pts[fn]
        df, _class_id = parse_csv(fn)
        clusters, outliers = dbscan(df, e, pts)
        results = evaluate_clusters(clusters, None, verbose=False, outliers=outliers)

        # Totals row: column sums for the numeric measures, '-' placeholders
        # for the two "dropped" columns.
        totals = results.loc[:, [MAX, MIN, AVG, PTS, SSE]].sum()
        totals[NUM_DROPPED] = '-'
        totals[PCT_DROPPED] = '-'
        totals.name = TOTALS
        # DataFrame.append was removed in pandas 2.0.  infer_objects() restores
        # numeric dtypes on the one-row frame so round(3) below still applies.
        results = pd.concat([results, totals.to_frame().T.infer_objects()])

        sfn = strip_file_path(fn)
        print(f'\nSummary - {sfn}')
        print(results.round(3))

        centroids = np.array([cluster.mean() for cluster in clusters])
        for number, (cluster, centroid) in enumerate(zip(clusters, centroids), start=1):
            print(f'\nCluster {number}')
            print(f'Centroid: {centroid}')
            print(cluster)

        print('\nOutliers')
        print(outliers)

        # Guard against an empty result before peeking at the first cluster.
        if len(clusters) > 0 and 2 <= clusters[0].shape[1] <= 3:
            plot_clusters(clusters, centroids, f'dbscan {sfn}')
def run(self):
    """Read the input, build the model selected by ``self.options`` and evaluate it.

    ``self.options['algo']`` selects the clustering routine ('kmeans',
    'dbscan', 'agglo' or 'minib'); the routine is called with ``self.doc``
    and ``self.options['params']`` and ``evaluate()`` is invoked on its result.

    Raises:
        ValueError: if ``self.options['algo']`` is not a supported name.
    """
    self.read_input()
    algo = self.options['algo']
    params = self.options['params']

    if algo == 'kmeans':
        model = kmeans(self.doc, params)
    elif algo == 'dbscan':
        model = dbscan(self.doc, params)
    elif algo == 'agglo':
        model = agglo(self.doc, params)
    elif algo == 'minib':
        model = minib(self.doc, params)
    else:
        # Previously fell through and failed later with an unbound ``model``.
        raise ValueError(
            "unknown algo %r; expected one of 'kmeans', 'dbscan', 'agglo', 'minib'"
            % (algo,)
        )

    model.evaluate()
def calculate_mbrs(points, epsilon, min_pts, debug=False):
    """
    Find clusters using DBscan and then create a dictionary of bounding
    rectangles to return.

    ``points``, ``epsilon``, ``min_pts`` and ``debug`` are forwarded to
    ``dbscan`` (with ``distance=euclidean``).

    Returns a dict that maps every cluster id to its minimum bounding
    rectangle, given as a closed ring of five ``(x, y)`` corners (the first
    corner is repeated at the end).  The extra key ``'extremes'`` holds the
    overall ``max_x`` / ``max_y`` / ``min_x`` / ``min_y`` across all clusters.
    """
    # sys.maxint only exists on Python 2; fall back to sys.maxsize elsewhere.
    max_int = getattr(sys, 'maxint', sys.maxsize)

    mbrs = {}
    clusters = dbscan(points, epsilon, min_pts, distance=euclidean, debug=debug)
    extremes = {
        'max_x': max_int * -1,
        'max_y': max_int * -1,
        'min_x': max_int,
        'min_y': max_int,
    }

    # Traditional dictionary iteration to populate the mbr dict.
    for cluster_id, cpoints in clusters.items():
        print(cluster_id)
        xs = [p[0] for p in cpoints]
        ys = [p[1] for p in cpoints]
        max_x = max(xs)
        max_y = max(ys)
        min_x = min(xs)
        min_y = min(ys)

        # Widen the global extremes with this cluster's bounds.
        extremes['max_x'] = max(extremes['max_x'], max_x)
        extremes['max_y'] = max(extremes['max_y'], max_y)
        extremes['min_x'] = min(extremes['min_x'], min_x)
        extremes['min_y'] = min(extremes['min_y'], min_y)

        mbrs[cluster_id] = [
            (min_x, min_y),
            (max_x, min_y),
            (max_x, max_y),
            (min_x, max_y),
            (min_x, min_y),
        ]

    mbrs['extremes'] = extremes
    return mbrs
def dbscan_hypertuning(fn: str):
    """Search DBSCAN's radius and min-points for the lowest total SSE on ``fn``.

    The radius is tried in multiples of a step derived from the smallest
    value in the data (at least 1), up to the largest value in the data.
    Min-points runs over the even numbers from 2 up to half the number of
    rows.  Results with no clusters are skipped, and the min-points loop
    for a radius stops as soon as ``check_pct_outliers(df, noise)`` is truthy.

    Prints the best combination found and returns ``(radius, min_points)``;
    both are 0 when no combination produced a finite SSE.
    """
    df, class_id = parse_csv(fn)

    max_dist = df.max().max()
    step = min_dist = max(df.min().min() * 1.01, 1)
    num_steps = int(max_dist // min_dist + 1)

    best_e, best_pts, best_sse = 0, 0, float('inf')
    for multiple in range(1, num_steps):
        radius = multiple * step
        for pts in range(2, df.shape[0] // 2, 2):
            clusters, noise = dbscan(df, radius, pts)
            if len(clusters) == 0:
                continue
            if check_pct_outliers(df, noise):
                break

            measures = evaluate_clusters(clusters, None, verbose=False)
            total_sse = measures[SSE].sum()
            if total_sse < best_sse:
                best_e, best_pts, best_sse = radius, pts, total_sse

    print(f'{fn}: e: {best_e}, pts: {best_pts}, sse: {best_sse}')
    return best_e, best_pts
def calculate_mbrs(self, points, epsilon, min_pts):
    """
    Find clusters using DBscan and then create a list of bounding
    rectangles to return.

    Clusters are looked up by integer index ``0 .. len(clusters) - 2``.
    Each rectangle is a closed ring of five ``(x, y)`` corners (the first
    corner is repeated at the end).  The list is printed before it is
    returned.

    NOTE(review): the last index is never visited; presumably the remaining
    key of ``clusters`` holds noise points. Confirm against ``dbscan``.
    """
    clusters = dbscan(points, epsilon, min_pts)
    mbrs = []

    for idx in range(len(clusters) - 1):
        members = list(clusters[idx])
        xs = [pt[0] for pt in members]
        ys = [pt[1] for pt in members]
        max_x, max_y = max(xs), max(ys)
        min_x, min_y = min(xs), min(ys)
        ring = [
            (min_x, min_y),
            (max_x, min_y),
            (max_x, max_y),
            (min_x, max_y),
            (min_x, min_y),
        ]
        mbrs.append(ring)

    print(mbrs)
    return mbrs
def calculate_mbrs(points, epsilon, min_pts):
    """
    Find clusters using DBscan and then create a list of bounding
    rectangles to return.

    Clusters are looked up by integer index ``0 .. len(clusters) - 2``.
    Each rectangle is a closed ring of five ``(x, y)`` corners (the first
    corner is repeated at the end).

    NOTE(review): the last index is never visited; presumably the remaining
    key of ``clusters`` holds noise points. Confirm against ``dbscan``.
    """
    def bounding_ring(cluster):
        # Collect the coordinates in a single pass, then take the bounds.
        xs, ys = [], []
        for pt in cluster:
            xs.append(pt[0])
            ys.append(pt[1])
        max_x, max_y = max(xs), max(ys)
        min_x, min_y = min(xs), min(ys)
        return [
            (min_x, min_y),
            (max_x, min_y),
            (max_x, max_y),
            (min_x, max_y),
            (min_x, min_y),
        ]

    clusters = dbscan(points, epsilon, min_pts)
    return [bounding_ring(clusters[idx]) for idx in range(len(clusters) - 1)]
"""Run DBSCAN on the encoded training images and then on the full encoded set."""
import numpy as np
from dbscan import *
from plot3d import *

# Comma-separated inputs written by an earlier stage into datasets/temp.
encodedImgs = np.loadtxt('datasets/temp/encodedImgs', delimiter=',')
encodedImgsTrain = np.loadtxt('datasets/temp/encodedImgsTrain', delimiter=',')
labels = np.loadtxt('datasets/temp/labels', delimiter=',').astype(int)

# DBSCAN parameters shared by both runs.
epsilon = 0.015
minSamps = 2

# First the training subset, then the full set, both with the same labels.
# NOTE(review): both runs announce "Training"; confirm whether the second
# message is intended.
for _imgs in (encodedImgsTrain, encodedImgs):
    print("Training")
    dbscan(_imgs, labels, epsilon=epsilon, minSamples=minSamps)
datum['Time'], datum['temparature'], datum['humidity'], datum['LPG'], datum['label'] ]) if checkFileExist("%s.csv" % name) == True: with open(output_file_name, 'a', encoding='utf-8') as output_file: csvwriter = csv.writer(output_file) csvwriter.writerow([ datum['Time'], datum['temparature'], datum['humidity'], datum['LPG'], datum['label'] ]) output_file.close() if count >= 50 and count % 10 == 0: label = dbscan(temp_list[count - 50:count], hum_list[count - 50:count], gas_list[count - 50:count], label_list[count - 50:count]) label = map(str, label) data_s = " ".join(label) conn.sendall(data_s.encode()) else: reply = "send" conn.sendall(reply.encode()) count += 1 conn.close() s.close() print('close')
"""Command-line entry point: cluster the points of an input file with DBSCAN."""
import sys
import utils
from dbscan import *


def _main(argv):
    """Read points from the file named by ``argv[1]``, cluster and visualize them."""
    points = utils.read_input(argv[1])
    # Per the original author's note, dbscan labels the elements of
    # ``points`` with their cluster; its return value is not used here.
    dbscan(points, 0.5)
    utils.visualize(points)


if __name__ == "__main__":
    _main(sys.argv)
# Build the feature matrix X: every column of ``data`` except the last one.
X = data[:, :-1]

# ---------------------------- FEATURE NORMALIZATION ---------------------------- #

# Z-score (active): muSigma supplies mu and sigma, which zScore applies to X.
mu, sigma = muSigma(X)
X = zScore(X, mu, sigma)

# Min-max (disabled alternative):
# min, diff, max = minmax(X)
# X = Min_Max(X, min, diff)

# Feature scaling (disabled alternative):
# min, diff, max = minmax(X)
# X = Feat_Scaling(X, max)

# DBSCAN parameters: neighbourhood radius and minimum number of points.
epsylon = 2
punti_min = 2
centroid = dbscan(X, epsylon, punti_min)
"""Cluster the points of a shapefile with DBSCAN and write x, y, cluster id to CSV."""
import arcpy
from dbscan import *
import numpy as np
from matplotlib import pyplot as plt

# Alternative input used previously:
# curs = arcpy.SearchCursor("C:\Users\huangyixiu\Documents\Course\gisData\SH_hos\Hospital.shp")
curs = arcpy.SearchCursor("../Export_Output.shp")

# Collect the first point of every feature's shape.
pointx = []
pointy = []
c = 0
for row in curs:
    pointx.append(row.Shape.firstPoint.X)
    pointy.append(row.Shape.firstPoint.Y)
    c += 1
    if c % 1000 == 0:
        # Single-argument call form prints the same on Python 2 and 3.
        print("processing line %d" % c)

# Row 0 holds the x values and row 1 the y values.
m = np.matrix([pointx, pointy])

# 0.005 degree in WGS84, approximately 500 m
eps = 0.005
min_points = 4
clusterlis = dbscan(m, eps, min_points)

# The context manager closes the file even if a write fails part-way.
with open("../cluster2.csv", "w") as res:
    res.write("x,y,clusterid\n")
    for i, (x, y) in enumerate(zip(pointx, pointy)):
        res.write("%f,%f,%s\n" % (x, y, clusterlis[i]))

print("result written done!")