def compute_dis_matrix():
    """Build the pairwise distance matrix between all stations.

    Rows and columns follow the iteration order of the dictionary returned
    by ``sp.get_station_dic()``. Each distance is first looked up with
    ``sp.get_shortest_path_from_stat_id`` (which yields ``(distance, path)``);
    when that lookup yields ``None`` for the distance, it is recomputed with
    ``internal_get_spt_from_stat_name`` (which yields ``(path, distance)``)
    using the two stations' ``'name'`` entries.

    Returns:
        list[list]: a square list-of-lists with one row/column per station;
        the diagonal is left at 0.
    """
    station_dic = sp.get_station_dic()
    size = len(station_dic)
    dist_matrix = [[0] * size for _ in range(size)]
    stations_shortest_path_dic = sp.get_all_stations_spt_dic_from_file()

    # Station id -> row/column index, in dictionary iteration order.
    stat_id_to_index = {
        stat_id: index for index, stat_id in enumerate(station_dic)
    }

    for stat_id1, idx1 in stat_id_to_index.items():
        for stat_id2, idx2 in stat_id_to_index.items():
            # Compare by value: identity (`is`) is not a reliable equality
            # test for station ids.
            if stat_id1 == stat_id2:
                continue

            distance, _ = sp.get_shortest_path_from_stat_id(
                stat_id1, stat_id2, station_dic, stations_shortest_path_dic)
            if distance is None:
                print("Coundn't find distance in file, calculate again")
                _, distance = internal_get_spt_from_stat_name(
                    station_dic[stat_id1]['name'],
                    station_dic[stat_id2]['name'])

            # Every ordered pair is visited and each visit writes both
            # mirrored cells, so the later-visited direction is the value
            # that remains in the matrix.
            dist_matrix[idx1][idx2] = distance
            dist_matrix[idx2][idx1] = distance

    return dist_matrix
def computeDisMetrix():
    """Build the pairwise distance matrix between stations, keyed by name.

    Station names (the ``'name'`` entry of each record returned by
    ``get_station_dic()``) are mapped to consecutive row/column indices in
    first-seen order. Each distance is looked up with
    ``sp.get_shortest_path`` (which yields ``(distance, path)``); when the
    distance comes back as ``None`` it is recomputed with
    ``internal_get_spt_from_stat_name`` (which yields ``(path, distance)``).

    Returns:
        list[list]: a square list-of-lists with one row/column per unique
        station name; the diagonal is left at 0.
    """
    station_dic = get_station_dic()

    # Name -> matrix index. Indices are taken from the current size of the
    # mapping so they stay dense (0..n-1) even if two stations share a name;
    # a counter advanced per station would run past the matrix bounds.
    name_to_index = {}
    for station_id in station_dic:
        name = station_dic[station_id]['name']
        name_to_index.setdefault(name, len(name_to_index))

    size = len(name_to_index)
    dis_metrix = [[0] * size for _ in range(size)]

    # Only the station info half of the preprocess() result is used here.
    _, station_info_dic = sp.preprocess()
    stations_shortest_path_dic = sp.get_all_stations_spt_dic_from_file()

    for name1, idx1 in name_to_index.items():
        for name2, idx2 in name_to_index.items():
            # Compare by value: identity (`is`) is not a reliable equality
            # test for strings.
            if name1 == name2:
                continue

            print(name1 + "#" + name2)
            distance, _ = sp.get_shortest_path(
                name1, name2, station_info_dic, stations_shortest_path_dic)
            if distance is None:
                _, distance = internal_get_spt_from_stat_name(name1, name2)

            # Both mirrored cells are written on every visit, so the
            # later-visited direction is the value that remains.
            dis_metrix[idx1][idx2] = distance
            dis_metrix[idx2][idx1] = distance

    return dis_metrix
def plot_stations_cluster(k, *, annotate=False):
    """Cluster the stations with k-means and plot them over the road network.

    The samples come from ``get_stat_samples_from_dic(sp.get_station_dic())``;
    their first two columns are used as the x/y plotting coordinates. The
    figure is saved to ``../resources/img/<k>_clusters_stations.png`` and
    then shown.

    Args:
        k: number of clusters passed to ``KMeans``.
        annotate: when true, label every station with its ``'name'`` at its
            ``'road_coordinates'``. Off by default, which matches the
            previous behaviour where this code was disabled.
    """
    stat_dic = sp.get_station_dic()
    X = np.array(get_stat_samples_from_dic(stat_dic))

    # Fixed random_state keeps the clustering reproducible between runs.
    kmeans = KMeans(n_clusters=k, random_state=100)
    y_pred = kmeans.fit_predict(X)

    plt.rcParams['figure.figsize'] = (15, 10)
    df_roads = gpd.read_file(files.roads_pads_network_utm_geojson)
    df_roads.plot(color='black')
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)

    if annotate:
        for stat_id in stat_dic:
            station = stat_dic[stat_id]
            # NOTE(review): labels are placed at 'road_coordinates', while
            # the scatter uses the sample columns -- confirm both refer to
            # the same coordinate system.
            e = station['road_coordinates'][0]
            n = station['road_coordinates'][1]
            plt.annotate(station['name'], xy=(e, n), xytext=(e + 10, n + 10))

    # Plot centroids on top of the station markers.
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=169,
                linewidths=3, color='r', zorder=10)

    plt.title(f'Stations with {k} clusters')
    plt.xlabel('Easting [m]', fontsize=13)
    plt.ylabel('Northing [m]', fontsize=13)
    filename = f'../resources/img/{k}_clusters_stations.png'
    plt.savefig(filename, dpi=1000)
    plt.show()
def plot_clustering_from_dic(cluster_stat_dic, annotate=None):
    """Plot stations grouped by a precomputed clustering over the road network.

    Each cluster is drawn with its own ``plt.scatter`` call, using the
    ``'coordinates'`` entry of every member station in
    ``sp.get_station_dic()``. The figure is saved to
    ``../resources/img/stations_name_clustering.png`` and then shown.

    Args:
        cluster_stat_dic: mapping of cluster key -> record whose
            ``'stations'`` entry is a sequence of station ids.
        annotate: optional labelling mode; ``None`` (default) draws no
            labels, matching the previous behaviour where all labelling
            code was disabled.
            ``'cluster'`` labels each cluster once, at its first station.
            ``'station'`` labels every station as ``<station id>(<cluster>)``.
            ``'point'`` labels every station with its cluster key.

    Raises:
        ValueError: if ``annotate`` is not one of the values above.
    """
    if annotate not in (None, 'cluster', 'station', 'point'):
        raise ValueError(
            "annotate must be None, 'cluster', 'station' or 'point', "
            f"got {annotate!r}")

    stat_info_dic = sp.get_station_dic()
    plt.rcParams['figure.figsize'] = (12, 9)
    df_roads = gpd.read_file(files.roads_pads_network_utm_geojson)
    df_roads.plot(color='black')

    for cluster in cluster_stat_dic:
        stats = cluster_stat_dic[cluster]['stations']
        # An empty cluster would produce a 1-D array and break the column
        # indexing below. Use len() rather than truthiness because `stats`
        # may be a NumPy array (assumption -- confirm against the producer).
        if len(stats) == 0:
            continue

        # Plot the stations of this cluster.
        stat_coords = np.array(
            [stat_info_dic[stat]['coordinates'] for stat in stats])
        plt.scatter(stat_coords[:, 0], stat_coords[:, 1])

        if annotate == 'cluster':
            e, n = stat_coords[0][0], stat_coords[0][1]
            plt.annotate(cluster, xy=(e, n), xytext=(e + 20, n + 20),
                         fontsize=15)
        elif annotate == 'station':
            for stat, coord in zip(stats, stat_coords):
                plt.annotate(f"{stat}({cluster})",
                             xy=(coord[0], coord[1]),
                             xytext=(coord[0] + 10, coord[1] + 10))
        elif annotate == 'point':
            for coord in stat_coords:
                plt.annotate(cluster, xy=(coord[0], coord[1]),
                             xytext=(coord[0] + 10, coord[1] + 10))

    plt.title(f'Stations with {len(cluster_stat_dic)} clusters')
    plt.xlabel('Easting [m]', fontsize=13)
    plt.ylabel('Northing [m]', fontsize=13)
    filename = '../resources/img/stations_name_clustering.png'
    plt.savefig(filename, dpi=1000)
    plt.show()
def determine_k_by_elbow():
    """Plot the elbow curve used to choose the number of k-means clusters.

    For each k from 1 up to 19 (capped at the number of samples), a
    ``KMeans`` model is fitted to the station samples and its distortion is
    recorded: the mean Euclidean distance from every sample to its nearest
    cluster centre. The curve is saved to
    ``../resources/img/elbow_method.png`` and then shown.
    """
    stat_dic = sp.get_station_dic()
    X = np.array(get_stat_samples_from_dic(stat_dic))
    n_samples = X.shape[0]

    # KMeans cannot form more clusters than there are samples, so stop the
    # sweep there instead of failing part-way through.
    K = range(1, min(20, n_samples + 1))

    distortions = []
    for k in K:
        # Fit once per k; a second fit on the same model only repeats work.
        model = KMeans(n_clusters=k).fit(X)
        nearest = np.min(cdist(X, model.cluster_centers_, 'euclidean'),
                         axis=1)
        distortions.append(nearest.sum() / n_samples)

    plt.plot(K, distortions, 'bx-')
    plt.xlabel('k')
    plt.ylabel('Distortion')
    plt.title('The Elbow Method showing the optimal k')
    plt.xlim([0, 20])
    plt.savefig('../resources/img/elbow_method.png', dpi=1000)
    plt.show()
def get_cluster_dic():
    """Return the module-level ``cluster_stat_dic``, clustering first if empty.

    When ``cluster_stat_dic`` already has entries it is returned untouched.
    Otherwise the module globals ``dist_matrix`` (from
    ``compute_dis_matrix()``) and ``stat_list`` (station ids as a NumPy
    array) are refreshed and ``agglomerative_clustering`` is run with
    ``cluster_num`` before ``cluster_stat_dic`` is returned.
    """
    global stat_list, dist_matrix

    needs_clustering = len(cluster_stat_dic) == 0
    if needs_clustering:
        stations = sp.get_station_dic()
        dist_matrix = compute_dis_matrix()
        stat_list = np.array(list(stations))
        # NOTE(review): the labels returned here are not used, and nothing
        # in this function writes to cluster_stat_dic. Presumably the call
        # fills it as a side effect via the globals set above -- confirm.
        agglomerative_clustering(cluster_num, dist_matrix)

    return cluster_stat_dic
#itemlist = [([0] * len(time_station)) for i in range(len(time_station))] #i = 0 #for stat in time_station: # j = 0 # for stat1 in time_station[stat]: # if i == j: # j += 1 # itemlist[i][j] = time_station[stat][stat1] # j += 1 # i += 1 # #print(itemlist) #print(len(itemlist)) # create scatter plot staion_dic = get_station_dic() loc = [] xmin = 5000000 xmax = 0 ymin = 50000000 ymax = 0 for station in staion_dic: loc.append(staion_dic[station]['coordinates']) if staion_dic[station]['coordinates'][0] < xmin: xmin = staion_dic[station]['coordinates'][0] if staion_dic[station]['coordinates'][0] > xmax: xmax = staion_dic[station]['coordinates'][0] if staion_dic[station]['coordinates'][1] < ymin: