# Flatten every profile into a row [vehicle, value_0, value_1, ...] so the
# vehicle key stays attached to its values while the rows are shuffled.
for vehicle in wProfiles:
    data.append([vehicle] + wProfiles[vehicle])
random.shuffle(data)

# Never ask for more rows than exist: indexing data[i] for i < 30000 raised
# IndexError whenever fewer than 30000 rows were available.
sampleN = min(30000, len(data))

# Split the sampled rows into their values (data2) and vehicle keys (chosen);
# the two lists stay index-aligned.
data2 = [row[1:] for row in data[:sampleN]]
chosen = [row[0] for row in data[:sampleN]]
data = data[sampleN:]  # rows that were not sampled remain in `data`

CE = ClusteringExercise(data2)
CE.k_means(3)

# first plot and save the chosen centroids with confidence intervals on them
plt.figure(figsize=(5, 1.5))
plt.rcParams["font.family"] = 'serif'
plt.rcParams["font.size"] = '8'

# Tick positions and their labels: 8 -> 04:00, 24 -> 12:00, 40 -> 20:00
# (presumably half-hour slots -- confirm against the profile layout).
x = [8, 24, 40]
x_ticks = ['04:00', '12:00', '20:00']

# Lookup tables keyed by the strings '0'..'4' (presumably cluster labels --
# verify where they are consumed): a pair of numbers per key, a matplotlib
# colour code per key, and a pale hex colour per key.
locs = {
    '0': [2.68, -0.5],
    '1': [1.39, -0.7],
    '2': [0.1, -0.9],
    '3': [2.68, 0.2],
    '4': [1.39, 0.0],
}
clrs = {'2': 'g', '3': 'y', '1': 'b', '0': 'r', '4': 'c'}
clrs2 = {
    '2': '#CCFFCC',
    '3': '#FFFFCC',
    '1': '#CCCCFF',
    '0': '#FFCCCC',
    '4': '#CCFFFF',
}
# NOTE(review): this excerpt has lost its line breaks and is cut at both ends,
# so the code below is left untouched. It uses `vehicle` before any visible
# binding (presumably an enclosing per-vehicle loop above the excerpt) and it
# stops on the bare header `for label in CE.clusters:`.
# Visible steps, in order:
#   * for i in 0..47: convert weProfiles[vehicle][i] to float, divide it by
#     maxWeDist, store it back, and add it to weTotal[i];
#   * collect wProfiles[vehicle] for every vehicle into `data` and shuffle it;
#   * assign x / x_ticks (the same two assignments appear twice);
#   * build ClusteringExercise from the first sampleN (30000) entries of data;
#   * for k in 1..10: select subplot (3, 4, k), run CE.k_means(k) and append
#     CE.get_sum_of_squares() to css.
for i in range(0,48): weProfiles[vehicle][i] = float(weProfiles[vehicle][i])/maxWeDist weTotal[i] += weProfiles[vehicle][i] data = [] for vehicle in wProfiles: data.append(wProfiles[vehicle]) random.shuffle(data) x = [8,24,40] x_ticks = ['04:00','12:00','20:00'] sampleN = 30000 CE = ClusteringExercise(data[:sampleN]) x = [8,24,40] x_ticks = ['04:00','12:00','20:00'] css = [] plt.figure(1) plt.rcParams["font.family"] = 'serif' plt.rcParams["font.size"] = '10' for k in range(1,11): plt.subplot(3,4,k) CE.k_means(k) css.append(CE.get_sum_of_squares()) #''' for label in CE.clusters:
# Rescale every profile in wProfiles by maxWDist (in place, 48 slots each)
# and accumulate the rescaled values slot by slot into wTotal.
for vehicle in wProfiles:
    profile = wProfiles[vehicle]
    for i in range(48):
        profile[i] = float(profile[i]) / maxWDist
        wTotal[i] += profile[i]

# Apply the same divisor to the profiles in test_data (no float() conversion
# and no accumulation for these).
for profile in test_data:
    for i in range(48):
        profile[i] /= maxWDist

# Cluster on a shuffled list of the wProfiles entries, using at most the
# first sampleN of them.
data = [wProfiles[vehicle] for vehicle in wProfiles]
random.shuffle(data)
sampleN = 30000
CE = ClusteringExercise(data[:sampleN])

# Tick positions and their labels: 8 -> 04:00, 24 -> 12:00, 40 -> 20:00.
x = [8, 24, 40]
x_ticks = ['04:00', '12:00', '20:00']
css = []

plt.figure(1)
plt.rcParams["font.family"] = 'serif'
plt.rcParams["font.size"] = '10'

CE.k_means(5)
n = 1
# Colour code per key '0'..'4' (presumably cluster labels -- verify at the
# point of use).
clrs = {'2': 'g', '3': 'y', '1': 'b', '0': 'r', '4': 'c'}
# NOTE(review): this excerpt has lost its line breaks and is cut at both ends,
# so the code below is left untouched. Its first statement uses `vehicle` and
# `i` before any visible binding (presumably the tail of a loop above the
# excerpt) and it stops on the bare header `for label in CE.clusters:`.
# Visible steps, in order:
#   * add wProfiles[vehicle][i] to wTotal[i];
#   * for every vehicle in weProfiles and i in 0..47: convert the entry to
#     float, divide it by maxWeDist, store it back, and add it to weTotal[i];
#   * collect wProfiles[vehicle] (wProfiles, not weProfiles) for every vehicle
#     into `data` and shuffle it;
#   * build ClusteringExercise from the first sampleN (30000) entries of data;
#   * for k in 2..10: select subplot (3, 4, k-1), run CE.k_means(k) and append
#     CE.get_dist_closest_centroids() to css.
wTotal[i] += wProfiles[vehicle][i] for vehicle in weProfiles: for i in range(0,48): weProfiles[vehicle][i] = float(weProfiles[vehicle][i])/maxWeDist weTotal[i] += weProfiles[vehicle][i] data = [] for vehicle in wProfiles: data.append(wProfiles[vehicle]) random.shuffle(data) sampleN = 30000 CE = ClusteringExercise(data[:sampleN]) x = [8,24,40] x_ticks = ['04:00','12:00','20:00'] css = [] plt.figure(1) plt.rcParams["font.family"] = 'serif' plt.rcParams["font.size"] = '10' for k in range(2,11): plt.subplot(3,4,k-1) CE.k_means(k) css.append(CE.get_dist_closest_centroids()) #''' for label in CE.clusters:
# NOTE(review): `vehicle` and `i` are bound above this excerpt; this statement
# is presumably the last line of an accumulation loop that starts there.
weTotal[i] += weProfiles[vehicle][i]

# Gather every entry of weProfiles into one list for clustering.
data = []
for vehicle in weProfiles:
    data.append(weProfiles[vehicle])

# Disabled tick-label code, kept as an inert string literal.
'''
x = np.arange(8,48,8)
x_ticks = range(4,24,4)
for i in range(0,len(x_ticks)):
    if x_ticks[i] < 10:
        x_ticks[i] = '0'+str(x_ticks[i])+':00'
    else:
        x_ticks[i] = str(x_ticks[i])+':00'
'''

# Cluster the first 10000 entries of data.
CE = ClusteringExercise(data[:10000])

# One panel per k = 2..7 on a 3x2 grid (k - 1 gives panel positions 1..6).
plt.figure(1)
for k in range(2, 8):
    plt.subplot(3, 2, k - 1)
    CE.k_means(k)
    # Draw each cluster's mean, with the cluster's nPoints as legend text.
    for label in CE.clusters:
        cluster = CE.clusters[label]
        plt.plot(cluster.mean, label=str(cluster.nPoints))
    plt.legend()
    CE.reset_clusters()  # called after every k, before the next k_means run
    plt.title('k=' + str(k), y=0.8)
plt.show()
# Flatten every profile into a row [vehicle, value_0, value_1, ...] so the
# vehicle key stays attached to its values while the rows are shuffled.
for vehicle in wProfiles:
    data.append([vehicle] + wProfiles[vehicle])
nTotal = len(data)
random.shuffle(data)

# Never ask for more rows than exist: indexing data[i] for i < 1000 raised
# IndexError whenever fewer than 1000 rows were available.
sampleN = min(1000, nTotal)

# Split the sampled rows into their values (data2) and vehicle keys (chosen);
# the two lists stay index-aligned.
data2 = [row[1:] for row in data[:sampleN]]
chosen = [row[0] for row in data[:sampleN]]
data = data[sampleN:]  # rows that were not sampled remain in `data`

CE = ClusteringExercise(data2)
CE.k_means(3)

# pts[c][t] gathers, for cluster c (0..2) and slot t (0..47), the values of
# the sampled rows assigned to that cluster.
pts = {c: {t: [] for t in range(48)} for c in range(3)}

# first get the training pts
# NOTE(review): assumes CE.labels[i] is the label of data2[i] -- confirm
# against ClusteringExercise.
trainingLabels = CE.labels
labels = {}
for i, vehicle_id in enumerate(chosen):
    cluster = int(trainingLabels[i])
    labels[vehicle_id] = cluster
    for t in range(48):
        pts[cluster][t].append(data2[i][t])