def plot_blobs(self, variable=None, k=None):
    """Plot the blobs we created on a choropleth map.

    Parameters
    ----------
    variable : str, optional
        Column of ``self.blobs_data`` to color by; falls back to
        ``variable + '_mean'`` if the plain name is absent. Defaults to
        coloring by blob ID.
    k : int, optional
        Number of quantile classes; defaults to the number of blobs.

    Returns
    -------
    False if the variable name is invalid, otherwise None.
    """
    if not variable:
        # no variable requested: color each area by its blob assignment
        data = self.regions
        variable = 'blob ID'
    else:
        # resolve the column name; original used a bare except, which would
        # also have hidden unrelated errors — narrowed to KeyError
        if variable not in self.blobs_data.columns:
            if variable + '_mean' in self.blobs_data.columns:
                variable = variable + '_mean'
            else:
                print("invalid variable name; variables are the following: \n" +
                      ', '.join(self.blobs_data.columns))
                return False
        data = []
        for i in self.d.tractID:
            try:
                # .loc replaces the removed pandas DataFrame.ix accessor
                # (labels here are region IDs from area2region)
                data.append(self.blobs_data.loc[self.r.area2region[str(i)], variable])
            except KeyError:
                # area not assigned to any blob: plot as zero
                data.append(0)
        data = np.array(data)
    if not k:
        k = self.r.p
    print('  Plotting...')
    maps.plot_choropleth(self.shp_link, data, type='quantiles',
                         title='Blobs from Census ' + self.level + 's\nby ' + variable +
                         ' (' + str(self.r.p) + ' blobs)', k=k, figsize=(6,9))
    print('\r             \n')
def plot_map(self, variable=None):
    """Plot cluster assignments on the map.

    Parameters
    ----------
    variable : str, optional
        Column of ``self.centers`` to color by (equal-interval scheme).
        Default: color each area by its cluster ID.
    """
    plots = np.zeros(self.b.regions.shape)
    if not variable:
        # color each underlying area by the cluster its blob belongs to
        for i, region in enumerate(self.b.regions):
            plots[i] = self.assignments[region]
    else:
        # color each area by the chosen variable of its cluster center;
        # .loc replaces the removed pandas DataFrame.ix accessor
        for i, region in enumerate(self.b.regions):
            plots[i] = self.centers.loc[self.assignments[region], variable]
    maps.plot_choropleth(self.b.shp_link, plots, type='equal_interval',
                         title=('Clustered blobs from Census ' + self.b.level + 's'),
                         k=30, figsize=(6,9))
def blobs(vars, min_pop, iterations, method='equal votes', weights=None,
          initial=10, plot=False):
    """Run Maxp ``iterations`` times and return the best blobs solution.

    NOTE(review): the parameter name ``vars`` shadows the builtin and is
    never read in this body (the module-level ``blob_vars`` is used instead);
    kept for caller compatibility.

    Parameters
    ----------
    vars : array
        Variables to blob on (unused here — see note above).
    min_pop : int
        Minimum population per blob (Maxp floor).
    iterations : int
        Number of solutions to generate.
    method : str
        Passed to format_blobs ('equal votes' by default).
    weights : array, optional
        Variable weights for method='weighted'.
    initial : int
        Number of revisions per solution.
    plot : bool
        Plot the best solution when True.

    Returns
    -------
    dict with timing series, score series, and the best solution.
    """
    if weights is None:
        # avoid a shared mutable default argument
        weights = []
    solutions = []
    top_scores = []
    times = []
    num_blobs = []
    current_time = []
    iteration = []
    best_score = 10**12
    best_solution = None
    for i in range(iterations):
        start = time.time()
        # w, blob_vars and calls are module-level globals
        r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
                    floor=min_pop, floor_variable=calls['pop'], initial=initial)
        end = time.time()
        times.append(end - start)
        current_time.append(end)
        # hoist: original called objective_function() three times per loop
        score = r.objective_function()
        solutions.append(score)
        num_blobs.append(r.k)
        if score < best_score:
            best_score = score
            best_solution = r
        top_scores.append(best_score)
        iteration.append(i)
        print('iteration ' + str(i+1) + ' - score: ' + str(round(score, 2)) +
              ' (' + str(r.k) + ' blobs), best: ' + str(round(best_score, 2)) + ', ' +
              str(round(end-start, 1)) + 's')
    if plot:
        # prep for plotting
        r = best_solution
        ids = np.array(calls['tractce10']).astype(str)
        # r.sort_regions(method='mean')  # uncomment to sort regions by intensity of the variable
        regions = np.empty(calls.shape[0])
        for j in range(calls.shape[0]):
            regions[j] = r.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
                             title='Chicago blobs from census tracts\n(min ' +
                             str(r.floor) + ' population per blob, ' +
                             str(r.p) + ' blobs)', k=r.p, figsize=(6,8))
    # bug fix: original returned best=r, which with plot=False was the LAST
    # iteration's solution rather than the best one
    return dict(times=times, solutions=solutions, top_scores=top_scores,
                current_time=current_time, iteration=iteration,
                best=best_solution)
def plot_map(self, variable=None, blob_shp=None):
    """Plot cluster assignments on the map.

    Parameters
    ----------
    variable : str, optional
        Column of ``self.centers`` to color by (equal-interval scheme).
        Default: color each area by its cluster ID (unique values).
    blob_shp : str, optional
        Path to a dissolved blob shapefile; when given, plot one polygon
        per blob contour instead of one per underlying area.
    """
    plots = np.zeros(self.b.regions.shape)
    if blob_shp:
        # one value per blob contour, colored by cluster assignment
        cluster = np.zeros(len(self.b.contours))
        for i in range(len(self.b.contours)):
            cluster[i] = self.assignments[self.b.contours_to_blobs[i]]
        maps.plot_choropleth(blob_shp, cluster, type='unique_values',
                             title=('Clustered blobs from Census ' + self.b.level + 's'),
                             k=30, figsize=(6,8))
    elif not variable:
        # color each underlying area by its cluster ID
        for i, region in enumerate(self.b.regions):
            plots[i] = self.assignments[region]
        maps.plot_choropleth(self.b.shp_link, plots, type='unique_values',
                             title=('Clustered blobs from Census ' + self.b.level + 's'),
                             k=30, figsize=(6,8))
    else:
        # color each area by the chosen variable of its cluster center;
        # .loc replaces the removed pandas DataFrame.ix accessor
        for i, region in enumerate(self.b.regions):
            plots[i] = self.centers.loc[self.assignments[region], variable]
        maps.plot_choropleth(self.b.shp_link, plots, type='equal_interval',
                             title=('Clustered blobs from Census ' + self.b.level + 's'),
                             k=30, figsize=(6,9))
def plot_blobs(self, blob_shp=None, variable=None, k=None, mapType=None):
    """Plot the blobs we created, optionally on a dissolved blob shapefile.

    Parameters
    ----------
    blob_shp : str, optional
        Path to a dissolved blob shapefile; when given, plot one polygon
        per blob contour.
    variable : str, optional
        Column of ``self.blobs_data`` to color by; falls back to
        ``variable + '_mean'``. Defaults to coloring by blob ID.
    k : int, optional
        Number of classes; defaults to the number of blobs (or contours).
    mapType : str, optional
        Choropleth scheme; chosen automatically when omitted.

    Returns
    -------
    False if the variable name is invalid, otherwise None.
    """
    def _resolve_variable(name):
        # Map a requested name onto an existing column ('<name>' or
        # '<name>_mean'); return None and print the valid names otherwise.
        if name in self.blobs_data.columns:
            return name
        if name + '_mean' in self.blobs_data.columns:
            return name + '_mean'
        print("invalid variable name; variables are the following: \n" +
              ', '.join(self.blobs_data.columns))
        return None

    if blob_shp:
        k = len(self.contours)
        if not variable:
            variable = 'blob ID'
            data = np.arange(len(self.contours))
            mapType = 'unique_values'
        else:
            variable = _resolve_variable(variable)
            if variable is None:
                return False
            data = []
            for i in range(len(self.contours)):
                try:
                    # .loc replaces the removed pandas DataFrame.ix accessor
                    data.append(self.blobs_data.loc[self.contours_to_blobs[i], variable])
                except KeyError:
                    data.append(0)
            data = np.array(data)
            mapType = 'quantiles'
    elif not variable:
        data = self.regions
        variable = 'blob ID'
    else:
        variable = _resolve_variable(variable)
        if variable is None:
            return False
        data = []
        for i in self.d.tractID:
            try:
                data.append(self.blobs_data.loc[self.r.area2region[str(i)], variable])
            except KeyError:
                # area not assigned to any blob: plot as zero
                data.append(0)
        data = np.array(data)
    if not k:
        k = self.r.p
    if not mapType:
        mapType = 'quantiles'
    print('  Plotting...')
    map_shp = blob_shp if blob_shp else self.shp_link
    if mapType == 'unique_values':
        # NOTE(review): 'Blobs from Test' (vs 'Census' below) looks like a
        # debug leftover, but kept byte-for-byte pending confirmation
        maps.plot_choropleth(map_shp, data, type=mapType, cmap='Paired',
                             title='Blobs from Test ' + self.level + 's\nby ' + variable +
                             ' (' + str(self.r.p) + ' blobs)', k=k, figsize=(1,3))
    else:
        maps.plot_choropleth(map_shp, data, type=mapType, cmap='hot_r',
                             title='Blobs from Census ' + self.level + 's\nby ' + variable +
                             ' (' + str(self.r.p) + ' blobs)', k=k, figsize=(1,3))
    print('\r             \n')
def blobs(v, min_pop, floor_var='pop', iterations=10, method='equal votes',
          weights=None, initial=10, plot=True, savedata=False,
          plot_values=False, verbose=True):
    """Create a max-p regions solution for a given shapefile and associated
    dataset. Builds on pysal.Maxp with improvements to the user interface,
    verbosity, and mapping.

    Original problem from "The Max-p-Regions Problem," Duque, Anselin, and
    Rey, JRS, October 2010, available at http://geography.sdsu.edu/Research/
    Projects/IPC/publication/MaxP_authored.pdf.

    Parameters
    ----------
    v           : array
                  array of variables on which to create blobs (for all
                  variables, use ['all'])
    min_pop     : int
                  minimum population in each blob
    floor_var   : str
                  name of the column enforced as the blob floor ('pop')
    iterations  : int
                  number of blobs solutions to create (will return best):
                  10 by default
    method      : {'equal votes', 'default', 'weighted'}
                  'equal votes' by default, can change to 'weighted'
    weights     : array
                  if method='weighted', add weights for variables as an array
    initial     : int
                  number of times to revise each solution (10 by default)
    plot        : boolean
                  will plot the best solution (True by default)
    savedata    : boolean
                  will save a CSV of the blobs data to the root folder
                  (False by default)
    plot_values : boolean
                  will color-code the plot by the mean of the underlying
                  variables. only makes sense with one variable. default
                  False (plots by ID of the blob)

    Sample usage
    ------------
    >>> blobs(['all_calls_per1000'], min_pop=10000, plot_values=True)
    """
    if weights is None:
        # avoid a shared mutable default argument
        weights = []
    solutions = []
    top_scores = []
    times = []
    num_blobs = []
    current_time = []
    iteration = []
    best_score = -1
    best_solution = None
    # every observation weighs 1 toward the floor constraint
    floor_var_array = np.ones((calls.shape[0], 1))  #################### changed this
    blob_vars = np.array(calls.loc[:, v], np.float64)
    if len(v) == 1:
        # add shape to the array so Maxp always sees a 2-D matrix
        blob_vars.shape = (blob_vars.shape[0], 1)
    print('\n### CREATING BLOBS FROM ' + str(len(v)) +
          ' VARIABLES ###\n PARAMETERS:\n # Minimum ' + floor_var +
          ' in each blob: ' + str(int(min_pop)) + '\n # Iterations: ' +
          str(iterations) + '\n # Method: ' + method + '\n # Plot blobs: ' +
          str(plot) + '\n # Save blobs data: ' + str(savedata) + '\n')
    for i in range(0, iterations):
        start = time.time()
        # w is the module-level spatial weights object
        r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
                    floor=min_pop, floor_variable=floor_var_array,
                    initial=initial, verbose=verbose)
        end = time.time()
        times.append(end - start)
        current_time.append(end)
        current_score = r.objective_function()
        solutions.append(current_score)
        num_blobs.append(r.k)
        if (best_score == -1 or current_score < best_score):
            best_score = current_score
            best_solution = r
        top_scores.append(best_score)
        iteration.append(i)
        msg = '\r# ITERATION '+str(i+1)+' \n Score: ' + \
            str(round(current_score,2)) + '\n Created '+str(r.k)+' blobs (' + \
            str(int(calls.shape[0]/r.k)) + ' tracts per blob)\n Best solution so far: ' + \
            str(round(best_score,2))
        msg += '\n Time taken: '+str(round(end-start,1))+' seconds ('+ \
            str(int(np.mean(times)*(iterations-i-1)))+' seconds remaining)\n'
        # fix: original used the Python 2 print statement ("print msg") here,
        # a syntax error in Python 3
        print(msg)
    r = best_solution
    print('\r# BEST SOLUTION: \n Score: ' + str(round(r.objective_function(),2)) +
          '\n ' + str(r.k) + ' blobs (' + str(int(calls.shape[0]/r.k)) +
          ' blocks per blob)')
    if plot:
        print('  Plotting...')
        # prep for plotting
        ids = np.array(calls['block_id']).astype(str)
        if plot_values:
            r.sort_regions(method='mean')  # sort regions by intensity of the variable
        regions = np.empty(calls.shape[0])
        for j in range(0, calls.shape[0]):
            regions[j] = r.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
                             title='Chicago blobs from census tracts\n(min ' +
                             str(int(r.floor)) + ' population per blob, ' +
                             str(r.p) + ' blobs)', k=r.p, figsize=(6,9))
        print('\r             \n')
    # build data structure summarizing each blob
    sr = np.zeros([r.k, len(v)*2+4])
    for region in range(0, r.k):
        # blob ID
        sr[region][0] = region
        selectionIDs = [r.w.id_order.index(i) for i in r.regions[region]]
        m = r.z[selectionIDs, :]
        # objective function (within-blob variance scaled by blob size)
        var = m.var(axis=0)
        sr[region][1] = sum(np.transpose(var)) * len(r.regions[region])
        # blob size (number of places in blob)
        sr[region][2] = len(r.regions[region])
        # blob population
        sr[region][3] = calls.loc[selectionIDs, floor_var].sum()
        # variable means and standard deviations
        for j in range(0, len(v)):
            sr[region][4+j*2] = m[:, j].mean()
            sr[region][5+j*2] = m[:, j].std()
    srdf = pd.DataFrame(sr)
    cols = ['Blob', 'Score', 'Size', floor_var]
    for j in range(0, len(v)):
        cols.append(v[j]+'_mean')
        cols.append(v[j]+'_stdev')
    srdf.columns = cols
    if savedata:
        srdf.to_csv('Blobs data ' +
                    datetime.datetime.now().strftime('%Y%m%d %H%M') + '.csv',
                    index=False)
    return dict(best=r, data=srdf, regions=r.area2region)
def plot_blobs(self, blob_shp=None, variable=None, k=None, mapType=None):
    """Plot the blobs we created, optionally on a dissolved blob shapefile.

    Parameters
    ----------
    blob_shp : str, optional
        Path to a dissolved blob shapefile; when given, plot one polygon
        per blob contour.
    variable : str, optional
        Column of ``self.blobs_data`` to color by; falls back to
        ``variable + '_mean'``. Defaults to coloring by blob ID.
    k : int, optional
        Number of classes; defaults to the number of blobs (or contours).
    mapType : str, optional
        Choropleth scheme; chosen automatically when omitted.

    Returns
    -------
    False if the variable name is invalid, otherwise None.
    """
    def _resolve_variable(name):
        # Map a requested name onto an existing column ('<name>' or
        # '<name>_mean'); return None and print the valid names otherwise.
        if name in self.blobs_data.columns:
            return name
        if name + '_mean' in self.blobs_data.columns:
            return name + '_mean'
        print("invalid variable name; variables are the following: \n" +
              ', '.join(self.blobs_data.columns))
        return None

    if blob_shp:
        k = len(self.contours)
        if not variable:
            variable = 'blob ID'
            data = np.arange(len(self.contours))
            mapType = 'unique_values'
        else:
            variable = _resolve_variable(variable)
            if variable is None:
                return False
            data = []
            for i in range(len(self.contours)):
                try:
                    # .loc replaces the removed pandas DataFrame.ix accessor
                    data.append(self.blobs_data.loc[self.contours_to_blobs[i], variable])
                except KeyError:
                    data.append(0)
            data = np.array(data)
            mapType = 'quantiles'
    elif not variable:
        data = self.regions
        variable = 'blob ID'
    else:
        variable = _resolve_variable(variable)
        if variable is None:
            return False
        data = []
        for i in self.d.tractID:
            try:
                data.append(self.blobs_data.loc[self.r.area2region[str(i)], variable])
            except KeyError:
                # area not assigned to any blob: plot as zero
                data.append(0)
        data = np.array(data)
    if not k:
        k = self.r.p
    if not mapType:
        mapType = 'quantiles'
    print('  Plotting...')
    map_shp = blob_shp if blob_shp else self.shp_link
    if mapType == 'unique_values':
        maps.plot_choropleth(map_shp, data, type=mapType, cmap='Paired',
                             title='Blobs from Census ' + self.level + 's\nby ' + variable +
                             ' (' + str(self.r.p) + ' blobs)', k=k, figsize=(12,16))
    else:
        maps.plot_choropleth(map_shp, data, type=mapType, cmap='hot_r',
                             title='Blobs from Census ' + self.level + 's\nby ' + variable +
                             ' (' + str(self.r.p) + ' blobs)', k=k, figsize=(12,16))
    print('\r             \n')
from pysal.contrib.viz import mapping as maps # Where will our shapefile be stored shp_link = os.path.join('outputs', 'lsoas_kde.shp') # Save it! sdf.to_file(shp_link) # And now re-load the values from the DBF file # associated with the shapefile. values = np.array(ps.open(shp_link.replace('.shp', '.dbf')).by_col(k_var)) maps.plot_choropleth(shp_link, values, 'unique_values', title='K-Means ' + str(k_pref) + ' Cluster Analysis', savein=os.path.join('outputs', 'K-Means.png'), dpi=150, figsize=(8, 6), alpha=0.9) #save pickle for later analysis data_std.to_pickle(os.path.join("outputs", "clusters.pickle")) ################################################## #trying out dbscan clustering for fun. Looks like the results are awful though data_std = pd.read_pickle(os.path.join('outputs', 'clusters.pickle')) d_var = 'DBSCAN' # Quick sanity check in case something hasn't # run successfully -- these muck up k-means data_std.drop(list(data_std.columns[data_std.isnull().any().values].values),
# if duplicates, need to remove
# .loc replaces the removed pandas DataFrame.ix accessor
len(df.loc[df.duplicated('geoid10'), :])  # number of duplicate pairs

## create weights (only need to run once)
w = ps.rook_from_shapefile(shp_link)
w.n == df.shape[0]  # should be true (REPL-style check; no effect as a script)
# persist the rook weights so later runs can just read the .gal file
gal = ps.open('blocks/CensusBlockTIGER2010.gal', 'w')
gal.write(w)
gal.close()

df.tractce10 = df.tractce10.astype('int')
df['order'] = df.index

# plot community areas
maps.plot_choropleth(shp_link, np.array(df.tractce10), type='equal_interval',
                     title='Initial Map', k=80)

# get spatial weights
w = ps.open('blocks/CensusBlockTIGER2010.gal').read()

# need to fix the ohare island (tracts 980000 and 770602)
# the following was saved to X_fixed.gal
# w.neighbors['770602'] = ['980000', '090100']
# w.weights['770602'] = [1.0, 1.0]
# w.neighbors['980000'] = ['770602', '760802']
# w.weights['980000'] = [1.0, 1.0]
# w.neighbors['090100'] = ['770602', '090200']
# w.weights['090100'] = [1.0, 1.0]
# w.neighbors['760802'] = ['980000', '760801', '170500', '170600', '760803', '770902']
# w.weights['760802'] = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
# normalize column names and types on the tract-level frame
df.columns = df.columns.map(lambda x: x.lower())
df.commarea = df.commarea.astype('int')
df['order'] = df.index

# load the 311-calls dataset; everything past the first column is numeric
calls = pd.read_csv('master311.csv', dtype=object)
for c in calls.columns[1:]:
    calls[c] = calls[c].astype('float')

# align calls with the shapefile's tract ordering
ordered_tracts = pd.DataFrame(df.loc[:, ['tractce10', 'commarea', 'order']])
calls = pd.merge(calls, ordered_tracts, how='right', left_on='tract',
                 right_on='tractce10', sort=False).fillna(0)
# fix: DataFrame.sort() was removed in pandas 0.20 — sort_values replaces it
calls = calls.sort_values(['order'])

# all calls by census tract
y = np.array(calls['all_calls_per1000'])

# map values
maps.plot_choropleth(shp_link, np.array(calls.all_calls_per1000),
                     type='fisher_jenks',
                     title='All 311 Calls by Census Area, 2011-2015\nUnsmoothed',
                     k=20, figsize=(6,9))

# Global Moran's I (spatial autocorrelation of call rates)
mi = ps.Moran(y, w)
mi.I       # observed statistic
mi.EI      # expected value under the null
mi.p_norm  # p-value (normal approximation)

# Geary's C
gc = ps.Geary(y, w)
gc.C
gc.EC
gc.z_norm