def generateRegions(w, observations=None, minAreas=5, maxp=True, contiguity=False, numRegs=None, compact=False):
    """Partition the areas described by ``w`` into regions.

    Two strategies are available:

    * ``maxp`` truthy: run ``pysal.Maxp`` on the transposed ``observations``
      with a floor of ``minAreas`` areas per region, then call
      ``inference()`` on the solution.
    * ``maxp`` falsy: draw ``numRegs`` random region sizes that add up to the
      number of areas and hand them to ``pysal.region.Random_Region``.

    Parameters
    ----------
    w : object
        Spatial weights; must expose ``id_order``. Passed to ``pysal.Maxp`` or,
        when ``contiguity`` is truthy, to ``Random_Region`` as ``contiguity``.
    observations : array-like, optional
        Attribute data; it is transposed before being given to ``pysal.Maxp``.
        Required when ``maxp`` is truthy.
    minAreas : int
        Floor for Max-p, or the smallest region size drawn in the random case.
    maxp : bool
        Selects the strategy (interpreted by truthiness).
    contiguity : bool
        Random case only: constrain the random regions with ``w``.
    numRegs : int, optional
        Random case only: number of regions. Required when ``maxp`` is falsy.
    compact : bool
        Random case only: forwarded to ``Random_Region``.

    Returns
    -------
    tuple
        ``(r.regions, r.pvalue)`` for Max-p, ``(Random_Region instance, None)``
        for the random case.

    Raises
    ------
    ValueError
        If a required argument is missing, or if no combination of region
        sizes can add up to the number of areas (the original code looped
        forever in that situation).
    """
    if maxp:
        if observations is None:
            raise ValueError("observations is required when maxp is True")
        observations = observations.transpose()
        # One unit per area, so the floor counts areas per region.
        fv = np.ones(len(observations))
        r = pysal.Maxp(w, observations, floor=minAreas, floor_variable=fv, initial=99)
        r.inference()
        return r.regions, r.pvalue

    if numRegs is None:
        raise ValueError("numRegs is required when maxp is False")

    ids = w.id_order
    n_areas = len(ids)
    # Each size is drawn from [minAreas, n_areas - 1] (randint's upper bound is
    # exclusive), so the sizes can only sum to n_areas inside this window.
    # Outside it the rejection loop below would never terminate.
    if n_areas and not (numRegs * minAreas <= n_areas <= numRegs * (n_areas - 1)):
        raise ValueError(
            "cannot split %d areas into %d regions of at least %d areas each"
            % (n_areas, numRegs, minAreas)
        )

    # Rejection sampling: redraw until the sizes cover every area exactly.
    cardinalities = []
    while np.sum(cardinalities) != n_areas:
        cardinalities = [np.random.randint(minAreas, n_areas) for _ in range(numRegs)]

    from pysal.region import Random_Region

    if contiguity:
        regions = Random_Region(area_ids=ids, num_regions=numRegs, cardinality=cardinalities, contiguity=w, compact=compact)
    else:
        regions = Random_Region(area_ids=ids, num_regions=numRegs, cardinality=cardinalities, compact=compact)
    return regions, None
def blobs(vars, min_pop, iterations, method='equal votes', weights=[], initial=10, plot=False):
    """Run pysal's Max-p several times and keep the best-scoring solution.

    Relies on module-level names defined elsewhere in the file: ``ps``, ``w``,
    ``calls``, ``blob_vars``, ``format_blobs``, ``maps``, ``shp_link`` and
    ``time``.

    Parameters
    ----------
    vars : sequence
        NOTE(review): not used by the body; the variables come from the
        module-level ``blob_vars``. Confirm whether that is intended.
    min_pop : number
        Floor passed to ``ps.Maxp``; the floor variable is ``calls['pop']``.
    iterations : int
        Number of independent Max-p runs (at least 1).
    method : str
        Forwarded to ``format_blobs``.
    weights : sequence
        Forwarded to ``format_blobs``. Never mutated here.
    initial : int
        Forwarded to ``ps.Maxp`` as ``initial``.
    plot : bool
        When true, draw the best solution with ``maps.plot_choropleth``.

    Returns
    -------
    dict
        ``times``, ``solutions``, ``top_scores``, ``current_time`` and
        ``iteration`` hold one entry per run; ``best`` is the solution with
        the lowest objective value.

    Raises
    ------
    ValueError
        If ``iterations`` is smaller than 1.
    """
    if iterations < 1:
        raise ValueError('iterations must be at least 1')

    solutions = []
    top_scores = []
    times = []
    current_time = []
    iteration = []
    best_score = None
    best_solution = None

    for i in range(iterations):
        start = time.time()
        r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
                    floor=min_pop, floor_variable=calls['pop'], initial=initial)
        end = time.time()
        score = r.objective_function()  # evaluate once per run

        times.append(end - start)
        current_time.append(end)
        solutions.append(score)
        # Lower is better; the first run always becomes the incumbent, so no
        # magic "very large" sentinel is needed.
        if best_solution is None or score < best_score:
            best_score = score
            best_solution = r
        top_scores.append(best_score)
        iteration.append(i)
        print('iteration '+str(i+1)+' - score: '+str(round(score,2))+
              ' ('+str(r.k)+' blobs), best: '+str(round(best_score,2))+', '+
              str(round(end-start,1))+'s')

    if plot:
        # prep for plotting
        ids = np.array(calls['tractce10']).astype(str)
        # best_solution.sort_regions(method='mean')  # uncomment to sort regions by intensity of the variable
        regions = np.empty(calls.shape[0])
        for j in range(calls.shape[0]):
            regions[j] = best_solution.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
                             title='Chicago blobs from census tracts\n(min ' +
                             str(best_solution.floor) +' population per blob, ' +
                             str(best_solution.p)+' blobs)',
                             k=best_solution.p, figsize=(6,8))

    # Always hand back the best run; the original returned the last run
    # unless plotting had been requested.
    return dict(times=times, solutions=solutions, top_scores=top_scores,
                current_time=current_time, iteration=iteration, best=best_solution)
def return_cluster_pysal(new_coor, new_weight, ref_list, nb_data, list_coor, out_file, out_file_2):
    """Cluster observations with pysal Max-p and write two GeoJSON files.

    A 4-nearest-neighbour weights object is built from ``new_coor``,
    row-standardised, and used to run ``pysal.Maxp`` on ``new_weight``. Each
    observation is then written twice: as a Point feature to ``out_file`` and
    as a Polygon feature to ``out_file_2``. Both carry the categories whose
    weight is positive and the 1-based cluster number.

    Parameters
    ----------
    new_coor : numpy.ndarray
        One row per observation; columns 0 and 1 are written as the point
        coordinates.
    new_weight : array-like
        One row per observation; entry ``j`` > 0 means category
        ``ref_list[j]`` applies to that observation.
    ref_list : sequence
        Category labels, indexed by column of ``new_weight``.
    nb_data : int
        Number of rows of the all-ones floor variable (presumably the number
        of observations -- confirm against the caller).
    list_coor : sequence
        Per-observation polygon ring, written as ``[[list_coor[idx]]]``.
    out_file, out_file_2 : str
        Output paths for the point and polygon feature collections.
    """
    # The k value might have to change!
    wknn3 = pysal.weights.KNN(new_coor, k=4)
    wknn3.transform = 'r'

    # The floor value should also be tuned.
    # NOTE: disabled experiments used to sit here as string literals: a
    # pysal.Gamma statistic, and a retry loop that re-ran Max-p (raising the
    # floor every third attempt) until no region had a total weight of zero.
    floor_v = 4
    r = pysal.Maxp(wknn3, new_weight, floor=floor_v,
                   floor_variable=np.ones((nb_data, 1)), initial=99)

    ### Create geojson file for the map
    cluster_id = np.zeros(shape=(new_coor.shape[0], 1))
    nb_cluster = 0
    for nb_cluster, members in enumerate(r.regions, 1):
        for member in members:
            cluster_id[member] = nb_cluster
    print(nb_cluster)

    # Centroid points and polygon preparation
    feature_list = []
    polygon_list = []
    for idx, feat in enumerate(new_coor):
        food_list = [ref_list[f_idx]
                     for f_idx, f in enumerate(new_weight[idx]) if f > 0]
        feature_list.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [feat[0], feat[1]],
            },
            "properties": {
                "category": food_list,
                "cluster": cluster_id[idx][0],
            },
        })
        polygon_list.append({
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": [[list_coor[idx]]],
            },
            "properties": {
                "category": food_list,
                "cluster": cluster_id[idx][0],
            },
        })

    geojson = {"type": "FeatureCollection", "features": feature_list}
    geojson_2 = {"type": "FeatureCollection", "features": polygon_list}

    # Context managers make sure both files are flushed and closed.
    with open(out_file, 'w') as output:
        json.dump(geojson, output)
    with open(out_file_2, 'w') as output_2:
        json.dump(geojson_2, output_2)
import os

import numpy as np
import pysal

# Sample data shipped inside the installed pysal package.
examples = os.path.join(os.path.dirname(pysal.__file__), 'examples', 'us_income')
input_file = pysal.open(os.path.join(examples, 'usjoin.csv'))

# Read the columns named '1929' .. '2009', then flip so that every row is one
# record of the CSV and every column one year.
pci = np.array(
    [input_file.by_col[str(y)] for y in range(1929, 2010)]
).transpose()

weights = pysal.open(os.path.join(examples, "states48.gal")).read()

# Max-p with a floor of 5 on an all-ones floor variable (48 rows).
maxp = pysal.Maxp(
    weights,
    pci,
    floor=5,
    floor_variable=np.ones((48, 1)),
    initial=99,
)

names = np.array(input_file.by_col('Name'))

# Print the member names of every region, one region per line.
for region in maxp.regions:
    ids = [int(area) for area in region]
    print(", ".join(names[ids]))
def to_foursquare_geo(in_file, out_file, nb_type):
    """Cluster the hits of a search-result JSON dump and export GeoJSON points.

    ``in_file`` must hold a JSON object whose ``['hits']['hits']`` list has
    entries with ``_source.location.lon``, ``_source.location.lat`` and
    ``_source.category``. Every hit becomes a row of a one-hot category
    matrix. A 15-nearest-neighbour weights object and ``pysal.Maxp``
    (floor of 6 observations) assign each hit to a 1-based cluster, and the
    result is written to ``out_file`` as a GeoJSON FeatureCollection.

    Parameters
    ----------
    in_file : str
        Path of the JSON input.
    out_file : str
        Path of the GeoJSON output.
    nb_type : int
        NOTE(review): ignored, as in the original code, which overwrote it
        with 181 ("should be retrieved from Elastic Search"). The matrix keeps
        at least 181 columns and only grows when the data holds more distinct
        categories, which previously raised IndexError.
    """
    with open(in_file) as source:
        data = json.load(source)
    hits = data['hits']['hits']

    #### Create clusters
    # Extract coordinates and the category column of every hit.
    nb_data = len(hits)
    coor = np.zeros(shape=(nb_data, 2))
    ref_list = []       # distinct categories, in order of first appearance
    category_col = []   # column of each hit's category in ref_list
    for index, d in enumerate(hits):
        location = d['_source']['location']
        coor[index][0] = location["lon"]
        coor[index][1] = location["lat"]
        category = d['_source']['category']
        if category not in ref_list:
            ref_list.append(category)
        category_col.append(ref_list.index(category))

    # This number should be retrieved from Elastic Search; 181 is kept as the
    # minimum width so existing inputs produce the same matrix as before.
    nb_type = max(181, len(ref_list))
    weight = np.zeros(shape=(nb_data, nb_type))
    for index, col in enumerate(category_col):
        weight[index][col] = 1

    # The k value might have to change!
    wknn3 = pysal.weights.KNN(coor, k=15)
    # The floor value should also be tuned.
    r = pysal.Maxp(wknn3, weight, floor=6,
                   floor_variable=np.ones((nb_data, 1)), initial=99)
    print(r.regions)

    ### Create geojson file for the map
    cluster_id = np.zeros(shape=(nb_data, 1))
    nb_cluster = 0
    for nb_cluster, members in enumerate(r.regions, 1):
        for member in members:
            cluster_id[member] = nb_cluster
    for idx, d in enumerate(hits):
        d['_source']['cluster'] = cluster_id[idx][0]
    print(nb_cluster)

    geojson = {
        "type": "FeatureCollection",
        "features": [{
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [
                    d['_source']['location']["lon"],
                    d['_source']['location']["lat"],
                ],
            },
            "properties": {
                "category": d['_source']['category'],
                "cluster": d['_source']['cluster'],
            },
        } for d in hits],
    }

    with open(out_file, 'w') as output:
        json.dump(geojson, output)
def blobs(v, min_pop, floor_var='pop', iterations=10, method='equal votes', weights=[], initial=10, plot=True, savedata=False, plot_values=False, verbose=True):
    """Create a max-p regions solution for a given shapefile and associated dataset.

    Builds on pysal.Maxp with improvements to the user interface, verbosity,
    and mapping. Original problem from "The Max-p-Regions Problem," Duque,
    Anselin, and Rey, JRS, October 2010, available at
    http://geography.sdsu.edu/Research/Projects/IPC/publication/MaxP_authored.pdf.

    Relies on module-level names defined elsewhere in the file: ``calls``,
    ``w``, ``ps``, ``format_blobs``, ``maps``, ``shp_link``, ``time``,
    ``datetime``, ``np`` and ``pd``.

    Parameters
    ----------
    v : list of str
        columns of ``calls`` on which to create blobs.
        NOTE(review): earlier documentation mentioned ``['all']`` for all
        variables, but no special handling of it is visible in this function.
    min_pop : int
        floor passed to pysal.Maxp. The floor variable used here is an
        all-ones column, so this bounds the number of areas per blob --
        NOTE(review): confirm this is intended rather than a population floor.
    floor_var : str
        column of ``calls`` that is summed per blob in the returned table;
        also used as a label in the progress messages ('pop' by default)
    iterations : int
        number of blobs solutions to create (will return best): 10 by default
    method : {'equal votes', 'default', 'weighted'}
        'equal votes' by default, can change to 'weighted'; forwarded to
        ``format_blobs``
    weights : array
        if method='weighted', add weights for variables as an array
    initial : int
        number of times to revise each solution (10 by default)
    plot : boolean
        will plot the best solution (True by default)
    savedata : boolean
        will save a CSV of the blobs data to the root folder (False by default)
    plot_values : boolean
        will color-code the plot by the mean of the underlying variables.
        only makes sense with one variable. default False (plots by ID of
        the blob). Only takes effect when ``plot`` is true.
    verbose : boolean
        forwarded to pysal.Maxp (True by default)

    Returns
    -------
    dict
        ``best`` (the best Maxp solution), ``data`` (a DataFrame with one row
        per blob) and ``regions`` (the solution's ``area2region`` mapping).

    Raises
    ------
    ValueError
        if ``iterations`` is smaller than 1.

    Sample usage
    ------------
    >>> blobs(['all_calls_per1000'], min_pop=10000, plot_values=True)
    """
    import sys  # only needed for the newline-free "Plotting..." message

    if iterations < 1:
        raise ValueError('iterations must be at least 1')

    n_areas = calls.shape[0]
    n_vars = len(v)
    floor_var_array = np.ones((n_areas, 1))
    blob_vars = np.array(calls.loc[:, v], np.float64)
    if n_vars == 1:
        # add shape to the array
        blob_vars.shape = (blob_vars.shape[0], 1)

    print('\n### CREATING BLOBS FROM ' + str(n_vars) +
          ' VARIABLES ###\n PARAMETERS:\n # Minimum ' + floor_var +
          ' in each blob: ' + str(int(min_pop)) + '\n # Iterations: ' +
          str(iterations) + '\n # Method: ' + method + '\n # Plot blobs: ' +
          str(plot) + '\n # Save blobs data: ' + str(savedata) + '\n')

    times = []
    best_score = None
    best_solution = None
    for i in range(iterations):
        start = time.time()
        r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
                    floor=min_pop, floor_variable=floor_var_array,
                    initial=initial, verbose=verbose)
        end = time.time()
        times.append(end - start)
        current_score = r.objective_function()
        # Lower is better; the first run always becomes the incumbent.
        if best_solution is None or current_score < best_score:
            best_score = current_score
            best_solution = r
        msg = '\r# ITERATION '+str(i+1)+' \n Score: ' + \
            str(round(current_score,2)) + '\n Created '+str(r.k)+' blobs (' + \
            str(int(n_areas/r.k)) + ' tracts per blob)\n Best solution so far: ' + \
            str(round(best_score,2))
        # Remaining time is estimated from the mean duration so far.
        msg += '\n Time taken: '+str(round(end-start,1))+' seconds ('+ \
            str(int(np.mean(times)*(iterations-i-1)))+' seconds remaining)\n'
        print(msg)

    r = best_solution
    print('\r# BEST SOLUTION: \n Score: '+
          str(round(r.objective_function(),2)) +
          '\n '+str(r.k)+' blobs ('+str(int(n_areas/r.k))+
          ' blocks per blob)')

    if plot:
        sys.stdout.write(' Plotting...')
        # prep for plotting
        ids = np.array(calls['block_id']).astype(str)
        if plot_values:
            r.sort_regions(method='mean')  # sort regions by intensity of the variable
        regions = np.empty(n_areas)
        for j in range(n_areas):
            regions[j] = r.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
                             title='Chicago blobs from census tracts\n(min ' +
                             str(int(r.floor)) +' population per blob, ' +
                             str(r.p)+' blobs)', k=r.p, figsize=(6,9))
        print('\r \n')

    # build data structure: one row per blob
    # Map each area id to its first position in id_order once, instead of
    # calling list.index() for every member of every blob.
    position = {}
    for pos, area in enumerate(r.w.id_order):
        position.setdefault(area, pos)

    sr = np.zeros([r.k, n_vars*2+4])
    for region in range(r.k):
        members = r.regions[region]
        selectionIDs = [position[area] for area in members]
        m = r.z[selectionIDs, :]
        # blob ID
        sr[region][0] = region
        # objective function: summed per-variable variance times blob size
        var = m.var(axis=0)
        sr[region][1] = sum(np.transpose(var)) * len(members)
        # blob size (number of places in blob)
        sr[region][2] = len(members)
        # blob population
        # presumably ``calls`` has a default integer index, so these
        # positions are valid labels -- confirm
        sr[region][3] = calls.loc[selectionIDs, floor_var].sum()
        # variable means and standard deviations
        for j in range(n_vars):
            sr[region][4+j*2] = m[:,j].mean()
            sr[region][5+j*2] = m[:,j].std()

    srdf = pd.DataFrame(sr)
    cols = ['Blob', 'Score', 'Size', floor_var]
    for name in v:
        cols.append(name+'_mean')
        cols.append(name+'_stdev')
    srdf.columns = cols
    if savedata:
        srdf.to_csv('Blobs data ' +
                    datetime.datetime.now().strftime('%Y%m%d %H%M') +
                    '.csv', index=False)
    return dict(best=r, data=srdf, regions=r.area2region)
import pysal
import numpy as np

# NOTE(review): `sp` is not imported in the visible part of this script --
# presumably `import shapefile as sp` (pyshp); confirm and add it.
w = sp.Writer()

# Coordinates of a 5 x 5 integer grid, one (x, y) row per point.
x, y = np.indices((5, 5))
x.shape = (25, 1)
y.shape = (25, 1)
data = np.hstack([x, y])

# Size the attribute matrix and the floor variable from the data itself; the
# original hard-coded 50 rows for these 25 observations.
n_obs = data.shape[0]
weight = np.ones((n_obs, 3))

wknn3 = pysal.weights.KNN(data, k=3)
r = pysal.Maxp(wknn3, weight, floor=5, floor_variable=np.ones((n_obs, 1)), initial=99)

# Print the regions without the outer pair of list brackets.
print(str(r.regions)[1:-1])
point2 = [point2] point3 = [] for x in range(0,size): point3.append([4.8,4.8]) point3 = [point3] w.poly(parts=point1, shapeType=sp.POLYLINE) w.poly(parts=point3, shapeType=sp.POLYLINE) w.poly(parts=point2, shapeType=sp.POLYLINE) """ for i in range(0,10): for j in range(0,10): point = [[[i,j],[i+1,j],[i+1,j+1],[i,j+1],[i,j]]] w.poly(parts=point, shapeType=sp.POLYLINE) weight_list = np.ones((100,3)) point = 0; for i in range(0,10): for j in range(0,10): if i < 5: weight_list[point][1] = 0 point = point + 1 w.save("Document/test4/poly1") w = pysal.weights.Queen.from_shapefile("Document/test4/poly1.shp") pci = np.array([[1,1,1],[0,0,0], [0,1,0], [0,1,0], [0,1,0] ]) r = pysal.Maxp(w, pci, floor = 1, floor_variable = np.ones((5, 1)), initial = 20) r.regions print str(r.regions)[1:-1]
def build_blobs(self):
    """Method to create a blobs solution.

    Runs ``ps.Maxp`` ``self.iterations`` times on the columns
    ``self.vars_to_use`` of ``self.d`` and keeps the run with the lowest
    objective value. The floor variable is an all-ones column when
    ``self.floor_var == 'areas'``, otherwise the column
    ``self.d[self.floor_var]``.

    Side effects: sets ``self.r`` (best solution) and ``self.regions`` (region
    number of every row of ``self.d``), prints progress, calls
    ``self.plot_blobs()`` when ``self.plot`` is true, and always finishes with
    ``self.build_data_structure(self.savedata)``.

    Raises
    ------
    ValueError
        If ``self.iterations`` is smaller than 1.
    """
    if self.iterations < 1:
        raise ValueError('iterations must be at least 1')

    n_areas = self.d.shape[0]
    if self.floor_var == 'areas':
        floor_var_array = np.ones((n_areas, 1))
    else:
        floor_var_array = self.d[self.floor_var]
    blob_vars = np.array(self.d.loc[:, self.vars_to_use], np.float64)
    if len(self.vars_to_use) == 1:
        # add shape to the array
        blob_vars.shape = (blob_vars.shape[0], 1)

    print('\n### CREATING BLOBS FROM ' + str(len(self.vars_to_use)) +
          ' VARIABLES ###\n PARAMETERS:\n # Minimum ' + self.floor_var +
          ' in each blob: ' + str(int(self.floor)) + '\n # Iterations: ' +
          str(self.iterations) + '\n # Method: ' + self.method +
          '\n # Plot blobs: ' + str(self.plot) + '\n # Save blobs data: ' +
          str(self.savedata) + '\n')

    times = []
    best_score = None
    best_solution = None
    for i in range(self.iterations):
        start = time.time()
        r = ps.Maxp(self.w, self._format_blobs(blob_vars),
                    floor=self.floor, floor_variable=floor_var_array,
                    initial=self.initial, verbose=self.verbose)
        end = time.time()
        times.append(end - start)
        current_score = r.objective_function()
        # Lower is better; the first run always becomes the incumbent.
        if best_solution is None or current_score < best_score:
            best_score = current_score
            best_solution = r
        msg = '\n# ITERATION '+str(i+1)+' \n Score: ' + \
            str(round(current_score,2)) + '\n Created '+str(r.k)+' blobs (' + \
            str(int(n_areas/r.k)) + ' tracts per blob)\n Best solution so far: ' + \
            str(round(best_score,2))
        # Remaining time is estimated from the mean duration so far.
        msg += '\n Time taken: '+str(round(end-start,1))+' seconds ('+ \
            str(int(np.mean(times)*(self.iterations-i-1)))+' seconds remaining)\n'
        print(msg)

    r = best_solution
    print('\r# BEST SOLUTION: \n Score: '+
          str(round(r.objective_function(),2)) +
          '\n '+str(r.k)+' blobs ('+str(int(n_areas/r.k))+
          ' tracts per blob)')
    self.r = r

    # prep for plotting
    ids = np.array(self.d[self.id_var]).astype(str)
    if self.level == 'block':
        # A real list: map() is a one-shot iterator on Python 3 and cannot
        # be indexed below.
        ids = [str(k) for k in np.arange(n_areas)]
    if self.plot_values:
        self.r.sort_regions(method='mean')  # sort regions by intensity of the variable
    regions = np.empty(n_areas)
    for j in range(n_areas):
        regions[j] = r.area2region[ids[j]]
    self.regions = regions

    if self.plot:
        self.plot_blobs()
    self.build_data_structure(self.savedata)
iterations = 3  # num iterations to find best solution
# num_seeds = 6  # num initial seed census tracts

##############
#### Run the following code to the end
##############

# seeds = np.random.choice(calls['tractce10'], replace=False, size=num_seeds)

# iterate to find the best solution (lowest objective value wins)
blob_vars = np.array(calls.loc[:,v], np.float64)
best_score = None
best_solution = None
for i in range(0, iterations):
    # NOTE(review): the original passed `myverbose=True`; the other Maxp calls
    # in this code base use `verbose=`, so that keyword is used here. Switch
    # back if a locally patched pysal really expects `myverbose`.
    r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
                floor=min_pop, floor_variable=calls['pop'], initial=10,
                verbose=True)
    score = r.objective_function()  # evaluate once per run
    # The first run always becomes the incumbent, so no "very large" sentinel
    # score is needed.
    if best_solution is None or score < best_score:
        best_score = score
        best_solution = r
    print('iteration '+str(i+1)+' - score: '+str(round(score,2))+
          ' ('+str(r.k)+' blobs), best: '+str(round(best_score,2)))

# prep for plotting
r = best_solution
ids = np.array(calls['tractce10']).astype(str)
r.sort_regions(method='mean')  # sort regions by intensity of the variable
regions = np.empty(calls.shape[0])
for j in range(0, calls.shape[0]):
    reg = r.area2region[ids[j]]
    regions[j] = reg

# show blobs we created