示例#1
0
def generateRegions(w,
                    observations=None,
                    minAreas=5,
                    maxp=True,
                    contiguity=False,
                    numRegs=None,
                    compact=False):
    """Partition the areas of ``w`` into regions.

    Two strategies are supported:

    * ``maxp`` true: run ``pysal.Maxp`` on ``observations`` with a floor of
      ``minAreas``; every area contributes 1 to the floor variable.
    * ``maxp`` false: build a ``Random_Region`` solution with ``numRegs``
      regions whose sizes are drawn at random.

    Parameters
    ----------
    w : spatial weights object
        Must expose ``id_order``; it is handed to pysal unchanged.
    observations : array, optional
        Required when ``maxp`` is true.  It is transposed before being
        passed to ``pysal.Maxp``.
    minAreas : int
        Floor for ``pysal.Maxp``, or the lower bound of each random region
        size.
    maxp : bool
        Selects the strategy (see above).
    contiguity : bool
        Random strategy only: when true, ``w`` is passed to
        ``Random_Region`` as its ``contiguity`` argument.
    numRegs : int, optional
        Number of regions; required by the random strategy.
    compact : bool
        Forwarded to ``Random_Region``.

    Returns
    -------
    tuple
        ``(r.regions, r.pvalue)`` for the max-p strategy, or
        ``(Random_Region instance, None)`` for the random strategy.

    Raises
    ------
    ValueError
        If ``observations`` is missing for max-p, or if the requested
        random partition is impossible (``numRegs`` missing or < 1, or
        ``numRegs * minAreas`` larger than the number of areas).
    """
    if maxp:
        if observations is None:
            raise ValueError("observations is required when maxp is True")
        observations = observations.transpose()
        floor_variable = np.ones(len(observations))
        solution = pysal.Maxp(w,
                              observations,
                              floor=minAreas,
                              floor_variable=floor_variable,
                              initial=99)
        solution.inference()
        return solution.regions, solution.pvalue

    area_ids = w.id_order
    n_areas = len(area_ids)

    # Reject requests the sampling loop below could never satisfy; the loop
    # would otherwise spin forever.
    if numRegs is None or numRegs < 1:
        raise ValueError("numRegs must be a positive integer when maxp is "
                         "False")
    if numRegs * minAreas > n_areas:
        raise ValueError(
            "cannot build %d regions of at least %d areas from %d areas" %
            (numRegs, minAreas, n_areas))

    from pysal.region import Random_Region

    if numRegs == 1:
        # randint's upper bound is exclusive, so the sampling loop can never
        # draw a single region that covers every area.
        cardinalities = [n_areas]
    else:
        # Rejection sampling: redraw until the sizes add up to n_areas.
        cardinalities = []
        while sum(cardinalities) != n_areas:
            cardinalities = [
                np.random.randint(minAreas, n_areas) for _ in range(numRegs)
            ]

    options = dict(area_ids=area_ids,
                   num_regions=numRegs,
                   cardinality=cardinalities,
                   compact=compact)
    if contiguity:
        options['contiguity'] = w

    return Random_Region(**options), None
示例#2
0
文件: configure.py 项目: sjsrey/blobs
def blobs(vars, min_pop, iterations, method='equal votes', weights=[],
    initial=10, plot=False):
    """Run ``ps.Maxp`` repeatedly and keep the lowest-scoring solution.

    Parameters
    ----------
    vars       : unused
                 NOTE(review): the body never reads this argument; the data
                 comes from the module-level ``blob_vars``. Confirm intent.
    min_pop    : number
                 passed to ``ps.Maxp`` as ``floor`` (the floor variable is
                 ``calls['pop']``)
    iterations : int
                 number of independent solutions to generate
    method     : str
                 forwarded to ``format_blobs``
    weights    : array
                 forwarded to ``format_blobs``
    initial    : int
                 forwarded to ``ps.Maxp`` as ``initial``
    plot       : boolean
                 when true, map the best solution with
                 ``maps.plot_choropleth``

    Returns
    -------
    dict with keys ``times`` (seconds per iteration), ``solutions`` (score
    per iteration), ``top_scores`` (running best score), ``current_time``
    (timestamp at the end of each iteration), ``iteration`` (0-based index)
    and ``best`` (the best solution, or None when no iteration ran).
    """
    solutions = []
    top_scores = []
    times = []
    current_time = []
    iteration = []
    best_score = None
    best_solution = None
    for i in range(iterations):
        start = time.time()
        r = ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
            floor=min_pop, floor_variable=calls['pop'], initial=initial)
        end = time.time()
        # Score the solution once and reuse the value below.
        score = r.objective_function()
        times.append(end - start)
        current_time.append(end)
        solutions.append(score)
        if best_solution is None or score < best_score:
            best_score = score
            best_solution = r
        top_scores.append(best_score)
        iteration.append(i)
        print('iteration '+str(i+1)+' - score: '+str(round(score,2))+
        ' ('+str(r.k)+' blobs), best: '+str(round(best_score,2))+', '+
        str(round(end-start,1))+'s')
    if plot and best_solution is not None:
        # prep for plotting
        ids = np.array(calls['tractce10']).astype(str)
        # best_solution.sort_regions(method='mean')  # uncomment to sort regions by intensity of the variable
        regions = np.empty(calls.shape[0])
        for j in range(calls.shape[0]):
            regions[j] = best_solution.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
            title='Chicago blobs from census tracts\n(min ' +
                str(best_solution.floor) +' population per blob, ' +
                str(best_solution.p)+' blobs)', k=best_solution.p,
            figsize=(6,8))
    # Always report the best solution, not whichever one ran last.
    return dict(times=times, solutions=solutions, top_scores=top_scores,
        current_time=current_time, iteration=iteration, best=best_solution)
def return_cluster_pysal(new_coor, new_weight, ref_list, nb_data, list_coor, out_file, out_file_2):
    """Cluster observations with ``pysal.Maxp`` and write two GeoJSON files.

    Parameters
    ----------
    new_coor : array
        One row per observation; the first two entries of each row are
        written as the point coordinates and the rows feed the KNN weights.
    new_weight : array
        One row per observation; a column value > 0 means the matching
        entry of ``ref_list`` applies to that observation.
    ref_list : list
        Labels indexed by the columns of ``new_weight``.
    nb_data : int
        Number of rows of the all-ones floor variable given to ``Maxp``.
    list_coor : sequence
        Indexed per observation; each entry is written as the polygon ring
        (nested as ``[[entry]]``, exactly as before).
    out_file : str
        Path of the point FeatureCollection.
    out_file_2 : str
        Path of the polygon FeatureCollection.

    The number of clusters is printed; nothing is returned.
    """
    # The k value might have to change!
    wknn3 = pysal.weights.KNN(new_coor, k=4)
    wknn3.transform = 'r'

    # The floor value should also be tuned.  (An earlier experiment that
    # retried Maxp with a growing floor was already disabled, so a single
    # run is performed.)
    floor_v = 4
    r = pysal.Maxp(wknn3, new_weight, floor=floor_v,
                   floor_variable=np.ones((nb_data, 1)), initial=99)

    # Label every observation with its 1-based cluster number.
    cluster_id = np.zeros(shape=(new_coor.shape[0], 1))
    nb_cluster = 0
    for nb_cluster, members in enumerate(r.regions, 1):
        for member in members:
            cluster_id[member] = nb_cluster
    print(nb_cluster)

    # Build one point feature and one polygon feature per observation.
    feature_list = []
    polygon_list = []
    for idx, feat in enumerate(new_coor):
        food_list = [
            ref_list[f_idx]
            for f_idx, f in enumerate(new_weight[idx])
            if f > 0
        ]
        feature_list.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [feat[0], feat[1]],
            },
            "properties": {
                "category": food_list,
                "cluster": cluster_id[idx][0],
            },
        })
        polygon_list.append({
            "type": "Feature",
            "geometry": {
                "type": "Polygon",
                "coordinates": [[list_coor[idx]]],
            },
            "properties": {
                "category": food_list,
                "cluster": cluster_id[idx][0],
            },
        })

    geojson = {
        "type": "FeatureCollection",
        "features": feature_list,
    }
    geojson_2 = {
        "type": "FeatureCollection",
        "features": polygon_list,
    }

    # Context managers make sure both files are flushed and closed.
    with open(out_file, 'w') as output:
        json.dump(geojson, output)
    with open(out_file_2, 'w') as output_2:
        json.dump(geojson_2, output_2)
示例#4
0
import os
import pysal
import numpy as np

# Directory of the 'us_income' example data inside the pysal package.
examples = os.path.join(os.path.dirname(pysal.__file__), 'examples', 'us_income')

input_file = pysal.open(os.path.join(examples, 'usjoin.csv'))
# Read one column per year (1929..2009), then flip so the years become
# the columns of the matrix.
pci = np.array(
    [input_file.by_col[str(y)] for y in range(1929, 2010)]
).transpose()

weights = pysal.open(os.path.join(examples, "states48.gal")).read()
maxp = pysal.Maxp(
    weights,
    pci,
    floor=5,
    floor_variable=np.ones((48, 1)),
    initial=99,
)

names = np.array(input_file.by_col('Name'))

# Print the names belonging to each region on one comma-separated line.
for region in maxp.regions:
    ids = [int(i) for i in region]
    print(", ".join(names[ids]))
示例#5
0
def to_foursquare_geo(in_file, out_file, nb_type):
    """Cluster the hits of a search dump and write them as GeoJSON points.

    Parameters
    ----------
    in_file : str
        Path of a JSON file whose ``['hits']['hits']`` entries each carry
        ``_source.location.lon``, ``_source.location.lat`` and
        ``_source.category``.
    out_file : str
        Path of the GeoJSON FeatureCollection to write.
    nb_type : int
        Minimum number of category columns in the indicator matrix.  The
        matrix is widened automatically when the data holds more distinct
        categories, so too small a value no longer raises IndexError.

    The regions found by ``pysal.Maxp`` and the number of clusters are
    printed; nothing is returned.
    """
    with open(in_file) as source:
        data = json.load(source)

    hits = data['hits']['hits']
    nb_data = len(hits)

    #### Create clusters

    # Collect the coordinates and the category column of every hit.
    # Categories are numbered in order of first appearance.
    coor = np.zeros(shape=(nb_data, 2))
    ref_list = []
    category_columns = []
    for index, d in enumerate(hits):
        location = d['_source']['location']
        coor[index][0] = location["lon"]
        coor[index][1] = location["lat"]
        category = d['_source']['category']
        if category not in ref_list:
            ref_list.append(category)
        category_columns.append(ref_list.index(category))

    # One indicator column per category; never narrower than the data needs.
    width = max(int(nb_type or 0), len(ref_list))
    weight = np.zeros(shape=(nb_data, width))
    for index, column in enumerate(category_columns):
        weight[index][column] = 1

    # The k value might have to change!
    wknn3 = pysal.weights.KNN(coor, k=15)
    # The floor value should also be tuned.
    r = pysal.Maxp(wknn3,
                   weight,
                   floor=6,
                   floor_variable=np.ones((nb_data, 1)),
                   initial=99)
    print(r.regions)

    ### Create geojson file for the map
    # Label every hit with its 1-based cluster number.
    cluster_id = np.zeros(shape=(nb_data, 1))
    nb_cluster = 0
    for nb_cluster, members in enumerate(r.regions, 1):
        for member in members:
            cluster_id[member] = nb_cluster
    for idx, d in enumerate(hits):
        d['_source']['cluster'] = cluster_id[idx][0]
    print(nb_cluster)

    geojson = {
        "type":
        "FeatureCollection",
        "features": [{
            "type": "Feature",
            "geometry": {
                "type":
                "Point",
                "coordinates": [
                    d['_source']['location']["lon"],
                    d['_source']['location']["lat"]
                ],
            },
            "properties": {
                "category": d['_source']['category'],
                "cluster": d['_source']['cluster']
            }
        } for d in hits]
    }

    with open(out_file, 'w') as output:
        json.dump(geojson, output)
def blobs(v, min_pop, floor_var='pop', iterations=10, method='equal votes', weights=[],
    initial=10, plot=True, savedata=False, plot_values=False, verbose=True):
    """Create a max-p regions solution for a given shapefile and associated
    dataset. Builds on pysal.Maxp with improvements to the user interface,
    verbosity, and mapping.

    Original problem from "The Max-p-Regions Problem," Duque, Anselin, and Rey,
    JRS, October 2010, available at http://geography.sdsu.edu/Research/
    Projects/IPC/publication/MaxP_authored.pdf.

    Parameters
    ----------
    v           : list of str
                  column names of ``calls`` on which to create blobs
                  (NOTE(review): an earlier docstring mentioned ['all'] for
                  all variables, but no such special case exists in this
                  function -- confirm)

    min_pop     : int
                  floor passed to the solver. The floor variable is an
                  all-ones array, so this is effectively the minimum number
                  of areas per blob.

    floor_var   : str
                  label used in the progress banner and name of the
                  ``calls`` column summed per blob in the returned table
                  ('pop' by default)

    iterations  : int
                  number of blobs solutions to create (will return best): 10 by
                    default; must be at least 1

    method      : {'equal votes', 'default', 'weighted'}
                  'equal votes' by default, can change to 'weighted'
                  (forwarded to ``format_blobs``)

    weights     : array
                  if method='weighted', add weights for variables as an array

    initial     : int
                  number of times to revise each solution (10 by default)

    plot        : boolean
                  will plot the best solution (True by default)

    savedata    : boolean
                  will save a CSV of the blobs data to the root folder (False
                    by default)

    plot_values : boolean
                  will color-code the plot by the mean of the underlying
                    variables. only makes sense with one variable. default
                    False (plots by ID of the blob)

    verbose     : boolean
                  forwarded to ``ps.Maxp`` (True by default)

    Returns
    -------
    dict with keys ``best`` (the best solution), ``data`` (a DataFrame with
    one row per blob: ID, score, size, floor_var total and the mean/stdev
    of every variable) and ``regions`` (the solution's ``area2region``).

    Raises
    ------
    ValueError if ``iterations`` is smaller than 1.

    Sample usage
    ------------

    >>> blobs(['all_calls_per1000'], min_pop=10000, plot_values=True)

    """
    import sys

    # Without at least one run there is no solution to report.
    if iterations < 1:
        raise ValueError('iterations must be at least 1')

    times = []
    best_score = None
    best_solution = None
    floor_var_array = np.ones((calls.shape[0],1))   #################### changed this
    blob_vars = np.array(calls.loc[:,v], np.float64)
    if len(v) == 1:
        # add shape to the array
        blob_vars.shape = (blob_vars.shape[0], 1)
    print('\n### CREATING BLOBS FROM ' + str(len(v)) +
        ' VARIABLES ###\n    PARAMETERS:\n     # Minimum ' + floor_var + ' in each blob: ' +
        str(int(min_pop)) + '\n     # Iterations: ' + str(iterations) +
        '\n     # Method: ' + method + '\n     # Plot blobs: ' + str(plot) +
        '\n     # Save blobs data: ' + str(savedata) + '\n')
    for i in range(0,iterations):
        start = time.time()
        r=ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
            floor=min_pop, floor_variable=floor_var_array, initial=initial, verbose=verbose)
        end = time.time()
        times.append(end - start)
        current_score = r.objective_function()
        if best_solution is None or current_score < best_score:
            best_score = current_score
            best_solution = r
        msg = '\r# ITERATION '+str(i+1)+'                 \n  Score: ' + \
            str(round(current_score,2)) + '\n  Created '+str(r.k)+' blobs (' + \
            str(int(calls.shape[0]/r.k)) + ' tracts per blob)\n  Best solution so far: ' + \
            str(round(best_score,2))
        msg += '\n  Time taken: '+str(round(end-start,1))+' seconds ('+ \
            str(int(np.mean(times)*(iterations-i-1)))+' seconds remaining)\n'
        print(msg)

    r = best_solution
    print('\r# BEST SOLUTION:                      \n  Score: '+
        str(round(r.objective_function(),2)) +
        '\n  '+str(r.k)+' blobs ('+str(int(calls.shape[0]/r.k))+
        ' blocks per blob)')
    if plot:
        # Written without a newline so the status line printed after
        # plotting can overwrite it.
        sys.stdout.write('  Plotting...')
        sys.stdout.flush()
        # prep for plotting
        ids=np.array(calls['block_id']).astype(str)
        if plot_values:
            r.sort_regions(method='mean')  # sort regions by intensity of the variable
        regions=np.empty(calls.shape[0])
        for j in range(0,calls.shape[0]):
            reg=r.area2region[ids[j]]
            regions[j]=reg
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
            title='Chicago blobs from census tracts\n(min ' +
                str(int(r.floor)) +' population per blob, ' +
                str(r.p)+' blobs)', k=r.p, figsize=(6,9))
        print('\r             \n')

    #build data structure
    sr = np.zeros([r.k, len(v)*2+4])
    for region in range(0,r.k):
        # blob ID
        sr[region][0] = region
        # positions of the blob's areas in the weights' id order
        selectionIDs = [r.w.id_order.index(i) for i in r.regions[region]]
        m = r.z[selectionIDs, :]
        # objective function
        var = m.var(axis=0)
        sr[region][1] = sum(np.transpose(var)) * len(r.regions[region])
        # blob size (number of places in blob)
        sr[region][2] = len(r.regions[region])
        # blob population
        # NOTE(review): .loc is label-based while selectionIDs are
        # positions; this assumes calls has a default 0..n-1 index.
        sr[region][3] = calls.loc[selectionIDs, floor_var].sum()
        # variable means and standard deviations
        for j in range(0,len(v)):
            sr[region][4+j*2] = m[:,j].mean()
            sr[region][5+j*2] = m[:,j].std()
    srdf = pd.DataFrame(sr)
    cols = ['Blob', 'Score', 'Size', floor_var]
    for j in range(0, len(v)):
        cols.append(v[j]+'_mean')
        cols.append(v[j]+'_stdev')
    srdf.columns = cols
    if savedata:
        srdf.to_csv('Blobs data ' + datetime.datetime.now().strftime('%Y%m%d %H%M') + \
            '.csv', index=False)
    return dict(best=r, data=srdf, regions=r.area2region)
import pysal
import numpy as np

w = sp.Writer()

# Coordinates of a 5x5 lattice: 25 rows of (x, y).
x, y = np.indices((5, 5))
x.shape = (25, 1)
y.shape = (25, 1)
data = np.hstack([x, y])

# One attribute row and one floor value per lattice point.  The arrays were
# previously built with 50 rows although only 25 observations exist.
weight = np.ones((data.shape[0], 3))
wknn3 = pysal.weights.KNN(data, k=3)
r = pysal.Maxp(wknn3,
               weight,
               floor=5,
               floor_variable=np.ones((data.shape[0], 1)),
               initial=99)

# Print the regions without the outer brackets (call form works on
# Python 2 and 3).
print(str(r.regions)[1:-1])
示例#8
0
point2 = [point2]

point3 = []
for x in range(0,size):
	point3.append([4.8,4.8])
point3 = [point3]

w.poly(parts=point1, shapeType=sp.POLYLINE)
w.poly(parts=point3, shapeType=sp.POLYLINE)
w.poly(parts=point2, shapeType=sp.POLYLINE)
"""

# Add a 10x10 lattice of closed unit squares to the shapefile writer.
for i in range(0, 10):
    for j in range(0, 10):
        point = [[[i, j], [i + 1, j], [i + 1, j + 1], [i, j + 1], [i, j]]]
        w.poly(parts=point, shapeType=sp.POLYLINE)

# Attribute table for the 100 squares: the middle column is zeroed for the
# first five values of i (the first 50 squares in creation order).
weight_list = np.ones((100, 3))
point = 0
for i in range(0, 10):
    for j in range(0, 10):
        if i < 5:
            weight_list[point][1] = 0
        point = point + 1

w.save("Document/test4/poly1")
w = pysal.weights.Queen.from_shapefile("Document/test4/poly1.shp")
pci = np.array([[1, 1, 1], [0, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0]])

# NOTE(review): pci and the floor variable have 5 rows while 100 squares
# were written above, and weight_list is never used -- possibly weight_list
# and np.ones((100, 1)) were intended here. Confirm before relying on it.
r = pysal.Maxp(w, pci, floor=1, floor_variable=np.ones((5, 1)), initial=20)

# Print the regions without the outer brackets (call form works on
# Python 2 and 3).
print(str(r.regions)[1:-1])
示例#9
0
    def build_blobs(self):
        """Create a blobs solution and store it on the instance.

        Runs ``ps.Maxp`` ``self.iterations`` times, keeps the solution with
        the lowest objective value in ``self.r``, stores the region of
        every row of ``self.d`` in ``self.regions``, optionally plots the
        result (``self.plot``) and finally calls
        ``self.build_data_structure(self.savedata)``.

        The floor variable is an all-ones array when ``self.floor_var`` is
        ``'areas'`` and the ``self.floor_var`` column of ``self.d``
        otherwise.

        Raises
        ------
        ValueError
            If ``self.iterations`` is smaller than 1, because there would
            be no solution to keep.
        """
        if self.iterations < 1:
            raise ValueError('iterations must be at least 1')

        times = []
        best_score = None
        best_solution = None
        if self.floor_var == 'areas':
            floor_var_array = np.ones((self.d.shape[0], 1))
        else:
            floor_var_array = self.d[self.floor_var]
        blob_vars = np.array(self.d.loc[:, self.vars_to_use], np.float64)

        if len(self.vars_to_use) == 1:
            # add shape to the array
            blob_vars.shape = (blob_vars.shape[0], 1)
        print('\n### CREATING BLOBS FROM ' + str(len(self.vars_to_use)) +
            ' VARIABLES ###\n    PARAMETERS:\n     # Minimum ' + self.floor_var + ' in each blob: ' +
            str(int(self.floor)) + '\n     # Iterations: ' + str(self.iterations) +
            '\n     # Method: ' + self.method + '\n     # Plot blobs: ' + str(self.plot) +
            '\n     # Save blobs data: ' + str(self.savedata) + '\n')

        for i in range(0,self.iterations):
            start = time.time()
            r=ps.Maxp(self.w, self._format_blobs(blob_vars),
                floor=self.floor, floor_variable=floor_var_array,
                initial=self.initial, verbose=self.verbose)
            end = time.time()
            times.append(end - start)
            current_score = r.objective_function()
            if best_solution is None or current_score < best_score:
                best_score = current_score
                best_solution = r
            msg = '\n# ITERATION '+str(i+1)+'                 \n  Score: ' + \
                str(round(current_score,2)) + '\n  Created '+str(r.k)+' blobs (' + \
                str(int(self.d.shape[0]/r.k)) + ' tracts per blob)\n  Best solution so far: ' + \
                str(round(best_score,2))
            msg += '\n  Time taken: '+str(round(end-start,1))+' seconds ('+ \
                str(int(np.mean(times)*(self.iterations-i-1)))+' seconds remaining)\n'
            print(msg)

        r = best_solution
        print('\r# BEST SOLUTION:                      \n  Score: '+
            str(round(r.objective_function(),2)) +
            '\n  '+str(r.k)+' blobs ('+str(int(self.d.shape[0]/r.k))+
            ' tracts per blob)')
        self.r = r
        # prep for plotting
        ids=np.array(self.d[self.id_var]).astype(str)
        if self.level == 'block':
            # Materialise the ids: a bare map object cannot be indexed on
            # Python 3.
            ids = list(map(str, np.arange(self.d.shape[0])))
        if self.plot_values:
            self.r.sort_regions(method='mean')  # sort regions by intensity of the variable
        regions=np.empty(self.d.shape[0])
        for j in range(0,self.d.shape[0]):
            regions[j]=r.area2region[ids[j]]
        self.regions = regions
        if self.plot:
            self.plot_blobs()
        self.build_data_structure(self.savedata)
示例#10
0
文件: configure.py 项目: sjsrey/blobs
iterations = 3   # num iterations to find best solution
# num_seeds = 6  # num initial seed census tracts


##############
#### Run the following code to the end
##############


# seeds = np.random.choice(calls['tractce10'], replace=False, size=num_seeds)
# iterate to find the best solution
blob_vars = np.array(calls.loc[:,v], np.float64)
# Start from infinity so the first solution is always accepted, whatever
# the magnitude of its score.
best_score = float('inf')
best_solution = None
for i in range(0,iterations):
    r=ps.Maxp(w, format_blobs(blob_vars, method, weights=weights),
        floor=min_pop, floor_variable=calls['pop'], initial=10, myverbose=True)
    # Score the solution once and reuse the value below.
    score = r.objective_function()
    if score < best_score:
        best_score = score
        best_solution = r
    print('iteration '+str(i+1)+' - score: '+str(round(score,2))+
    ' ('+str(r.k)+' blobs), best: '+str(round(best_score,2)))

# prep for plotting
r = best_solution
ids=np.array(calls['tractce10']).astype(str)
r.sort_regions(method='mean')  # sort regions by intensity of the variable
regions=np.empty(calls.shape[0])
for j in range(0,calls.shape[0]):
    reg=r.area2region[ids[j]]
    regions[j]=reg
# show blobs we created