Exemplo n.º 1
0
def get_user_msoas_paralel(args):
    """Write the MSOA code of each user in one chunk to a per-worker file.

    All inputs arrive packed in the single sequence ``args`` (presumably so
    the function can be mapped over by a worker pool -- confirm with caller):

    * ``args[0]`` -- mapping of user -> coordinate, where ``coord[0]`` is the
      longitude and ``coord[1]`` the latitude.
    * ``args[1]`` -- ``cityshape``; must support ``.contains(point)`` and
      boolean-mask indexing, and expose an ``'msoa11cd'`` column.
    * ``args[2]`` -- worker id, used in the progress output and file name.
    * ``args[3]`` -- city; accepted for interface compatibility, unused here.
    * ``args[4]`` -- output folder.

    One ``user<TAB>msoa`` line is written to
    ``<outfolder>/venue_msoa_attributes_<thread_id>`` for every user whose
    point falls in exactly one row of ``cityshape``; other users are skipped.
    """
    user_coord_chunks = args[0]
    cityshape = args[1]
    thread_id = args[2]
    outfolder = args[4]
    total = len(user_coord_chunks)

    out_path = outfolder + '/venue_msoa_attributes_' + str(thread_id)

    # The context manager guarantees the file is flushed and closed even if
    # a coordinate fails to parse or the spatial query raises mid-chunk.
    with open(out_path, 'w') as fout:
        for ind, (user, coord) in enumerate(user_coord_chunks.items()):
            if ind % 100 == 0:
                print(thread_id, '\t', ind, '/', total)

            lat = float(coord[1])
            lng = float(coord[0])

            query_df = cityshape[cityshape.contains(Point(lng, lat))]

            # Only unambiguous hits are recorded.
            if query_df.shape[0] == 1:
                msoa = query_df.iloc[0]['msoa11cd']
                fout.write(user + '\t' + msoa + '\n')
Exemplo n.º 2
0
def get_state_count(data):
    """Sum the ``widgets`` value of every row of ``data`` per state.

    For each index label of ``data`` a point is built from the row's
    ``lng`` / ``lat`` values and tested against the ``geometry`` of each row
    of the module-level ``state_map``.  The row's ``widgets`` value is added
    to the first state whose geometry contains the point; rows matching no
    state contribute nothing.

    A row that raises while being processed is reported on stdout and
    skipped, so one bad record does not abort the whole run.

    Returns:
        dict mapping the state's ``NAME`` to its accumulated ``widgets``.
    """
    state_count = {}
    for index in data.index:
        if index % 1000 == 0:
            print("processing at:", index)
        try:
            point = Point(data.loc[index, "lng"], data.loc[index, "lat"])
            for index1 in state_map.index:
                if point.within(state_map.loc[index1, "geometry"]):
                    state_name = state_map.loc[index1, "NAME"]
                    widgets = data.loc[index, "widgets"]
                    if state_count.get(state_name) is not None:
                        state_count[state_name] += widgets
                    else:
                        state_count[state_name] = widgets
                    # A point is credited to the first matching state only.
                    break
        except Exception:
            # Deliberately broad (best-effort per row), but no longer a bare
            # ``except`` so Ctrl-C / SystemExit still stop the run.
            print("ERROR in index: ", index, data.loc[index, "lng"])
    return state_count
Exemplo n.º 3
0
def get_wards_paralel(args):
    """Write ward attributes for each venue in one chunk to a per-worker file.

    All inputs arrive packed in the single sequence ``args`` (presumably so
    the function can be mapped over by a worker pool -- confirm with caller):

    * ``args[0]`` -- mapping of venue -> coordinate, where ``coord[0]`` is
      the longitude and ``coord[1]`` the latitude.
    * ``args[1]`` -- ``cityshape``; must support ``.contains(point)`` and
      boolean-mask indexing, and expose ``'GSS_CODE'`` and ``'geometry'``.
    * ``args[2]`` -- worker id, used in the progress output and file name.
    * ``args[3]`` -- bounding box, passed through to ``check_box``.
    * ``args[4]`` -- city, passed through to ``check_box``.
    * ``args[5]`` -- output folder.

    For every venue accepted by ``check_box`` whose point falls in exactly
    one row of ``cityshape``, a tab-separated line is written to
    ``<outfolder>/venue_ward_attributes_<thread_id>`` with the fields:
    venue, lng, lat, ward code, the four polygon bounds, polygon length and
    polygon area.
    """
    venues_coord_chunks = args[0]
    cityshape = args[1]
    thread_id = args[2]
    bbox = args[3]
    city = args[4]
    outfolder = args[5]
    total = len(venues_coord_chunks)

    out_path = outfolder + '/venue_ward_attributes_' + str(thread_id)

    # The context manager guarantees the file is flushed and closed even if
    # a row raises part-way through the chunk.
    with open(out_path, 'w') as fout:
        for ind, (venue, coord) in enumerate(venues_coord_chunks.items()):
            if ind % 100 == 0:
                print(thread_id, '\t', ind, '/', total)

            lat = float(coord[1])
            lng = float(coord[0])

            # Evaluate the bounding-box test once and reuse it for both the
            # diagnostic output and the filter.
            in_box = check_box(bbox, city, lat, lng)
            print(lat, lng, '\t', bbox, in_box)
            if not in_box:
                continue

            query_df = cityshape[cityshape.contains(Point(lng, lat))]
            # Only unambiguous hits are recorded.
            if query_df.shape[0] != 1:
                continue

            match = query_df.iloc[0]
            ward = match['GSS_CODE']
            polygon = match['geometry']
            lng0, lat0, lng1, lat1 = (str(v) for v in polygon.bounds[:4])

            fout.write('\t'.join([
                venue,
                str(lng),
                str(lat),
                ward,
                lng0,
                lat0,
                lng1,
                lat1,
                str(polygon.length),
                str(polygon.area),
            ]) + '\n')
Exemplo n.º 4
0
def coordinates_to_msoa(lats, lons, cityshape):
    """Look up the MSOA row of ``cityshape`` that contains a coordinate.

    Args:
        lats: latitude of the point (used as the point's y value).
        lons: longitude of the point (used as the point's x value).
        cityshape: object supporting ``.contains(point)`` and boolean-mask
            indexing, with ``'msoa11cd'`` and ``'geometry'`` columns.

    Returns:
        ``(msoa11cd, geometry)`` of the matching row when exactly one row
        contains the point; otherwise the sentinel ``(0, 0)``.  The sentinel
        is also returned when the lookup raises, so this function never
        propagates an ``Exception`` to the caller.
    """
    import logging  # local import keeps this block self-contained

    poly = (0, 0)

    try:
        pnt = Point(lons, lats)
        query_df = cityshape[cityshape.contains(pnt)]
        if query_df.shape[0] == 1:
            match = query_df.iloc[0]
            poly = (match['msoa11cd'], match['geometry'])
    except Exception:
        # Still best-effort, but leave a trace instead of failing silently
        # so bad inputs can be diagnosed when debug logging is enabled.
        logging.getLogger(__name__).debug(
            "MSOA lookup failed for lat=%r lon=%r", lats, lons, exc_info=True)

    return poly
Exemplo n.º 5
0
 def true_local_pop(self, x, y, r):
     """
     Return the population inside the circle of radius r centred on (x, y).

     The circle is intersected geometrically with every candidate geometry
     returned by self.vtd_idx for the circle's bounding box. Each geometry
     that intersects contributes its self.total_pop entry scaled by the
     fraction of its area that lies inside the circle.

     More precise but much slower than the rasterization-based method;
     included for validation.
     """
     circle = Point((x, y)).buffer(r)
     total = 0
     for fid in self.vtd_idx.intersection(circle.bounds):
         shape = self.df.iloc[fid].geometry
         if not shape.intersects(circle):
             continue
         # buffer(0) is applied before intersecting (presumably to repair
         # invalid geometries -- confirm); the denominator uses the raw area.
         overlap = shape.buffer(0).intersection(circle).area
         total += self.total_pop[fid] * (overlap / shape.area)
     return total
# Load the flowline and catchment shapefiles.
flowlines = gpd.GeoDataFrame.from_file(
    "H:/NHDPlusV21/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDFlowline.shp"
)
catchments = gpd.GeoDataFrame.from_file(
    "H:/NHDPlusV21/NHDPlusPN/NHDPlus17/NHDPlusCatchment/Catchment.shp")

# Report the COMID of every flowline whose geometry is not a plain
# LineString; the endpoint extraction below reads ``.coords`` from each row.
for index, row in flowlines.iterrows():
    if row[flowlines._geometry_column_name].geom_type != 'LineString':
        print(row['COMID'])

# Inspection only: this expression has no effect when run as a script.
flowlines.loc[flowlines['COMID'] == 947050377, 'geometry']
# Drop the one flowline with this COMID (presumably the non-LineString
# reported above -- confirm).
flowlines = flowlines[flowlines.COMID != 947050377].copy(deep=True)
COMs = flowlines['COMID']
# Last vertex of every flowline.  The series is indexed like ``flowlines``
# so that the column-wise concat below pairs each endpoint with the COMID of
# the same row; with a fresh 0..n-1 index the rows after the dropped
# flowline would be shifted against their COMIDs.
endpts = gpd.GeoSeries(
    [Point(list(pt['geometry'].coords)[-1]) for i, pt in flowlines.iterrows()],
    index=flowlines.index)
endpts = gpd.GeoDataFrame(endpts)
endpts = endpts.rename(columns={0: 'geometry'}).set_geometry('geometry')
# Bring in the COMIDs from original flowlines
result = pd.concat([endpts, COMs], axis=1, ignore_index=True)
# ignore_index relabels the columns 0 (geometry) and 1 (COMID); put COMID first.
result = result[[1, 0]]
result = gpd.GeoDataFrame(result)
result = result.rename(columns={
    1: 'COMID',
    0: 'geometry'
}).set_geometry('geometry')
result.to_file("H:/WorkingData/Junk/result_test.shp")  # just to check results

# restrict to catchment endpoints
result = result[result['COMID'].isin(catchments['FEATUREID'])].dropna()
result["Lon"] = result.centroid.map(lambda p: p.x)
Exemplo n.º 7
0
    def allocate(self, r_P, theta, to_vtd=None):
        """
        Try to allocate VTDs to the current district, starting from a target VTD.

        Coordinates are given in the (r_P, θ) system, where r_P is a proportion of the population (0-1) and θ is a direction (in radians).
        If to_vtd is given (and truthy), that VTD is used as the target instead and r_P / theta are not used.

        Starting from the target's county, the method tries progressively smaller sets of VTDs
        with self.update(...): the whole unallocated remainder of the county, then that set with
        whole cities removed, then with single border VTDs removed, and finally one VTD.

        Always returns None; the outcome is visible only through self.update(...) and the
        self.x / self.y / self.i attributes.
        """
        # With probability P_RANDOM_ALLOC -- and only when the current district
        # already has VTDs and no explicit target was given -- retry with a
        # random element of graph.unallocated_on_border(...) as the target.
        if random() < P_RANDOM_ALLOC and self.vtd_by_district[
                self.current_district] and not to_vtd:
            # NOTE(review): border_vtds is assigned here but never read before
            # the return below.
            border_vtds = []
            self.allocate(0,
                          0,
                          to_vtd=choice(
                              self.graph.unallocated_on_border(
                                  self.current_district)))
            return

        # NOTE(review): "not to_vtd" is also true for to_vtd == 0 -- confirm
        # that 0 can never be a valid VTD index passed in by a caller.
        if not to_vtd:
            # Clamp r_P to [0, 1] and scale it to an absolute head count.
            r_P_abs = min(max(0, r_P), 1) * self.total_pop.sum()
            # presumably converts a population count into a geographic
            # distance -- confirm against people_to_geo
            r_G = self.people_to_geo(r_P_abs)
            # Step from (self.x, self.y) by r_G in direction theta, clamped to
            # the [min_x, max_x] x [min_y, max_y] box.
            to_x = min(max(self.min_x,
                           r_G * np.cos(theta) + self.x), self.max_x)
            to_y = min(max(self.min_y,
                           r_G * np.sin(theta) + self.y), self.max_y)
            p = Point((to_x, to_y))

            # Find the first candidate from the spatial index whose geometry
            # actually contains the point.
            vtd_idx = None
            for fid in list(self.vtd_idx.intersection(p.bounds)):
                # API: https://streamhacker.com/2010/03/23/python-point-in-polygon-shapely/
                if getattr(self.df.iloc[fid], 'geometry').contains(p):
                    vtd_idx = fid
                    break

            # Target already belongs to the current district: just move there.
            if vtd_idx in self.vtd_by_district[self.current_district]:
                self.x = to_x
                self.y = to_y
                return
            # NOTE(review): "not vtd_idx" is also true when the matched index
            # is 0, so VTD 0 is treated like "no VTD found" -- confirm.
            # vtd_by_district[0] presumably holds the unallocated VTDs.
            elif not vtd_idx or vtd_idx not in self.vtd_by_district[0]:
                return  # already allocated or out of bounds
        else:
            # Explicit target: take its coordinates from the stored centroids.
            vtd_idx = to_vtd
            to_x = self.centroids[0][vtd_idx]
            to_y = self.centroids[1][vtd_idx]
        """
        Algorithm for allocating VTDs:
        1. Figure out if the county, or the remaining unallocated part of the county, 
           can be allocated wholly to the current district. This requires:

           a. The remaining county is contiguous to the current district.
           b. The remaining county’s population plus the current district’s population 
              less than or equal to the expected number of people per district, ± some very small ϵ.

              If the allocating the county results in two isolated regions of whitespace, 
              the smaller region of whitespace will be allocated to the district, and the population
              of this region will be added to the remaining county's population when checking the 
              equal population constraint. 

            If the remaining county can be allocated, do so. 
            If contiguity is violated, abort. Otherwise, proceed to step 2.
        """
        # Population of everything still unallocated in the target's county.
        county_pop = 0
        county = self.df.iloc[vtd_idx]['county']
        for idx in self.unallocated_in_county[county]:
            county_pop += self.total_pop[idx]
        if not self.graph.contiguous(self.unallocated_in_county[county]):
            return  # no connection between county and current district
        if to_vtd:
            self.x = to_x
            self.y = to_y
            self.i += 1
        # NOTE(review): when to_vtd is set and update() succeeds, self.i is
        # incremented twice (above and below) -- confirm this is intended.
        if self.update(self.unallocated_in_county[county], county_pop):
            self.i += 1
            return  # whole county allocated
        """
        Algorithm for allocating VTDs (cont'd):

        2. If updating fails due to population constraints, remove cities (whole or fractional) on the border of the allocation.
        Do this greedily until the constraints are satisfied, removing the cities farthest from (x,y) first.
    
        3. If updating fails due to population constraints, remove VTDs on the border of the allocation.
        Do this greedily until the constraints are satisfied, removing the VTDs farthest from (x,y) first.
        """
        # Working copy of the candidate set; the loops below shrink it.
        vtds = copy(self.unallocated_in_county[county])

        tested = set([])
        tested_cities = set([])
        # Positions into vtds, ordered from farthest to nearest centroid
        # relative to (self.x, self.y) (ascending argsort, then flipped).
        distances = np.flip(
            np.argsort(
                np.sqrt((self.centroids[0][vtds] - self.x)**2 +
                        (self.centroids[1][vtds] - self.y)**2)))
        # NOTE(review): in the city loop below border_vtds is only referenced
        # by the commented-out part of the condition.
        border_vtds = set(self.graph.border_vtds(vtds))
        last_idx = 0
        # Step 2: drop whole cities, farthest first.
        while len(tested_cities) < len(
                set([self.vtd_to_city[vtd]
                     for vtd in vtds])) and len(tested) < len(vtds):
            farthest_vtd = None
            # NOTE(review): the loop variable reuses the name vtd_idx, so the
            # target VTD chosen above is overwritten with a position into vtds;
            # the "last resort" step at the end reads vtd_idx again.
            for idx, vtd_idx in enumerate(distances[last_idx:]):
                if vtds[vtd_idx] not in tested and self.vtd_to_city[vtds[
                        vtd_idx]] not in tested_cities:  #and vtds[vtd_idx] in border_vtds:
                    farthest_vtd = vtds[vtd_idx]
                    # NOTE(review): idx counts from the start of the slice
                    # distances[last_idx:], not from the start of distances --
                    # confirm that "idx + 1" (rather than last_idx + idx + 1)
                    # is intended.
                    last_idx = idx + 1
                    break
            # NOTE(review): also true when farthest_vtd == 0.
            if not farthest_vtd:
                break

            # Trial set: vtds minus every unallocated VTD of the farthest
            # VTD's city.
            test_vtds = copy(vtds)
            removed = []
            for city_vtd in self.unallocated_in_city[
                    self.vtd_to_city[farthest_vtd]]:
                if city_vtd in test_vtds:
                    test_vtds.remove(city_vtd)
                    removed.append(city_vtd)

            if self.graph.contiguous(test_vtds):
                if self.update(test_vtds,
                               sum([self.total_pop[vtd]
                                    for vtd in test_vtds])):
                    return
                else:
                    # Update rejected: make the removal permanent and restart
                    # the search on the smaller set.
                    for vtd in removed:
                        vtds.remove(vtd)
                    tested = set([])
                    distances = np.flip(
                        np.argsort(
                            np.sqrt((self.centroids[0][vtds] - self.x)**2 +
                                    (self.centroids[1][vtds] - self.y)**2)))
                    border_vtds = set(self.graph.border_vtds(vtds))
                    last_idx = 0
            else:
                # Removing this city breaks contiguity; do not try it again.
                tested.add(farthest_vtd)
                tested_cities.add(self.vtd_to_city[farthest_vtd])

        # Step 3: drop single border VTDs, farthest first.
        # TODO clean up to avoid duplication
        tested = set([])  # TODO should this be here?
        distances = np.flip(
            np.argsort(
                np.sqrt((self.centroids[0][vtds] - self.x)**2 +
                        (self.centroids[1][vtds] - self.y)**2)))
        border_vtds = set(self.graph.border_vtds(vtds))
        last_idx = 0
        while len(vtds) > 0 and len(tested) < len(vtds):
            farthest_vtd = None
            # NOTE(review): same vtd_idx shadowing and slice-relative last_idx
            # as in the city loop above.
            for idx, vtd_idx in enumerate(distances[last_idx:]):
                if vtds[vtd_idx] not in tested and vtds[vtd_idx] in border_vtds:
                    farthest_vtd = vtds[vtd_idx]
                    last_idx = idx + 1
                    break
            # NOTE(review): also true when farthest_vtd == 0.
            if not farthest_vtd:
                break

            test_vtds = copy(vtds)
            test_vtds.remove(farthest_vtd)
            if self.graph.contiguous(test_vtds):
                if self.update(test_vtds,
                               sum([self.total_pop[vtd]
                                    for vtd in test_vtds])):
                    return
                else:
                    # Update rejected: make the removal permanent and restart
                    # the search on the smaller set.
                    vtds.remove(farthest_vtd)
                    tested = set([])
                    distances = np.flip(
                        np.argsort(
                            np.sqrt((self.centroids[0][vtds] - self.x)**2 +
                                    (self.centroids[1][vtds] - self.y)**2)))
                    border_vtds = set(self.graph.border_vtds(vtds))
                    last_idx = 0
            else:
                # Removing this VTD breaks contiguity; do not try it again.
                tested.add(farthest_vtd)

        # last resort: allocate a single VTD
        # NOTE(review): if either loop above iterated, vtd_idx here is the last
        # loop value (a position into vtds), not the original target VTD.
        if self.graph.contiguous([vtd_idx
                                  ]) and vtd_idx in self.vtd_by_district[0]:
            self.update([vtd_idx], self.total_pop[vtd_idx])