Example #1
def inreg_db(argo_db):
    crs = {"init": "epsg:4326"}
    fix_lon = argo_db.copy()
    fix_lon["lon"] = fix_lon["lon"].apply(lambda x: x - 360 if x > 180 else x)
    geometry = [Point(xy) for xy in zip(fix_lon["lon"], fix_lon["lat"])]
    starts = gpd.GeoDataFrame(fix_lon, crs=crs, geometry=geometry)

    SA_200 = gpd.read_file("/data/users/grivera/Shapes/costa_200mn_mask.shp")
    SA_100 = gpd.read_file("/data/users/grivera/Shapes/costa_100mn_mask.shp")
    SA_50 = gpd.read_file("/data/users/grivera/Shapes/costa_50mn_mask.shp")
    #     SA_300['geometry'] = SA_300.geometry.buffer(1)
    pointIn200 = sjoin(starts, SA_200, how="left", op="within")
    pointIn200 = pointIn200.dropna()
    pointIn100 = sjoin(starts, SA_100, how="left", op="within")
    pointIn100 = pointIn100.dropna()
    pointIn50 = sjoin(starts, SA_50, how="left", op="within")
    pointIn50 = pointIn50.dropna()

    argo_db["in200"] = "0"
    argo_db["in100"] = "0"
    argo_db["in50"] = "0"

    argo_db.loc[pointIn200.index, "in200"] = "1"
    argo_db.loc[pointIn100.index, "in100"] = "1"
    argo_db.loc[pointIn50.index, "in50"] = "1"
    return argo_db
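A minimal usage sketch for inreg_db (assumes the same imports the function relies on — pandas, geopandas, shapely's Point, and sjoin — and that the mask shapefiles exist at the hardcoded paths; the coordinates are made up):

import pandas as pd

argo_db = pd.DataFrame({"lon": [278.5, 200.0], "lat": [-12.0, -5.0]})
flagged = inreg_db(argo_db)
# each profile gets "1"/"0" flags for the 200/100/50 nautical-mile coastal bands
print(flagged[["lon", "lat", "in200", "in100", "in50"]])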
Example #2
 def test_sjoin_values(self):
     # GH190
     self.polydf.index = [1, 3, 4, 5, 6]
     df = sjoin(self.pointdf, self.polydf, how='left')
     self.assertEquals(df.shape, (21, 8))
     df = sjoin(self.polydf, self.pointdf, how='left')
     self.assertEquals(df.shape, (12,8))
Example #3
    def _get_HU8_units_for_gdf(self, gdf: GDF) -> GDF:
        """Get HU8 units that intersect gdf

        Args:
            - gdf: GeoDataFrame to intersect with

        Returns:
            GeoDataFrame of HU8 boundaries that intersect gdf
        """
        gdf = gdf.to_crs(epsg=4326)

        # First find HU2 units that intersect gdf
        intersecting_hu2 = []
        for hu2_id in self.hu2_list:
            hu2 = self._load_HU8_boundaries(hu2_id=hu2_id, region_size='HU2')
            hu2 = hu2.to_crs(epsg=4326)
            intersecting_hu2.append(sjoin(hu2, gdf, how='inner'))

        int_hu2_gdf = gpd.GeoDataFrame(pd.concat(intersecting_hu2))
        hu2_ids = int_hu2_gdf['HUC2'].values

        # Now just look within the large regions that I know gdf is in
        intersecting_hu8 = []
        for hu2_id in hu2_ids:
            hu8 = self._load_HU8_boundaries(hu2_id=hu2_id, region_size='HU8')
            hu8 = hu8.to_crs(epsg=4326)
            intersecting_hu8.append(sjoin(hu8, gdf, how='inner'))

        return gpd.GeoDataFrame(pd.concat(intersecting_hu8))
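The two-pass idea here (join against the handful of coarse HU2 outlines first, then load HU8 units only for the HU2 regions that matched) avoids loading every HU8 file. A standalone sketch of the pattern, where load_region is a hypothetical loader returning one GeoDataFrame per region ID:

import geopandas as gpd
import pandas as pd
from geopandas.tools import sjoin

def coarse_to_fine_intersect(gdf, coarse_ids, load_region, id_col='HUC2'):
    # pass 1: which coarse regions intersect the query geometries?
    hits = [sjoin(load_region(rid, 'coarse'), gdf, how='inner') for rid in coarse_ids]
    coarse_hits = gpd.GeoDataFrame(pd.concat(hits))
    # pass 2: load fine units only inside the matching coarse regions
    fine = [sjoin(load_region(rid, 'fine'), gdf, how='inner')
            for rid in coarse_hits[id_col].unique()]
    return gpd.GeoDataFrame(pd.concat(fine))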
Example #5
    def _hydro_point(self, hydro, files, buffer):
        point = hydro.read_files(files=files, layer='NHDPoint')

        # NHD feature codes; only springs and waterfalls are kept
        SPRING = 45800
        WATERFALL = 48700
        WELL = 48800
        keep = [SPRING, WATERFALL]
        point = point[point['FCode'].isin(keep)]

        point = sjoin(point, buffer, how='inner')
        point = to_2d(point)
        if len(point) > 0:
            raise NotImplementedError('Water points near trail')

        areal = hydro.read_files(files=files, layer='NHDArea')
        areal = sjoin(areal, buffer, how='inner')
        if len(areal) > 0:
            raise NotImplementedError('Areal near trail')

        w_areal = hydro.read_files(files=files, layer='NHDWaterbody')
        w_areal = sjoin(w_areal, buffer, how='inner')
        if len(w_areal) > 0:
            raise NotImplementedError('Waterbody near trail')
Example #6
    def download(self,
                 trail: gpd.GeoDataFrame,
                 buffer_dist=None,
                 buffer_unit='mile',
                 overwrite=False):
        """Download polygon shapefile and intersect with PCT track

        Args:
            - trail: gdf of trail to use to find polygons that intersect
            - buffer_dist: distance to use for the trail buffer when intersecting with polygons. Defaults to None, in which case no buffer is used.
            - buffer_unit: unit to use for buffer
            - overwrite: whether to overwrite existing data
        """
        assert self.save_dir is not None, 'self.save_dir must be set'
        assert self.url is not None, 'self.url must be set'
        assert self.filename is not None, 'self.filename must be set'

        # Cache original download in self.raw_dir
        parsed_url = urlparse(self.url)
        raw_fname = Path(parsed_url.path).name
        raw_path = self.raw_dir / raw_fname
        if overwrite or (not raw_path.exists()):
            urlretrieve(self.url, raw_path)

        # Now load the saved file as a GeoDataFrame
        with open(raw_path, 'rb') as f:
            with fiona.BytesCollection(f.read()) as fcol:
                crs = fcol.crs
                gdf = gpd.GeoDataFrame.from_features(fcol, crs=crs)

        # Reproject to WGS84
        gdf = gdf.to_crs(epsg=4326)

        # Use provided `trail` object
        trail = trail.to_crs(epsg=4326)

        # Intersect with the trail
        if buffer_dist is not None:
            buf = geom.buffer(trail, distance=buffer_dist, unit=buffer_unit)
            # Returned as GeoSeries; coerce to GDF
            if not isinstance(buf, gpd.GeoDataFrame):
                buf = gpd.GeoDataFrame(geometry=buf)
                buf = buf.to_crs(epsg=4326)

            intersection = sjoin(gdf, buf, how='inner')
        else:
            intersection = sjoin(gdf, trail, how='inner')

        # Make sure I have valid geometries
        intersection = geom.validate_geom_gdf(intersection)

        # Do any specific steps, to be overloaded in subclasses
        intersection = self._post_download(intersection)

        # Save to GeoJSON
        self.save_dir.mkdir(exist_ok=True, parents=True)
        intersection.to_file(self.save_dir / self.filename, driver='GeoJSON')
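One detail worth isolating: geom.buffer apparently returns a GeoSeries, which sjoin will not accept, hence the coercion to a GeoDataFrame above. The same coercion in a self-contained sketch with made-up geometry:

import geopandas as gpd
from shapely.geometry import LineString

trail = gpd.GeoDataFrame(geometry=[LineString([(0, 0), (1, 1)])], crs='epsg:4326')
buf = trail.geometry.buffer(0.1)          # GeoSeries, not a GeoDataFrame
buf_gdf = gpd.GeoDataFrame(geometry=buf)  # coerce so sjoin accepts it
buf_gdf = buf_gdf.to_crs(epsg=4326)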
Example #7
 def test_sjoin_right(self):
     # the inverse of left
     df = sjoin(self.pointdf, self.polydf, how="right")
     df2 = sjoin(self.polydf, self.pointdf, how="left")
     self.assertEquals(df.shape, (12, 8))
     self.assertEquals(df.shape, df2.shape)
     for i, row in df.iterrows():
         self.assertEquals(row.geometry.type, 'MultiPolygon')
     for i, row in df2.iterrows():
         self.assertEquals(row.geometry.type, 'MultiPolygon')
Example #8
    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        self.assertEquals(df.shape, (21, 8))
        self.assertEquals(df.ix[1]['BoroName'], 'Staten Island')

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        self.assertEquals(df.shape, (21, 8))
        self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))
Example #10
    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        self.assertEquals(df.shape, (21, 8))
        self.assertAlmostEquals(df.ix[1]['Shape_Leng'], 330454.175933)

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        self.assertEquals(df.shape, (21, 8))
        self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))
Example #12
def compute_spatial_join(df):
    df = gpd.GeoDataFrame(df,
                          geometry=gpd.points_from_xy(df.lon, df.lat),
                          crs="epsg:4326")

    df = sjoin(df, gdf_regions, how='left')
    return df[['nuts_id', 'amenity', 'osm_id']]
Example #13
def print_cid_count(node_array_x, node_array_y):
  
  gridx = np.linspace(300000, 800000, 5)
  gridy = np.linspace(3700000, 5500000, 5)
  grid, _, _ = np.histogram2d(node_array_x, node_array_y, bins=[gridx, gridy])
  
  # plotting
  # plt.figure(figsize=(9, 7), dpi=90, facecolor='w', edgecolor='k')
  # plt.plot(node_array_x, node_array_y, 'ro')
  # plt.grid(True)
  # plt.figure(figsize=(9, 7), dpi=90, facecolor='w', edgecolor='k')
  # plt.pcolormesh(gridx, gridy, grid)
  # plt.plot(node_array_x, node_array_y, 'ro')
  # plt.colorbar()
  # plt.show()

  points = gpd.GeoDataFrame({"x":node_array_x,"y":node_array_y})
  points['geometry'] = points.apply(lambda p: Point(p.x, p.y), axis=1)
  # print(points.head(2))

  # np mesh-grid to shapely polygons
  hlines = [((x1, yi), (x2, yi)) for x1, x2 in zip(gridx[:-1], gridx[1:]) for yi in gridy]
  vlines = [((xi, y1), (xi, y2)) for y1, y2 in zip(gridy[:-1], gridy[1:]) for xi in gridx]
  polys = list(polygonize(MultiLineString(hlines + vlines)))
  cid = list(range(len(polys)))
  grid = gpd.GeoDataFrame({"cid": cid, "geometry": polys})
  # print(grid.head(2))

  # number of points in polygons
  pointInPolys = sjoin(points, grid, how='left')
  print(pointInPolys.groupby(['cid']).size().reset_index(name='count'))
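Exercising print_cid_count with synthetic coordinates spanning the hardcoded grid extent (assumes the imports the function already uses: numpy, geopandas, shapely's Point, MultiLineString and polygonize):

import numpy as np

rng = np.random.default_rng(0)
xs = rng.uniform(300000, 800000, 100)
ys = rng.uniform(3700000, 5500000, 100)
print_cid_count(xs, ys)  # prints the point count per grid cell id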
Example #14
def spatial_join(sm_geom, lg_geom):
    """
    Spatially join two geographies, adding data from
    the larger geometry to the smaller. Then add the
    geometry from the smaller geographical unit back
    onto the new dataset. Returns a GeoDataFrame.
    
    Parameters
    ----------
    sm_geom : GeoDataFrame
        GeoDataFrame to receive data

    lg_geom : GeoDataFrame
        Large GeoDataFrame to apply data

    """
    sm_original = sm_geom
    
    sm_geom = sm_geom.to_crs(lg_geom.crs)
    
    sm_geom['geometry'] = sm_geom['geometry'].centroid

    df = sjoin(sm_geom, lg_geom, how="left", op="within")
    
    df.drop(['geometry', 'index_right'], axis=1, inplace=True)
    
    df = df.join(pd.DataFrame(sm_original['geometry'], columns=['geometry']))

    return df
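A usage sketch; blocks.shp and counties.shp are placeholder layers, the only assumptions being that the first is the smaller geography and both carry valid CRS info:

import geopandas as gpd

blocks = gpd.read_file("blocks.shp")      # small units that receive data
counties = gpd.read_file("counties.shp")  # large units that donate attributes
joined = spatial_join(blocks, counties)
# block-level rows with county attributes attached, block geometries restored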
Example #15
 def createInlandList(inputDF):
     '''Requires TimeStep be populated; run before calc maxwindspeed.'''
     TimeStepList = []
     try:
         point = geopandas.GeoDataFrame(
             inputDF,
             geometry=geopandas.points_from_xy(
                 inputDF.Longitude, inputDF.Latitude)).copy()
         point.crs = "epsg:4326"
         try:
             #polygonPath = './src/assets/spatial/inlandPolygon.shp'
             polygonPath = './src/assets/spatial/inlandPolygon.json'
             polygon = geopandas.GeoDataFrame.from_file(polygonPath)
         except Exception as e:
             print('polygon issue')
             print(polygonPath)
             print(e)
         pointInPolys = sjoin(point, polygon, how='left')
         inlandPoints = pointInPolys[
             pointInPolys.index_right.notnull()].copy()
         inlandPointsDF = pd.DataFrame(
             inlandPoints.drop(columns='geometry'))
         TimeStepList = inlandPointsDF['TimeStep'].to_list()
     except Exception as e:
         print('inlandlist issue')
         print(e)
     return TimeStepList
Example #16
 def test_sjoin_left(self):
     df = sjoin(self.pointdf, self.polydf, how='left')
     self.assertEquals(df.shape, (21, 8))
     for i, row in df.iterrows():
         self.assertEquals(row.geometry.type, 'Point')
     self.assertTrue('pointattr1' in df.columns)
     self.assertTrue('BoroCode' in df.columns)
Example #17
def get_neighborhood(locations, neighborhoods_geom):
    points = gpd.GeoDataFrame()
    points['geom'] = locations.apply(lambda x: Point(x['lng'], x['lat']),
                                     axis=1)
    points = points.set_geometry('geom')
    result = sjoin(points, neighborhoods_geom, how='left')
    return result['Name']
Example #18
def copy_csd_data(gdf, csd):
    """
    Copy csd field data/calculate geometry data to geodataframe

    Parameters
    ----------
    gdf: geodataframe
        Geodataframe containing the building footprint data
    csd: geodataframe
        Geodataframe containing the Census Subdivision data

    Returns
    -------
    gdf: geodataframe
        An updated geodataframe with new fields containing csd and geometry data.
    """
    gdf_centroids = gpd.GeoDataFrame(gdf.centroid,
                                     geometry=gdf.centroid,
                                     crs=gdf.crs)

    gdf_centroids_csd_join = sjoin(gdf_centroids, csd, how='left', op='within')

    gdf = gdf.to_crs('epsg:4326')

    gdf['CSDUID'] = gdf_centroids_csd_join['CSDUID']
    gdf['CSDNAME'] = gdf_centroids_csd_join['CSDNAME']
    gdf['Shape_Area'] = gdf_centroids_csd_join.geometry.area
    gdf['Shape_Leng'] = gdf_centroids_csd_join.geometry.length

    # Update joined centroids dataframe to epsg:4326 to provide DD values
    gdf_centroids_csd_join = gdf_centroids_csd_join.to_crs('epsg:4326')
    gdf['Longitude'] = gdf_centroids_csd_join.geometry.x
    gdf['Latitude'] = gdf_centroids_csd_join.geometry.y

    return gdf
Example #19
def copy_csduid(gdf, csd):
    """
    Copy csd field data/calculate geometry data to geodataframe

    Parameters
    ----------
    gdf: geodataframe
        Geodataframe containing the building footprint data
    csd: geodataframe
        Geodataframe containing the Census Subdivision data

    Returns
    -------
    gdf: geodataframe
        An updated geodataframe with new fields containing csd and geometry data.
    """
    gdf_centroids = gpd.GeoDataFrame(gdf.centroid,
                                     geometry=gdf.centroid,
                                     crs=gdf.crs)

    gdf_centroids_csd_join = sjoin(gdf_centroids, csd, how='left', op='within')

    gdf = gdf.to_crs('epsg:4326')

    gdf['CSDUID'] = gdf_centroids_csd_join['CSDUID']

    return gdf
Example #20
def brdrPctFull(zns, brdr, ncol, acol='AreaSqKM'):
    '''
    Arguments
    ---------
    zns      : geoDF of basin polygons
    brdr     : geoDF of CONUS polygon
    ncol     : name of the column that uniquely identifies zns polygons    
    acol     : name of column that holds area (sq. KM)
    '''
    # move poly to Albers; need to stay in this CRS to calculate area later
    if brdr.crs != zns.crs:
        brdr.to_crs(zns.crs, inplace=True)
    touch = sjoin(zns, brdr, op='within')
    nwin = zns.ix[~zns[ncol].isin(touch[ncol])].copy()
    if len(nwin) == 0:
        return pd.DataFrame()    
    tot = pd.DataFrame()
    for idx, row in nwin.iterrows():
        p = gpd.GeoDataFrame({ncol: [row[ncol]], acol: [row[acol]]},
                             geometry=[row.geometry],
                             crs=nwin.crs)
        clip = gpd.overlay(brdr, p, how='intersection')
        if len(clip) == 0:
            p['CatPctFull'] = 0
            tot = pd.concat([tot,p.set_index(ncol)[['CatPctFull']]])
        else:
            out = clip.dissolve(by=ncol)
            out['Area_CONUS'] = out.geometry.area * 1e-6    
            out['CatPctFull'] = (out['Area_CONUS'] / out[acol]) * 100
            tot = pd.concat([tot,out[['CatPctFull']]])
    assert len(tot) == len(nwin)
    return tot
Example #21
def main():
	# Read in Data
	grid = GeoDataFrame.from_file(igrid)
	points = pd.read_csv(ipoints)

	# create geopoints
	geopoints = points2geo(points, lat, lon)

	# match projection info:
	## Points - should already be in wgs84
	geopoints.crs = wgs84
	geopoints['geometry'] = geopoints['geometry'].to_crs(epsg=4326)
	## Grid - project from meters to wgs84
	grid.crs = gridproj
	grid['geometry'] = grid['geometry'].to_crs(epsg=4326)
	# create uid to groupby 
	grid['id'] = [i for i in range(len(grid))] 

	# Spatial join points to grid
	join_inner_df = sjoin(grid, geopoints, how="inner")
	# Group by the uid and geometry - return mean
	join_inner_df = join_inner_df.groupby(['id','geometry'])['Decibel'].mean()
	# join_inner_df = join_inner_df.groupby(['id','geometry'])['Decibel'].max()

	# Create geodataframe & reset the index of the file
	output = GeoDataFrame(join_inner_df)
	output = output.reset_index()

	# output

	# write to file
	output.to_file(ofile)
Example #22
def pts_poly_join(pts, poly, poly_id_col):
    """
    Simple function to join the attributes of the polygon to the points. Specifically for an ID field in the polygon.

    Parameters
    ----------
    pts: GeoDataFrame
        A GeoDataFrame of points with the site names as the index.
    poly: GeoDataFrame
        A GeoDataFrame of polygons with the site names as the index.
    poly_id_col: str or list of str
        The names of the columns to join.

    Returns
    -------
    GeoDataFrame
    """
    if isinstance(poly_id_col, str):
        poly_id_col = [poly_id_col]
    cols = poly_id_col.copy()
    cols.extend(['geometry'])
    poly2 = poly[cols].copy()
    poly3 = poly2.dissolve(poly_id_col).reset_index()

    join1 = sjoin(pts.copy(), poly3.copy(), how='inner', op='within')
    cols = set(pts.columns)
    cols.update(set(poly3.columns))
    join1.drop([i for i in join1.columns if i not in cols], axis=1, inplace=True)

    return join1, poly3
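Usage sketch with placeholder files; the only requirements are point and polygon GeoDataFrames in the same CRS and an ID column on the polygons:

import geopandas as gpd

pts = gpd.read_file("stations.shp")     # point layer, site names as index
poly = gpd.read_file("catchments.shp")  # polygon layer with a 'catch_id' column
joined, dissolved = pts_poly_join(pts, poly, "catch_id")
print(joined["catch_id"].value_counts())  # stations per catchment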
Example #23
def add_loc_ocean2df(df=None, LatVar='lat', LonVar='lon'):
    """
    Add the ocean of a location to dataframe

    Parameters
    -------
    df (pd.DataFrame): DataFrame of data
    LatVar (str): variable name in DataFrame for latitude
    LonVar (str): variable name in DataFrame for longitude

    Returns
    -------
    (pd.DataFrame)
    """
    from geopandas.tools import sjoin
    # Get the shapes for the ocean
    featurecla='ocean'
    group = AC.get_shapes4oceans(rtn_group=True, featurecla=featurecla)
    # Turn the dataframe into a geopandas dataframe
    gdf = geopandas.GeoDataFrame(
        df, geometry=geopandas.points_from_xy(df[LonVar], df[LatVar]))
    # Work out if any of the points are within the polys
    pointInPolys = sjoin(gdf, group, how='left')
    # Check how many were assigned to a region
    Nnew = float(pointInPolys['name'].dropna().shape[0])
    N = float(df.shape[0])
    if N != Nnew:
        pstr = 'WARNING: Only {:.2f}% assigned ({} of {})'
        print( pstr.format( (Nnew/N)*100, int(Nnew), int(N)) )
    # Add the ocean assignment
    df[featurecla] = pointInPolys['name'].values
    return df
Example #24
def points_in_shp(points_list, shapefile_gpd):
    '''
    Check if the points_list generated by libpypack.visualization.generate_maps.lat_lon_to_points()
    is in the Shapefile.

    Parameters
    ----------
    points_list: list
        A list of all lat, lon points generated by:
        libpypack.visualization.generate_maps.lat_lon_to_points()

    shapefile_gpd: dataframe
        A dataframe created from a Shapefile.


    Returns
    -------
    : list, Pandas DataFrame
        pointInPolys: A list of all points that are contained in the Shapefile.
        grouped: A DataFrame which has been grouped.

    '''
    pnts = geopandas.GeoDataFrame(geometry=points_list,
                                  index=range(0, len(points_list)))
    pointInPolys = sjoin(pnts, shapefile_gpd, how='left')
    grouped = pointInPolys.groupby('index_right', as_index=False)

    return pointInPolys, grouped
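The point list can come from lat_lon_to_points() as the docstring notes, but any list of shapely Points works; boroughs.shp is a placeholder:

import geopandas
from shapely.geometry import Point

pts = [Point(-73.97, 40.78), Point(-73.95, 40.65)]
shp = geopandas.read_file("boroughs.shp")
in_polys, grouped = points_in_shp(pts, shp)
print(grouped.size())  # matched points per polygon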
Example #25
def covert():

    # coordinates
    coordinates = request.files['file']
    df = pd.read_csv(coordinates)

    # convert to geographic data
    tmp = df[['Longitude','Latitude']].dropna()
    geometry = [Point(xy) for xy in zip(tmp.Longitude, tmp.Latitude)]
    crs = {'init': 'epsg:4326'}  # http://www.spatialreference.org/ref/epsg/2263/
    geo_df = gp.GeoDataFrame(tmp, crs=crs, geometry=geometry)

    # shapefile
    print(request.form)
    #shfile = request.files['shfile']
    #filename = secure_filename(shfile.filename)
    #shfile.save('data/'+filename)
    print(request.form['shfile'])
    if request.form['shfile'] == 'standard':
        filename = 'W:/OSZ/OMXF/SHAPEFILES/Standardized_admin_areas/ADM2/global_adm2.shp'
    if request.form['shfile'] == 'RBD':
        filename = 'W:/OSZ/OMXF/SHAPEFILES/RBD_GAUL_REV2019/rbd_vam_cod_bnd_admin_level_2_gaul_revised_20190304.shp'

    shpf = gp.GeoDataFrame.from_file(filename)

    pointInPolys = sjoin(geo_df, shpf, how='left')

    output = pd.DataFrame(pointInPolys.drop('geometry', axis=1))

    df = df.merge(output, on=['Longitude','Latitude'], how='left')

    resp = make_response(df.to_csv())
    resp.headers["Content-Disposition"] = "attachment; filename=export.csv"
    resp.headers["Content-Type"] = "text/csv"
    return resp
Example #26
def get_stations(basin):
    """retrieves list of stations within a basin.

    Parameters
    ----------
    basin : str
        Name of basin to get hydrological stations.

    Returns
    -------
    list
        list containing stations ID.

    """
    points = gpd.GeoDataFrame.from_file(os.path.join(os.getcwd(),
                                                     'Station',
                                                     'stations.shp'))
    polys = gpd.GeoDataFrame.from_file(os.path.join(os.getcwd(),
                                                    'basins',
                                                    'rbasin_polygon.shp'))
    poly_subset = polys[polys['BNAME'] == basin]
    pointInPolys = sjoin(points,
                         poly_subset,
                         how='left')
    grouped = pointInPolys.groupby('index_right')
    list_of_stations = list(grouped)
    basin_stations = list_of_stations[0][1]
    basin_stations_list = basin_stations['ID'].tolist()
    return list(set(basin_stations_list))
Example #28
def spatial_join_pt(pt_file, poly_file, lat='lat', lon='lon'):
    #    """Spatially join polygon attributes to point data.
    #
    #    'pt_file' is a csv file with latitude and longitude attributes that
    #    can be interpreted as points.
    #
    #    'poly_file' is a geojson file that contains polygon data.
    #
    #    lat --> latitude field in the point df
    #    lon --> longitude field in the point df
    #
    #    Both layers must use the same CRS.
    #
    #    This function returns a DataFrame, not a Geodataframe.
    #    """
    logging.info('Loading both layers.')
    df = pd.read_csv(pt_file)
    pt = df_to_geodf_pt(df, lat, lon)
    poly = geojson_to_geodf(poly_file)
    pt.crs = poly.crs
    logging.info('Operating spatial join.')
    pt_join = sjoin(pt, poly, how='left')
    pt_join = pt_join.drop(['geometry', 'index_right'], axis=1)
    logging.info('Successfully spatially joined data.')
    return pt_join
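The helpers df_to_geodf_pt and geojson_to_geodf are not shown in this example; minimal versions consistent with how they are called might look like:

import geopandas as gpd
from shapely.geometry import Point

def df_to_geodf_pt(df, lat='lat', lon='lon'):
    # build point geometries from the latitude/longitude columns
    geometry = [Point(xy) for xy in zip(df[lon], df[lat])]
    return gpd.GeoDataFrame(df, geometry=geometry)

def geojson_to_geodf(poly_file):
    # GeoJSON coordinates are WGS84 per RFC 7946
    return gpd.read_file(poly_file)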
Example #29
def main(inputs_dir, split_data, split_field):
    """
    Entry point for merge and split processing script

    Parameters:
    ------------
    :param inputs_dir {string} - Path to the directory containing input data
    :param split_data {string} - Path to the geospatial file used for splitting the combined input data
    :param split_field {string} - Field name in split data used for dividing up combined input data
    """
    # Combine input files into a single input dataframe
    inputs_df = combine_inputs(inputs_dir)

    # Read geospatial features used for splitting data
    split_features = gpd.read_file(split_data)
    split_features = split_features.to_crs(INPUT_CRS)

    # Create centroids for all input data
    input_centroids = gpd.GeoDataFrame(inputs_df.centroid,
                                       geometry=inputs_df.centroid,
                                       crs=inputs_df.crs)

    # Intersect split data with input centroids
    intersected_data = gdptools.sjoin(input_centroids,
                                      split_features,
                                      how='left',
                                      op='within')

    # Copy intersected split field value to input dataframe
    inputs_df[split_field] = intersected_data[split_field]

    # Split input data used split data
    split_data_by_split_field(inputs_df, split_field)
Example #30
def polygonsInPolygons(contained_polygons, container_polygons, new_file):
    contained_polygons = loadFile(contained_polygons)
    container_polygons = loadFile(container_polygons)
    selected_contained_polygons = sjoin(contained_polygons,
                                        container_polygons,
                                        op='within')
    selected_contained_polygons.to_file(new_file)
    return selected_contained_polygons
Example #31
def geoCode(df):
    """all geoprocessing part"""
    df = df[(pd.notnull(df.lat)) & (pd.notnull(df.lon))]  # filter tweets without geocoordinates

    gdf = toGeoDataFrame(df, lat="lat", lon="lon")
    zips = gp.read_file("DATAVAULT/misc/zipcodes.geojson")[["geometry", "postalCode"]]

    return sjoin(gdf, zips, how="left", op="within")
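toGeoDataFrame appears in several of these examples without its definition; a plausible minimal version, assuming WGS84 input coordinates:

import geopandas as gp
from shapely.geometry import Point

def toGeoDataFrame(df, lat='lat', lon='lon'):
    geometry = [Point(xy) for xy in zip(df[lon], df[lat])]
    return gp.GeoDataFrame(df, geometry=geometry, crs='epsg:4326')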
Example #32
def points_in_shp(points_list, shapefile_gpd):

    pnts = geopandas.GeoDataFrame(geometry=points_list,
                                  index=range(0, len(points_list)))
    pointInPolys = sjoin(pnts, shapefile_gpd, how='left')
    grouped = pointInPolys.groupby('index_right', as_index=False)

    return pointInPolys, grouped
Example #33
def main(dbPath, zipPath):
	'''generates synthetic csv
	from master df with zipcodes'''

	gdf  = toGeoDataFrame(getDF(dbPath), lat='lon',lon='lat')  ### messed with lat_long
	zips = gp.read_file(zipPath)[['geometry', 'postalCode']]

	datum = sjoin(gdf, zips, how="left")[['timestamp', 'postalCode', 'user_id', 'id']]
	datum.to_csv('sintetic.csv')
Example #34
def joinTrafficCounts(data_grid, utm10n):
    data_grid['gid'] = data_grid.id
    data_grid.crs = utm10n; print data_grid.crs

    osm_trafficCounts_centroids = GeoDataFrame.from_file(os.getcwd()+'/diysco2-db/_main_/yvr-open-data-traffic-counts/generated-traffic-counts-osm-split/'+'osm_trafficCounts_split_dev_'+str(50)+'_centroids.shp')
    gridded_traffic_counts = sjoin(data_grid, osm_trafficCounts_centroids, how="left")
    print len(gridded_traffic_counts)

    return gridded_traffic_counts 
Example #35
def main(dbPath, zipPath):
	'''generates synthetic csv
	from master_df with zipcodes'''

	gdf  = toGeoDataFrame(getDF(dbPath), lat='lon',lon='lat')  ### messed with lat_long
	zips = gp.read_file(zipPath)[['geometry', 'postalCode']]

	datum = sjoin(gdf, zips, how="left")[['timestamp', 'postalCode', 'user_id', 'id']]
	datum = datum[pd.notnull(datum.postalCode)] # remove failed sjoin
	datum.to_csv('sintetic.csv')
Example #36
def polygons_in_polygons(contained_polygons, container_polygons, new_file):
    contained_polygons = load_file(contained_polygons)
    container_polygons = load_file(container_polygons)
    selected_contained_polygons = sjoin(contained_polygons,
                                        container_polygons,
                                        op='within')
    selected_contained_polygons = remove_multipolygons(
        selected_contained_polygons)
    selected_contained_polygons.to_file(new_file)
    return selected_contained_polygons
Example #37
def everything(path):
	''''''
	pD = gp.read_file(PARQA + 'data/SHP/Park_Districts/ParkDistrict.shp')[['SYSTEM','geometry']]
	df = toGeoDataFrame(pd.read_csv(path, index_col=0))

	df = df.to_crs(pD.crs)
	df = sjoin(df, pD, how="left").rename(columns={'SYSTEM':'parkDistrict'})
	df = df.to_crs(epsg=4326)

	rPath = path.replace('.csv','.json')
	writeGeoJson(df, rPath)
Example #38
def geoCode(df):
    '''all geoprocessing part'''
    df = df[(pd.notnull(df.lat)) &
            (pd.notnull(df.lon))]  # filter tweets without geocoordinates

    gdf = toGeoDataFrame(df, lat='lat', lon='lon')
    zips = gp.read_file('DATAVAULT/misc/zipcodes.geojson')[[
        'geometry', 'postalCode'
    ]]

    return sjoin(gdf, zips, how="left", op='within')
Example #39
def get_bounding_shape(lat, lon, gdf, name):
    h=pd.DataFrame({'Lat':[lat], 'Lon':[lon]})
    geometry = [Point(xy) for xy in zip([lon], [lat])]
    hg = gpd.GeoDataFrame(h, geometry=geometry)
    hg.crs = {'init': 'epsg:4326'}
    hg_1 = hg.to_crs(gdf.crs)
    r = sjoin(gdf,hg_1)
    if r.empty:
        return None
    else:
        return r[name].tolist()[0]
Example #40
def spatial_join(gdf_amenity, gdf_nuts):
    gdf_amenity = gpd.GeoDataFrame(
        gdf_amenity,
        geometry=gpd.points_from_xy(gdf_amenity.lon, gdf_amenity.lat),
        crs="epsg:4326").drop(columns=['lon', 'lat'])
    gdf_amenity.sindex
    gdf = sjoin(gdf_nuts, gdf_amenity, how='left')

    s_counts = gdf.groupby(['nuts_id', 'amenity'])['geometry'].count()
    s_counts.name = "counts"

    return s_counts.reset_index()
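Usage sketch for this counting variant; the NUTS file is a placeholder that just needs a 'nuts_id' column, and the amenity points are made up:

import pandas as pd
import geopandas as gpd

gdf_nuts = gpd.read_file("nuts_regions.geojson")  # placeholder, has 'nuts_id'
df_amenity = pd.DataFrame({"lon": [13.40, 2.35], "lat": [52.52, 48.86],
                           "amenity": ["cafe", "cafe"]})
counts = spatial_join(df_amenity, gdf_nuts)
print(counts)  # one row per (nuts_id, amenity) with its point count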
Example #41
def pts_poly_join(pts, poly, poly_id_col):
    """
    Simple function to join the attributes of the polygon to the points. Specifically for an ID field in the polygon.
    """

    poly2 = poly[[poly_id_col, 'geometry']]
    poly3 = poly2.dissolve(by=poly_id_col)

    join1 = sjoin(pts, poly3, how='inner', op='within')
    join1.rename(columns={join1.columns[-1]: poly_id_col}, inplace=True)

    return ([join1, poly3])
Example #42
def occupation(dataframe):  # column used to group the data
	data_centroid=gpd.GeoDataFrame(dataframe.copy()) # keep only the polygon centroids (low threshold)
	data_centroid['geometry']=dataframe['geometry'].centroid
	try:
		occup=sjoin(maillage[['ID','geometry']],data_centroid[['geometry']])
		occup=occup.drop_duplicates(['geometry']) # drop duplicates
	except ValueError:
		print("erreur sjoin")
		occup=gpd.GeoDataFrame()
		occup["geometry"]=[""]

	return occup
Example #43
def spatial_join_pt(pt_file, poly_file, lat='lat', lon='lon'):
    """Spatially join polygon attributes to point data.

    'pt_file' is a csv file with latitude and longitude attributes that
    can be interpreted as points.

    'poly_file' is a geojson file that contains polygon data.

    lat --> latitude field in the point df
    lon --> longitude field in the point df

    Both layers must use the same CRS.

    This function returns a DataFrame, not a Geodataframe.
    """
    logging.info('Loading point file')
    df = pd.read_csv(pt_file,low_memory=False)

    logging.info('Starting with {} rows in point file'.format(df.shape[0]))

    df = df.reset_index(drop=True)
    df_cols = df.columns.values.tolist()
    
    logging.info('Converting point file to geodf')
    pt = df_to_geodf_pt(df, lat, lon)
    logging.info('Loading poly file as geodf')
    poly = geojson_to_geodf(poly_file)
    pt.crs = poly.crs
    
    logging.info('Operating spatial join.')
    pt_join = sjoin(pt, poly, how='left')
    pt_join = pt_join.drop(['geometry', 'index_right'], axis=1)
    
    logging.info('Successfully spatially joined data.')
    join_cols = pt_join.columns.values.tolist()
    new_cols = [x for x in join_cols if x not in df_cols]

    # We will not keep the results for points 
    # that join to multiple polygons
    pt_join = pt_join.reset_index().drop_duplicates(subset="index",keep=False).set_index("index")
    
    # We must join the result back to original dataframe to keep all rows
    final = pd.merge(df,pt_join[new_cols],left_index=True,right_index=True,how="left")
    
    logging.info('Finished with {} rows'.format(final.shape[0]))
    return final
Example #44
def addNeighborhoods(data, utm10n):
    hoods = GeoDataFrame.from_file(os.getcwd()+'/diysco2-db/_main_/yvr-open-data-neighborhoods/csg_neighborhood_areas.shp'); print hoods.crs
    hoods.crs = utm10n
    output = data.copy()
    output.is_copy = False
    print len(output)
    output = sjoin(output, hoods, how="left")
    output['temp'] = [str(i.bounds) for i in output.geometry]
    print output['temp'].head()
    output = output.drop_duplicates('temp', keep="last")
    print len(output)

    # output.index = [i for i in range(len(otu))]
    for i in range(len(output)):
        if output['NAME'].iloc[i] is None:
            output['NAME'].iloc[i] = "Stanley Park"

        if output["MAPID"].iloc[i] is None:
            output['MAPID'].iloc[i] = "SP1"

    # output = output[pd.isnull(output.co2_avg_e]
    print len(output)
    return output
Example #45
def rarete_espece(datasp,maille,seuil):
	if len(datasp)<1: # if there are no records
		return gpd.pd.DataFrame()

	try:
		occupation=sjoin(grille[['ID','geometry']],datasp[['geometry']])
	except:
		print("erreur jointure (pas en picardie ?")
		return gpd.pd.DataFrame()

	occupation=occupation.drop_duplicates(['geometry']) # drop duplicates
	nb_mailles=len(occupation)
	rr=(1-(float(len(occupation))/nbMaillesTotal))*100
	
	for indice,seuil in seuil_orig.items(): # find which rarity index applies
		if rr >= seuil[0] and rr < seuil[1]:
			indice_base=indice
			break
		else:
			indice_base='TTC' # if rr exceeds 100% (min citations > 1)

	for indice,seuil in seuil_ajust.items(): # find which rarity index applies
		if rr >= seuil[0] and rr < seuil[1]:
			indice_ajust=indice
			break
		else:
			indice_ajust='TTC' # if rr exceeds 100% (min citations > 1)
	
	rapport=gpd.pd.DataFrame()
	rapport["id_esp"]=[datasp.id_esp.values[0]]
	rapport["nom_s"]=[datasp.nom_s.values[0]]
	rapport["nb_mailles"]=[nb_mailles]
	rapport["rr"]=[rr]
	rapport["indiceBrute"]=indice_base
	rapport["indicePondere"]=indice_ajust
	return rapport
Example #46
def makeGrid(ipoints, experiment, gridsize):
    # Projections 
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum':'WGS84', 'no_defs':True, 'proj':'longlat'}
    # import grid script
    sys.path.insert(0, os.getcwd()+'/mapping/libs/')
    import grid as g

    opath =  os.getcwd() + '/diysco2-db/campaigns/'+experiment+'/diysco2-grid'
    if(os.path.isdir(opath)):
        print "already a folder!"
    else:
        os.mkdir(opath)

    # gridsize = 200
    ogridname = "grid_"+str(gridsize)+"m.shp"
    ofile = opath + "/" + ogridname
    print "making grid"
    g.main(ofile, ipoints.total_bounds[0], ipoints.total_bounds[2], 
        ipoints.total_bounds[1], ipoints.total_bounds[3],
        gridsize, gridsize)

    print "grid complete! "
    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj
    # create grid id to groupby
    grid['id'] = [i for i in range(len(grid))]

    # Read in transect to spatial subset grids in transect
    transect = GeoDataFrame.from_file(os.getcwd()+'/diysco2-db/_main_/study-area/' +'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid
    # transectgrid = grid[grid.geometry.intersects(transect.geometry)]; print transectgrid
    sagrid = []
    for i in range(len(grid)):
        if np.array(transect.intersects(grid.geometry[i]))[0] != False:
            sagrid.append(grid.geometry[i])

    transectgrid = GeoDataFrame(sagrid)
    transectgrid.columns = ['geometry']
    transectgrid['id'] = [i for i in range(len(transectgrid))]
    transectgrid.crs = gridproj

    

    transectgrid.to_file(ofile[:-4]+"_transect.shp")
    # transectgrid.to_file(ofile[:-4]+"_transect.geojson",driver="GeoJSON")

    ## !!!Some weird things with reading in data makes the sjoin work !!! :(
    transectgrid = GeoDataFrame.from_file(ofile[:-4]+"_transect.shp")
    transectgrid.crs = gridproj
    print transectgrid.head()

    ipoints = GeoDataFrame.from_file( os.getcwd() + '/diysco2-db/campaigns/'+experiment+'/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print ipoints.head()

    # ipoints['id'] = [i for i in range(len(ipoints))]
    # Spatial join points to grid
    oname = "gridjoin_"+str(gridsize)+"m.shp"
    # join_inner_df = sjoin(transectgrid, ipoints, how="inner")
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    # join_inner_df.to_file(opath+ "/"+oname)

    return join_inner_df
Example #47
subdata=gpd.GeoDataFrame(data[data["date_obs"].dt.year.isin(years)]) # subset to the reference period
mcp_ref = subdata['geometry'].unary_union.convex_hull # total MCP over the reference period
mcp_ref = gpd.GeoSeries(mcp_ref)
mcp_ref.to_file(path+'/out/occurence_ref.shp')

##### Occupancy zone #####
## By year and by period
occup_an=occupation('annee')
occup_an.to_file(path+'/out/occup_an.shp')
occup_per=occupation('periode')
occup_per.to_file(path+'/out/occup_per.shp')
# Compute the zone for the reference period
data_centroid=gpd.GeoDataFrame(data.copy())
data_centroid['geometry']=data['geometry'].centroid
data_centroid_ref=data_centroid[data_centroid["date_obs"].dt.year.isin(years)]
occup_ref=sjoin(maillage[['ID','geometry']],data_centroid_ref[['geometry']])
occup_ref=occup_ref.drop_duplicates(['geometry'])
occup_ref.to_file(path+'/out/occupation.shp')

##### Stats #####
ref=pd.DataFrame()
ref["annee"]=years # for the joins
## MCP area per year
mcp_an["mcp_area"]=mcp_an.area/1000000
stats=pd.merge(ref,mcp_an[["annee","mcp_area"]], on="annee",how="left")
#mcp_ref.area.sum() # over the reference period

## Grid-cell area per year and over the reference period
occup_an["occup_area"]=occup_an.area/1000000
occup_grp=pd.DataFrame(occup_an.groupby('annee')["occup_area"].sum())
occup_grp["annee"]=occup_grp.index # I should learn to use pandas indexes
Example #48
def occupation(col):  # column used to group the data
	data_centroid=gpd.GeoDataFrame(data[data[col].notnull()].copy()) # keep only the polygon centroids (low threshold)
	data_centroid['geometry']=data['geometry'].centroid
	occup=sjoin(maillage[['ID','geometry']],data_centroid[[col,'geometry']])
	occup=occup.drop_duplicates(['geometry',col]) # drop duplicates
	return occup
Example #49
census_blocks_file = 'nycb2010_15b/nycb2010.shp'
pluto_file = 'Update/NYC_PLUTO.shp'

print "Reading files..."
pluto = gp.GeoDataFrame.from_file(os.path.join(dpath, pluto_file))
dsny_sections = gp.GeoDataFrame.from_file(os.path.join(dpath, dsny_section_file))
census_blocks = gp.GeoDataFrame.from_file(os.path.join(dpath, census_blocks_file))

census_blocks.to_crs(dsny_sections.crs, inplace=True)
pluto.crs = dsny_sections.crs

pluto['geometry'] = pluto['geometry'].centroid

print "Begin spatial joins..."
census_pluto = sjoin(pluto, census_blocks, how="left", op="within")

census_pluto.drop(['geometry', 'CB2010_right', 'CT2010_right','Shape_Area','Shape_Leng','index_right'], axis=1, inplace=True)

# --
# Re-merge the old file to use the Polygon instead of the point
census_pluto = census_pluto.merge(census_blocks[['geometry', 'BCTCB2010']])

census_pluto['geometry'] = census_pluto['geometry'].centroid

dsny_pluto_census = sjoin(census_pluto, dsny_sections, how="left", op="within")

writePath = dpath + 'Output/'
writePathFile = writePath + 'nyc.shp'

dsny_pluto_census['AssessTot'] = dsny_pluto_census['AssessTot'].astype(str)
Example #50
#### DECLARE FILE PATHS

utility = '/home/kircheis/data/shp/Electric_Retail_Service_Ter.shp'
util = gpd.read_file(utility) 

urbarea = '/home/kircheis/data/shp/census/cb_2013_us_ua10_500k/cb_2013_us_ua10_500k.shp'
ua = gpd.read_file(urbarea)
ua = ua.to_crs(util.crs)

urbpop = '/home/kircheis/data/census/ua/ua_list_all.txt'
uapop = pd.read_fwf(urbpop, colspecs=[(0,5), (10,70), (75,84), (89,98), (103,117), (122,131), (136,150), (155,164), (169,178), (183,185)], names=['UACE', 'NAME', 'POP', 'HU', 'AREALAND', 'AREALANDSQMI', 'AREAWATER', 'AREAWATERSQMI', 'POPDEN', 'LSADC'], skiprows=1)
uapop['UACE'] = uapop['UACE'].astype(str).str.pad(5, side='left', fillchar='0')
uapop = uapop.set_index('UACE')
#### FIND WHICH URBAN AREAS ARE IN WHICH UTILITY SERVICE AREAS

j = tools.sjoin(util, ua)

#### ALLOCATE GRID FOR TEMPERATURE FORCINGS

g = rect_grid((-130, 24, -65, 50), 0.125) 

coords = g.centroid.apply(lambda x: x.coords[0])
coordstr = coords.apply(lambda x: 'data_%s_%s' % (x[1], x[0]))

g = gpd.GeoDataFrame(geometry=g.geometry, index=g.index)
g.crs = util.crs
g['coordstr'] = coordstr

#### JOIN UTILITY SERVICE AREAS WITH TEMPERATURE FORCINGS

ua_g = tools.sjoin(ua, g)
Example #51
rapport_list=[]

for file in os.listdir(pathData):
	if file.endswith('.shp') and file.startswith("espace_"):
		shapes.append(file)

for shape in shapes:
	listData.append(gpd.GeoDataFrame.from_file(pathData+'/'+shape))

data=gpd.GeoDataFrame(gpd.pd.concat(listData,ignore_index=True))
data.geometry=data.geometry.centroid
data.crs=lamb93
listSp=set(data.nom_s.values)

# Count the number of surveyed grid cells
occup_total=sjoin(grille[['ID','geometry']],data[['geometry']])
occup_total_agreg=occup_total.groupby(['geometry']).count() # here, geometry is the index
occup_total_agreg=gpd.GeoDataFrame(occup_total_agreg)
occup_total_agreg.geometry=occup_total_agreg.index.values.copy() # copy the index into the geometry column
occup_total_agreg.reset_index(drop=True, inplace=True) # replace it with a normal index
occup_total_agreg["nb_cit"]=occup_total_agreg["index_right"].values.copy()
del occup_total_agreg["ID"]
del occup_total_agreg["index_right"]
nbMaillesProsp=len(occup_total_agreg[occup_total_agreg["nb_cit"]>=citation_mini])
P=100*float((nbMaillesTotal-nbMaillesProsp))/nbMaillesTotal
# Adjust the thresholds of the rarity indices
for indice,seuil in seuil_orig.items():
	seuil_ajust[indice][0]=seuil[0]+P-(seuil[0]*P/100)
	seuil_ajust[indice][1]=seuil[1]+P-(seuil[1]*P/100)

def rarete_espece(datasp,maille,seuil):
Example #52
 def test_sjoin_outer(self):
     df = sjoin(self.pointdf, self.polydf, how="outer")
     self.assertEquals(df.shape, (21, 8))
Example #53
        # finish
        new_regions.append(new_region.tolist())

    return new_regions, np.asarray(new_vertices)


b = gpd.GeoDataFrame.from_file('/home/akagi/Desktop/electricity_data/Electric_Retail_Service_Ter.shp')

#### LOOP THROUGH UTILITY SERVICE AREAS
sub = gpd.read_file(substations)
util = gpd.read_file(utility)

invalid_util = util[~util['geometry'].apply(lambda x: x.is_valid)]
util.loc[invalid_util.index, 'geometry'] = util.loc[invalid_util.index, 'geometry'].apply(lambda x: x.buffer(0))

sub_util = tools.sjoin(sub, util, op='within', how='left')

sub_xy = np.vstack(sub['geometry'].apply(lambda u: np.concatenate(u.xy)).values)

#util_poly = b.set_index('UNIQUE_ID')['geometry']
vor = spatial.Voronoi(sub_xy)
reg, vert = voronoi_finite_polygons_2d(vor,1)

v_poly = gpd.GeoSeries(pd.Series(reg).apply(lambda x: geometry.Polygon(vert[x])))

v_gdf = gpd.GeoDataFrame(pd.concat([sub.drop('geometry', axis=1), v_poly], axis=1)).rename(columns={0:'geometry'})
v_gdf.crs = sub.crs

j = tools.sjoin(util, v_gdf, op='intersects')
j['right_geom'] = j['UNIQUE_ID_right'].map(v_gdf.set_index('UNIQUE_ID')['geometry'])
j = j.dropna(subset=['geometry', 'right_geom']).set_index('UNIQUE_ID_left')
Example #54
 def test_sjoin_duplicate_column_name(self):
     pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
     df = sjoin(pointdf2, self.polydf, how="left")
     self.assertTrue('Shape_Area_left' in df.columns)
     self.assertTrue('Shape_Area_right' in df.columns)
Example #55
output["occup"]=""
output["citations"]=""

for file in os.listdir(path):
	if file.endswith('.shp') and file.startswith("espace_"):
		shapes.append(file)

for shape in shapes:
	listData.append(gpd.GeoDataFrame.from_file(path+'/'+shape))

data=gpd.GeoDataFrame(pd.concat(listData,ignore_index=True))

### Work on the data batch
data=data[data["nb"] >= 0] # filter out negative observations
data=data[data.geometry.area < 5000000] # filter out polygons larger than 5 km2
data["date_obs"]=pd.to_datetime(data["date_obs"]) # convert the date
data["annee"]=data["date_obs"].dt.year

for debut in range(annee_min,annee_max+1-4):
	for fin in range(debut+5-1,annee_max+1):
		print str(debut)+" - "+str(fin)
		data_centroid=gpd.GeoDataFrame(data[data["annee"].between(debut,fin)].copy()) # keep only centroids of records within the period
		data_centroid['geometry']=data['geometry'].centroid
		count=len(data_centroid.index)
		occup=sjoin(maillage[['ID','geometry']],data_centroid[['geometry']])
		occup=occup.drop_duplicates(['geometry']) # drop duplicates
		line=pd.DataFrame({'debut':debut,'fin':fin,'occup':occup.area.sum()/1000000,'citations':count},index=[0])
		output=output.append(line,ignore_index=True)

output.to_csv(path+"data_grp.csv",index=False)
Example #56
 def test_sjoin_inner(self):
     df = sjoin(self.pointdf, self.polydf, how="inner")
     self.assertEquals(df.shape, (11, 8))
Example #57
# line_shp = gpd.read_file(network_shp_file)
country_shp = gpd.read_file(country_shp_file)

myanmar = country_shp[country_shp.ADMIN=='Myanmar']
nigeria = country_shp[country_shp.ADMIN=='Nigeria']

osm_places_mmr = osm.query_osm(typ='node', bbox=myanmar.total_bounds, recurse='down', tags='place')
osm_power_mmr = osm.query_osm(typ='node', bbox=myanmar.total_bounds, recurse='down', tags='power')

osm_places_nga = osm.query_osm(typ='node', bbox=nigeria.total_bounds, recurse='down', tags='place')
osm_power_nga = osm.query_osm(typ='node', bbox=nigeria.total_bounds, recurse='down', tags='power')
osm_power_way_nga = osm.query_osm(typ='way', bbox=nigeria.total_bounds, recurse='down', tags='power')

# find places that have a population
places_nga = osm_places_nga[['geometry', 'population', 'name']]
places_nga = sjoin(places_nga, nigeria, how="inner", op="within")
places_nga = places_nga[~pd.isnull(places_nga.population)]

# find length of power line data in nigeria
power_lines_nga = osm_power_way_nga[osm_power_way_nga.geom_type == 'LineString'][['geometry']]
power_lines_nga = sjoin(power_lines_nga, nigeria, how="inner", op="within")

# assign line distances back to geopandas df and add styling to display via geojson.io
power_lines_nga['line_length_m'] = power_lines_nga.geometry.apply(lambda linestring: sum(su.linestring_distances(linestring)))
# style green
power_lines_nga['stroke'] = "#00ff00"

# output as geojson
power_line_json = "/home/cjn/geodata/nga_power_lines_osm.json"
json_file = open(power_line_json, mode='w')
json_file.write(power_lines_nga.to_json())
Example #58
from shapely.geometry import Point

here = os.path.dirname(os.path.abspath('__file__'))
data_dir = os.path.join(here, '..', 'data')

vector_file = os.path.join(here, 'nybb_15b', 'nybb.shp')

boros = read_file(vector_file)

xmin, ymin, xmax, ymax = boros.total_bounds
N = 1000
X = np.random.uniform(low=xmin, high=xmax, size=N)
Y = np.random.uniform(low=ymin, high=ymax, size=N)
points = GeoDataFrame(geometry=GeoSeries([Point(x, y) for x, y in zip(X, Y)]))
points.crs = boros.crs
joined = sjoin(points, boros, how='inner')
joined.geometry = joined.buffer(2000)

ax = plt.subplot(121)
boros.plot(column='BoroCode', categorical=True, axes=ax)
points.plot(axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
ax = plt.subplot(122)
joined.plot(column='BoroCode', categorical=True, axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

plt.tight_layout()
Example #59
def loadbus():
    """ load boat telemetry from the local sqlite db """
    con = sqlite3.connect('test.db')
    df = pd.read_sql_query('select * from telemetry where boat_id =5', con, parse_dates=['received'], index_col=['received'])
    # geopandas requires this to be called 'geometry'
    df['geometry'] = df.apply(lambda y: Point(y.lat, y.lon), axis=1)
    return df

def label_dockings(data):
    """ add a label to indicate when the bus was stopped at one of the docks """
    for row in data:
        import pdb
        pdb.set_trace()
    return data

def label_arrive(data):
    """ add a label to indicate when the bus arrives at one of the docks """
    pass

def label_depart(data):
    """ add a label to indicate when the bus departs one of the docks """

if __name__ == '__main__':
    print 'creating bounds'
    spots = GeoDataFrame({'geometry': [bounds['WATERFRONT'], bounds['LONSDALE'], bounds['PARKING']]})
    print 'loading bus data'
    data = loadbus()
    gdf = gpd.GeoDataFrame(data)
    print 'joining'
    joined = sjoin(gdf, spots, how='left', op='contains')
    import pdb
    pdb.set_trace()
Example #60
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import tools

census_old = '/home/kircheis/data/shp/census/census_tracts_all/census_tracts_1990.shp'
census_new = '/home/kircheis/data/shp/census/census_tracts_all/census_tracts_2014.shp'

df_90 = gpd.read_file(census_old)
df_14 = gpd.read_file(census_new)

df_14_c = df_14.copy()
df_14_c['geometry'] = df_14_c.centroid

j = tools.sjoin(df_90, df_14_c, op='contains')

#### FORMAT CENSUS TRACT NAMES

#### NONDECIMAL ENTRIES
j['TRACT_NAME'][~j['TRACT_NAME'].str.contains('\.')] = (j['TRACT_NAME'][~j['TRACT_NAME'].str.contains('\.')] + '00').str.pad(6, side='left', fillchar='0')

#### DECIMAL ENTRIES
j['TRACT_NAME'][j['TRACT_NAME'].str.contains('\.')] = j['TRACT_NAME'][j['TRACT_NAME'].str.contains('\.')].str.replace('.', '').str.pad(6, side='left', fillchar='0')

#### CREATE FIPS

j['GEOID_1990'] = j['ST'].astype(str).str.cat(j['CO'].astype(str)).str.cat(j['TRACT_NAME'])

j_cross = j.rename(columns={'GEOID':'GEOID_2014'})[['GEOID_1990', 'GEOID_2014']].sort('GEOID_1990')