def inreg_db(argo_db):
    crs = {"init": "epsg:4326"}
    fix_lon = argo_db.copy()
    fix_lon["lon"] = fix_lon["lon"].apply(lambda x: x - 360 if x > 180 else x)
    geometry = [Point(xy) for xy in zip(fix_lon["lon"], fix_lon["lat"])]
    starts = gpd.GeoDataFrame(fix_lon, crs=crs, geometry=geometry)
    SA_200 = gpd.read_file("/data/users/grivera/Shapes/costa_200mn_mask.shp")
    SA_100 = gpd.read_file("/data/users/grivera/Shapes/costa_100mn_mask.shp")
    SA_50 = gpd.read_file("/data/users/grivera/Shapes/costa_50mn_mask.shp")
    # SA_300['geometry'] = SA_300.geometry.buffer(1)
    pointIn200 = sjoin(starts, SA_200, how="left", op="within")
    pointIn200 = pointIn200.dropna()
    pointIn100 = sjoin(starts, SA_100, how="left", op="within")
    pointIn100 = pointIn100.dropna()
    pointIn50 = sjoin(starts, SA_50, how="left", op="within")
    pointIn50 = pointIn50.dropna()
    argo_db["in200"] = "0"
    argo_db["in100"] = "0"
    argo_db["in50"] = "0"
    argo_db.loc[pointIn200.index, "in200"] = "1"
    argo_db.loc[pointIn100.index, "in100"] = "1"
    argo_db.loc[pointIn50.index, "in50"] = "1"
    return argo_db
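# Hedged usage sketch for inreg_db (not part of the original source): assumes
# the three coastal-mask shapefile paths above exist and that gpd, Point, and
# sjoin are imported as the snippet expects. The only real input requirements
# are 'lon'/'lat' columns, with longitudes possibly on the 0-360 convention.
import pandas as pd

argo_db = pd.DataFrame({
    "lon": [281.5, 200.0],  # 0-360 longitudes; inreg_db shifts values > 180
    "lat": [-12.0, -5.0],
})
flagged = inreg_db(argo_db)
print(flagged[["in200", "in100", "in50"]])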
def test_sjoin_values(self):
    # GH190
    self.polydf.index = [1, 3, 4, 5, 6]
    df = sjoin(self.pointdf, self.polydf, how='left')
    self.assertEquals(df.shape, (21, 8))
    df = sjoin(self.polydf, self.pointdf, how='left')
    self.assertEquals(df.shape, (12, 8))
def _get_HU8_units_for_gdf(self, gdf: GDF) -> GDF:
    """Get HU8 units that intersect gdf

    Args:
        - gdf: GeoDataFrame to intersect with

    Returns:
        GeoDataFrame of HU8 boundaries that intersect gdf
    """
    gdf = gdf.to_crs(epsg=4326)

    # First find HU2 units that intersect gdf
    intersecting_hu2 = []
    for hu2_id in self.hu2_list:
        hu2 = self._load_HU8_boundaries(hu2_id=hu2_id, region_size='HU2')
        hu2 = hu2.to_crs(epsg=4326)
        intersecting_hu2.append(sjoin(hu2, gdf, how='inner'))

    int_hu2_gdf = gpd.GeoDataFrame(pd.concat(intersecting_hu2))
    hu2_ids = int_hu2_gdf['HUC2'].values

    # Now look only within the large regions that gdf is known to be in
    intersecting_hu8 = []
    for hu2_id in hu2_ids:
        hu8 = self._load_HU8_boundaries(hu2_id=hu2_id, region_size='HU8')
        hu8 = hu8.to_crs(epsg=4326)
        intersecting_hu8.append(sjoin(hu8, gdf, how='inner'))

    return gpd.GeoDataFrame(pd.concat(intersecting_hu8))
def _hydro_point(self, hydro, files, buffer):
    point = hydro.read_files(files=files, layer='NHDPoint')

    SPRING = 45800
    WATERFALL = 48700
    WELL = 48800
    keep = [SPRING, WATERFALL]
    point = point[point['FCode'].isin(keep)]
    point = sjoin(point, buffer, how='inner')
    point = to_2d(point)
    if len(point) > 0:
        raise NotImplementedError('Water points near trail')

    areal = hydro.read_files(files=files, layer='NHDArea')
    areal = sjoin(areal, buffer, how='inner')
    if len(areal) > 0:
        raise NotImplementedError('Areal near trail')

    w_areal = hydro.read_files(files=files, layer='NHDWaterbody')
    w_areal = sjoin(w_areal, buffer, how='inner')
    if len(w_areal) > 0:
        raise NotImplementedError('Areal near trail')
def download(self, trail: gpd.GeoDataFrame, buffer_dist=None,
             buffer_unit='mile', overwrite=False):
    """Download polygon shapefile and intersect with PCT track

    Args:
        - trail: gdf of trail to use to find polygons that intersect
        - buffer_dist: distance to use for trail buffer when intersecting
          with polygons. By default is None, so no buffer will be used.
        - buffer_unit: unit to use for buffer
        - overwrite: whether to overwrite existing data
    """
    assert self.save_dir is not None, 'self.save_dir must be set'
    assert self.url is not None, 'self.url must be set'
    assert self.filename is not None, 'self.filename must be set'

    # Cache original download in self.raw_dir
    parsed_url = urlparse(self.url)
    raw_fname = Path(parsed_url.path).name
    raw_path = self.raw_dir / raw_fname
    if overwrite or (not raw_path.exists()):
        urlretrieve(self.url, raw_path)

    # Now load the saved file as a GeoDataFrame
    with open(raw_path, 'rb') as f:
        with fiona.BytesCollection(f.read()) as fcol:
            crs = fcol.crs
            gdf = gpd.GeoDataFrame.from_features(fcol, crs=crs)

    # Reproject to WGS84
    gdf = gdf.to_crs(epsg=4326)

    # Use provided `trail` object
    trail = trail.to_crs(epsg=4326)

    # Intersect with the trail
    if buffer_dist is not None:
        buf = geom.buffer(trail, distance=buffer_dist, unit=buffer_unit)

        # Returned as GeoSeries; coerce to GDF
        if not isinstance(buf, gpd.GeoDataFrame):
            buf = gpd.GeoDataFrame(geometry=buf)

        buf = buf.to_crs(epsg=4326)
        intersection = sjoin(gdf, buf, how='inner')
    else:
        intersection = sjoin(gdf, trail, how='inner')

    # Make sure I have valid geometries
    intersection = geom.validate_geom_gdf(intersection)

    # Do any specific steps, to be overloaded in subclasses
    intersection = self._post_download(intersection)

    # Save to GeoJSON
    self.save_dir.mkdir(exist_ok=True, parents=True)
    intersection.to_file(self.save_dir / self.filename, driver='GeoJSON')
def test_sjoin_right(self):
    # the inverse of left
    df = sjoin(self.pointdf, self.polydf, how="right")
    df2 = sjoin(self.polydf, self.pointdf, how="left")
    self.assertEquals(df.shape, (12, 8))
    self.assertEquals(df.shape, df2.shape)
    for i, row in df.iterrows():
        self.assertEquals(row.geometry.type, 'MultiPolygon')
    for i, row in df2.iterrows():
        self.assertEquals(row.geometry.type, 'MultiPolygon')
def test_sjoin_op(self):
    # points within polygons
    df = sjoin(self.pointdf, self.polydf, how="left", op="within")
    self.assertEquals(df.shape, (21, 8))
    self.assertEquals(df.ix[1]['BoroName'], 'Staten Island')

    # points contain polygons? never happens so we should have nulls
    df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
    self.assertEquals(df.shape, (21, 8))
    self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))
def test_sjoin_op(self):
    # points within polygons
    df = sjoin(self.pointdf, self.polydf, how="left", op="within")
    self.assertEquals(df.shape, (21, 8))
    self.assertAlmostEquals(df.ix[1]['Shape_Leng'], 330454.175933)

    # points contain polygons? never happens so we should have nulls
    df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
    self.assertEquals(df.shape, (21, 8))
    self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))
def compute_spatial_join(df):
    df = gpd.GeoDataFrame(df,
                          geometry=gpd.points_from_xy(df.lon, df.lat),
                          crs="epsg:4326")
    df = sjoin(df, gdf_regions, how='left')
    return df[['nuts_id', 'amenity', 'osm_id']]
def print_cid_count(node_array_x, node_array_y):
    gridx = np.linspace(300000, 800000, 5)
    gridy = np.linspace(3700000, 5500000, 5)
    grid, _, _ = np.histogram2d(node_array_x, node_array_y, bins=[gridx, gridy])

    # plotting
    # plt.figure(figsize=(9, 7), dpi=90, facecolor='w', edgecolor='k')
    # plt.plot(node_array_x, node_array_y, 'ro')
    # plt.grid(True)
    # plt.figure(figsize=(9, 7), dpi=90, facecolor='w', edgecolor='k')
    # plt.pcolormesh(gridx, gridy, grid)
    # plt.plot(node_array_x, node_array_y, 'ro')
    # plt.colorbar()
    # plt.show()

    points = gpd.GeoDataFrame({"x": node_array_x, "y": node_array_y})
    points['geometry'] = points.apply(lambda p: Point(p.x, p.y), axis=1)
    # print(points.head(2))

    # np mesh-grid to shapely polygons
    hlines = [((x1, yi), (x2, yi)) for x1, x2 in zip(gridx[:-1], gridx[1:])
              for yi in gridy]
    vlines = [((xi, y1), (xi, y2)) for y1, y2 in zip(gridy[:-1], gridy[1:])
              for xi in gridx]
    grids = list(polygonize(MultiLineString(hlines + vlines)))
    cid = [i for i in range(len(grids))]
    grid = gpd.GeoDataFrame({"cid": cid, "geometry": grids})
    # print(grid.head(2))

    # number of points in polygons
    pointInPolys = sjoin(points, grid, how='left')
    print(pointInPolys.groupby(['cid']).size().reset_index(name='count'))
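# Hedged usage sketch for print_cid_count (not part of the original source):
# assumes numpy, geopandas, shapely.geometry.Point, shapely.geometry.
# MultiLineString, and shapely.ops.polygonize are imported as the snippet
# expects. The coordinates are synthetic points inside the hard-coded grid.
import numpy as np

rng = np.random.default_rng(0)
xs = rng.uniform(300000, 800000, size=100)
ys = rng.uniform(3700000, 5500000, size=100)
print_cid_count(xs, ys)  # prints point counts per grid cell id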
def spatial_join(sm_geom, lg_geom):
    """
    Spatially join two geographies, adding data from the larger geometry to
    the smaller. Then add the geometry from the smaller geographical unit
    back onto the new dataset. Returns a GeoDataFrame.

    Parameters
    ----------
    sm_geom : GeoDataFrame
        GeoDataFrame to receive data
    lg_geom : GeoDataFrame
        Large GeoDataFrame to apply data
    """
    sm_original = sm_geom
    sm_geom = sm_geom.to_crs(lg_geom.crs)
    sm_geom['geometry'] = sm_geom['geometry'].centroid
    df = sjoin(sm_geom, lg_geom, how="left", op="within")
    df.drop(['geometry', 'index_right'], axis=1, inplace=True)
    df = df.join(pd.DataFrame(sm_original['geometry'], columns=['geometry']))
    return df
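# Hedged usage sketch for spatial_join above (not part of the original
# source): joins larger-unit attributes onto smaller units via centroids,
# which avoids double-matching when a small polygon straddles a boundary.
# The file names are placeholders for any two overlapping polygon layers.
import geopandas as gpd

blocks = gpd.read_file("blocks.shp")   # smaller units (receive data)
tracts = gpd.read_file("tracts.shp")   # larger units (provide data)
joined = spatial_join(blocks, tracts)
print(joined.head())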
def createInlandList(inputDF):
    '''Requires TimeStep be populated; run before calc maxwindspeed.'''
    TimeStepList = []
    try:
        point = geopandas.GeoDataFrame(
            inputDF,
            geometry=geopandas.points_from_xy(
                inputDF.Longitude, inputDF.Latitude)).copy()
        point.crs = "epsg:4326"
        try:
            # polygonPath = './src/assets/spatial/inlandPolygon.shp'
            polygonPath = './src/assets/spatial/inlandPolygon.json'
            polygon = geopandas.GeoDataFrame.from_file(polygonPath)
        except Exception as e:
            print('polygon issue')
            print(polygonPath)
            print(e)
        pointInPolys = sjoin(point, polygon, how='left')
        inlandPoints = pointInPolys[pointInPolys.index_right.notnull()].copy()
        inlandPointsDF = pd.DataFrame(inlandPoints.drop(columns='geometry'))
        TimeStepList = inlandPointsDF['TimeStep'].to_list()
    except Exception as e:
        print('inlandlist issue')
        print(e)
    return TimeStepList
def test_sjoin_left(self):
    df = sjoin(self.pointdf, self.polydf, how='left')
    self.assertEquals(df.shape, (21, 8))
    for i, row in df.iterrows():
        self.assertEquals(row.geometry.type, 'Point')
    self.assertTrue('pointattr1' in df.columns)
    self.assertTrue('BoroCode' in df.columns)
def get_neighborhood(locations, neighborhoods_geom):
    points = gpd.GeoDataFrame()
    points['geom'] = locations.apply(lambda x: Point(x['lng'], x['lat']), axis=1)
    points = points.set_geometry('geom')
    result = sjoin(points, neighborhoods_geom, how='left')
    return result['Name']
def copy_csd_data(gdf, csd):
    """
    Copy csd field data / calculate geometry data for a geodataframe

    Attributes
    ----------
    gdf: geodataframe
        Geodataframe containing the building footprint data
    csd: geodataframe
        Geodataframe containing the Census Subdivision data

    Returns
    -------
    gdf: geodataframe
        An updated geodataframe with new fields containing csd and
        geometry data.
    """
    gdf_centroids = gpd.GeoDataFrame(gdf.centroid, geometry=gdf.centroid,
                                     crs=gdf.crs)
    gdf_centroids_csd_join = sjoin(gdf_centroids, csd, how='left', op='within')

    # Area/length must come from the footprint polygons (centroids have
    # neither), computed in the original projected CRS before reprojecting
    gdf['Shape_Area'] = gdf.geometry.area
    gdf['Shape_Leng'] = gdf.geometry.length

    gdf = gdf.to_crs('epsg:4326')
    gdf['CSDUID'] = gdf_centroids_csd_join['CSDUID']
    gdf['CSDNAME'] = gdf_centroids_csd_join['CSDNAME']

    # Update joined centroids dataframe to epsg:4326 to provide DD values
    gdf_centroids_csd_join = gdf_centroids_csd_join.to_crs('epsg:4326')
    gdf['Longitude'] = gdf_centroids_csd_join.geometry.x
    gdf['Latitude'] = gdf_centroids_csd_join.geometry.y
    return gdf
def copy_csduid(gdf, csd):
    """
    Copy the csd ID field to the geodataframe

    Attributes
    ----------
    gdf: geodataframe
        Geodataframe containing the building footprint data
    csd: geodataframe
        Geodataframe containing the Census Subdivision data

    Returns
    -------
    gdf: geodataframe
        An updated geodataframe with a new CSDUID field.
    """
    gdf_centroids = gpd.GeoDataFrame(gdf.centroid, geometry=gdf.centroid,
                                     crs=gdf.crs)
    gdf_centroids_csd_join = sjoin(gdf_centroids, csd, how='left', op='within')
    gdf = gdf.to_crs('epsg:4326')
    gdf['CSDUID'] = gdf_centroids_csd_join['CSDUID']
    return gdf
def brdrPctFull(zns, brdr, ncol, acol='AreaSqKM'):
    '''
    Arguments
    ---------
    zns  : geoDF of basin polygons
    brdr : geoDF of CONUS polygon
    ncol : name of the column that uniquely identifies zns polygons
    acol : name of column that holds area (sq. KM)
    '''
    # move poly to albers; need to stay in this CRS to calc. area later
    if brdr.crs != zns.crs:
        brdr.to_crs(zns.crs, inplace=True)
    touch = sjoin(zns, brdr, op='within')
    nwin = zns.loc[~zns[ncol].isin(touch[ncol])].copy()
    if len(nwin) == 0:
        return pd.DataFrame()
    tot = pd.DataFrame()
    for idx, row in nwin.iterrows():
        p = gpd.GeoDataFrame({ncol: [row[ncol]], acol: [row[acol]]},
                             geometry=[row.geometry], crs=nwin.crs)
        clip = gpd.overlay(brdr, p, how='intersection')
        if len(clip) == 0:
            p['CatPctFull'] = 0
            tot = pd.concat([tot, p.set_index(ncol)[['CatPctFull']]])
        else:
            out = clip.dissolve(by=ncol)
            out['Area_CONUS'] = out.geometry.area * 1e-6
            out['CatPctFull'] = (out['Area_CONUS'] / out[acol]) * 100
            tot = pd.concat([tot, out[['CatPctFull']]])
    assert len(tot) == len(nwin)
    return tot
def main():
    # Read in data
    grid = GeoDataFrame.from_file(igrid)
    points = pd.read_csv(ipoints)

    # create geopoints
    geopoints = points2geo(points, lat, lon)

    # match projection info:
    # Points - should already be in wgs84
    geopoints.crs = wgs84
    geopoints['geometry'] = geopoints['geometry'].to_crs(epsg=4326)
    # Grid - project from meters to wgs84
    grid.crs = gridproj
    grid['geometry'] = grid['geometry'].to_crs(epsg=4326)

    # create uid to groupby
    grid['id'] = [i for i in range(len(grid))]

    # Spatial join points to grid
    join_inner_df = sjoin(grid, geopoints, how="inner")

    # Group by the uid and geometry - return mean
    join_inner_df = join_inner_df.groupby(['id', 'geometry'])['Decibel'].mean()
    # join_inner_df = join_inner_df.groupby(['id', 'geometry'])['Decibel'].max()

    # Create geodataframe & reset the index of the file
    output = GeoDataFrame(join_inner_df)
    output = output.reset_index()

    # write to file
    output.to_file(ofile)
def pts_poly_join(pts, poly, poly_id_col):
    """
    Simple function to join the attributes of the polygon to the points.
    Specifically for an ID field in the polygon.

    Parameters
    ----------
    pts : GeoDataFrame
        A GeoDataFrame of points with the site names as the index.
    poly : GeoDataFrame
        A GeoDataFrame of polygons with the site names as the index.
    poly_id_col : str or list of str
        The names of the columns to join.

    Returns
    -------
    GeoDataFrame
    """
    if isinstance(poly_id_col, str):
        poly_id_col = [poly_id_col]
    cols = poly_id_col.copy()
    cols.extend(['geometry'])
    poly2 = poly[cols].copy()
    poly3 = poly2.dissolve(poly_id_col).reset_index()

    join1 = sjoin(pts.copy(), poly3.copy(), how='inner', op='within')
    cols = set(pts.columns)
    cols.update(set(poly3.columns))
    join1.drop([i for i in join1.columns if i not in cols], axis=1,
               inplace=True)

    return join1, poly3
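# Hedged usage sketch for pts_poly_join above (not part of the original
# source): the layer names and the 'catchment_id' column are placeholders
# for any point layer and any polygon layer carrying an ID column.
import geopandas as gpd

sites = gpd.read_file("sites.shp")            # point layer
catchments = gpd.read_file("catchments.shp")  # polygon layer with an ID column
joined, dissolved = pts_poly_join(sites, catchments, 'catchment_id')
print(joined.head())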
def add_loc_ocean2df(df=None, LatVar='lat', LonVar='lon'):
    """
    Add the ocean of a location to a dataframe

    Parameters
    ----------
    df (pd.DataFrame): DataFrame of data
    LatVar (str): variable name in DataFrame for latitude
    LonVar (str): variable name in DataFrame for longitude

    Returns
    -------
    (pd.DataFrame)
    """
    from geopandas.tools import sjoin
    # Get the shapes for the ocean
    featurecla = 'ocean'
    group = AC.get_shapes4oceans(rtn_group=True, featurecla=featurecla)
    # Turn the dataframe into a geopandas dataframe
    gdf = geopandas.GeoDataFrame(
        df, geometry=geopandas.points_from_xy(df[LonVar], df[LatVar]))
    # Work out if any of the points are within the polygons
    pointInPolys = sjoin(gdf, group, how='left')
    # Check how many were assigned to a region
    Nnew = float(pointInPolys['name'].dropna().shape[0])
    N = float(df.shape[0])
    if N != Nnew:
        pstr = 'WARNING: Only {:.2f}% assigned ({} of {})'
        print(pstr.format((Nnew / N) * 100, int(Nnew), int(N)))
    # Add the ocean assignment
    df[featurecla] = pointInPolys['name'].values
    return df
def points_in_shp(points_list, shapefile_gpd):
    '''
    Check if the points_list generated by
    libpypack.visualization.generate_maps.lat_lon_to_points() is in the
    Shapefile.

    Parameters
    ----------
    points_list: list
        A list of all (lat, lon) points generated by:
        libpypack.visualization.generate_maps.lat_lon_to_points()
    shapefile_gpd: dataframe
        A dataframe created from a Shapefile.

    Returns
    -------
    : list, Pandas DataFrame
        pointInPolys: A list of all points that are contained in the Shapefile.
        grouped: A DataFrame which has been grouped.
    '''
    pnts = geopandas.GeoDataFrame(geometry=points_list,
                                  index=range(0, len(points_list)))
    pointInPolys = sjoin(pnts, shapefile_gpd, how='left')
    grouped = pointInPolys.groupby('index_right', as_index=False)

    return pointInPolys, grouped
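# Hedged usage sketch for points_in_shp above (not part of the original
# source): builds the point list directly with shapely instead of the
# libpypack helper; "boundaries.shp" is a placeholder polygon layer.
import geopandas
from shapely.geometry import Point

pts = [Point(-73.97, 40.78), Point(-73.99, 40.73)]
boundaries = geopandas.read_file("boundaries.shp")
in_polys, grouped = points_in_shp(pts, boundaries)
print(grouped.size())  # number of points per matched polygon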
def convert():
    # coordinates
    coordinates = request.files['file']
    df = pd.read_csv(coordinates)

    # convert to geographic data
    tmp = df[['Longitude', 'Latitude']].dropna()
    geometry = [Point(xy) for xy in zip(tmp.Longitude, tmp.Latitude)]
    crs = {'init': 'epsg:4326'}  # http://www.spatialreference.org/ref/epsg/2263/
    geo_df = gp.GeoDataFrame(tmp, crs=crs, geometry=geometry)

    # shapefile
    print(request.form)
    # shfile = request.files['shfile']
    # filename = secure_filename(shfile.filename)
    # shfile.save('data/' + filename)
    print(request.form['shfile'])
    if request.form['shfile'] == 'standard':
        filename = 'W:/OSZ/OMXF/SHAPEFILES/Standardized_admin_areas/ADM2/global_adm2.shp'
    if request.form['shfile'] == 'RBD':
        filename = 'W:/OSZ/OMXF/SHAPEFILES/RBD_GAUL_REV2019/rbd_vam_cod_bnd_admin_level_2_gaul_revised_20190304.shp'
    shpf = gp.GeoDataFrame.from_file(filename)

    pointInPolys = sjoin(geo_df, shpf, how='left')
    output = pd.DataFrame(pointInPolys.drop('geometry', axis=1))
    df = df.merge(output, on=['Longitude', 'Latitude'], how='left')

    resp = make_response(df.to_csv())
    resp.headers["Content-Disposition"] = "attachment; filename=export.csv"
    resp.headers["Content-Type"] = "text/csv"
    return resp
def get_stations(basin):
    """Retrieve the list of stations within a basin.

    Parameters
    ----------
    basin : str
        Name of basin to get hydrological stations for.

    Returns
    -------
    list
        list containing station IDs.
    """
    points = gpd.GeoDataFrame.from_file(
        os.path.join(os.getcwd(), 'Station', 'stations.shp'))
    polys = gpd.GeoDataFrame.from_file(
        os.path.join(os.getcwd(), 'basins', 'rbasin_polygon.shp'))
    poly_subset = polys[polys['BNAME'] == basin]
    pointInPolys = sjoin(points, poly_subset, how='left')
    grouped = pointInPolys.groupby('index_right')
    list_of_stations = list(grouped)
    basin_stations = list_of_stations[0][1]
    basin_stations_list = basin_stations['ID'].tolist()
    return list(set(basin_stations_list))
def spatial_join_pt(pt_file, poly_file, lat='lat', lon='lon'):
    """Spatially join polygon attributes to point data.

    'pt_file' is a csv file with latitude and longitude attributes that
    can be interpreted as points.

    'poly_file' is a geojson file that contains polygon data.

    lat --> latitude field in the point df
    lon --> longitude field in the point df

    Both layers must use the same CRS.

    This function returns a DataFrame, not a GeoDataFrame.
    """
    logging.info('Loading both layers.')
    df = pd.read_csv(pt_file)
    pt = df_to_geodf_pt(df, lat, lon)
    poly = geojson_to_geodf(poly_file)
    pt.crs = poly.crs

    logging.info('Operating spatial join.')
    pt_join = sjoin(pt, poly, how='left')
    pt_join = pt_join.drop(['geometry', 'index_right'], axis=1)
    logging.info('Successfully spatially joined data.')
    return pt_join
def main(inputs_dir, split_data, split_field):
    """
    Entry point for merge and split processing script

    Attributes
    ----------
    :param inputs_dir {string} - Path to the directory containing input data
    :param split_data {string} - Path to the geospatial file used for
        splitting the combined input data
    :param split_field {string} - Field name in split data used for dividing
        up combined input data
    """
    # Combine input files into a single input dataframe
    inputs_df = combine_inputs(inputs_dir)

    # Read geospatial features used for splitting data
    split_features = gpd.read_file(split_data)
    split_features = split_features.to_crs(INPUT_CRS)

    # Create centroids for all input data
    input_centroids = gpd.GeoDataFrame(inputs_df.centroid,
                                       geometry=inputs_df.centroid,
                                       crs=inputs_df.crs)

    # Intersect split data with input centroids
    intersected_data = gdptools.sjoin(input_centroids, split_features,
                                      how='left', op='within')

    # Copy intersected split field value to input dataframe
    inputs_df[split_field] = intersected_data[split_field]

    # Split input data using split data
    split_data_by_split_field(inputs_df, split_field)
def polygonsInPolygons(contained_polygons, container_polygons, new_file):
    contained_polygons = loadFile(contained_polygons)
    container_polygons = loadFile(container_polygons)
    selected_contained_polygons = sjoin(contained_polygons,
                                        container_polygons, op='within')
    selected_contained_polygons.to_file(new_file)
    return selected_contained_polygons
def geoCode(df):
    """all geoprocessing part"""
    # filter tweets without geocoordinates
    df = df[(pd.notnull(df.lat)) & (pd.notnull(df.lon))]
    gdf = toGeoDataFrame(df, lat="lat", lon="lon")
    zips = gp.read_file("DATAVAULT/misc/zipcodes.geojson")[["geometry", "postalCode"]]
    return sjoin(gdf, zips, how="left", op="within")
def main(dbPath, zipPath):
    '''generates synthetic csv from master df with zipcodes'''
    gdf = toGeoDataFrame(getDF(dbPath), lat='lon', lon='lat')  # messed with lat_long
    zips = gp.read_file(zipPath)[['geometry', 'postalCode']]
    datum = sjoin(gdf, zips, how="left")[['timestamp', 'postalCode', 'user_id', 'id']]
    datum.to_csv('sintetic.csv')
def joinTrafficCounts(data_grid, utm10n):
    data_grid['gid'] = data_grid.id
    data_grid.crs = utm10n
    print(data_grid.crs)
    osm_trafficCounts_centroids = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/_main_/yvr-open-data-traffic-counts/'
        'generated-traffic-counts-osm-split/'
        'osm_trafficCounts_split_dev_' + str(50) + '_centroids.shp')
    gridded_traffic_counts = sjoin(data_grid, osm_trafficCounts_centroids,
                                   how="left")
    print(len(gridded_traffic_counts))
    return gridded_traffic_counts
def main(dbPath, zipPath):
    '''generates synthetic csv from master_df with zipcodes'''
    gdf = toGeoDataFrame(getDF(dbPath), lat='lon', lon='lat')  # messed with lat_long
    zips = gp.read_file(zipPath)[['geometry', 'postalCode']]
    datum = sjoin(gdf, zips, how="left")[['timestamp', 'postalCode', 'user_id', 'id']]
    datum = datum[pd.notnull(datum.postalCode)]  # remove failed sjoin
    datum.to_csv('sintetic.csv')
def polygons_in_polygons(contained_polygons, container_polygons, new_file):
    contained_polygons = load_file(contained_polygons)
    container_polygons = load_file(container_polygons)
    selected_contained_polygons = sjoin(contained_polygons,
                                        container_polygons, op='within')
    selected_contained_polygons = remove_multipolygons(
        selected_contained_polygons)
    selected_contained_polygons.to_file(new_file)
    return selected_contained_polygons
def everything(path):
    pD = gp.read_file(PARQA + 'data/SHP/Park_Districts/ParkDistrict.shp')[['SYSTEM', 'geometry']]
    df = toGeoDataFrame(pd.read_csv(path, index_col=0))
    df = df.to_crs(pD.crs)
    df = sjoin(df, pD, how="left").rename(columns={'SYSTEM': 'parkDistrict'})
    df = df.to_crs(epsg=4326)
    rPath = path.replace('.csv', '.json')
    writeGeoJson(df, rPath)
def get_bounding_shape(lat, lon, gdf, name):
    h = pd.DataFrame({'Lat': [lat], 'Lon': [lon]})
    geometry = [Point(xy) for xy in zip([lon], [lat])]
    hg = gpd.GeoDataFrame(h, geometry=geometry)
    hg.crs = {'init': 'epsg:4326'}
    hg_1 = hg.to_crs(gdf.crs)
    r = sjoin(gdf, hg_1)
    if r.empty:
        return None
    else:
        return r[name].tolist()[0]
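# Hedged usage sketch for get_bounding_shape above (not part of the original
# source): looks up which polygon contains a (lat, lon) point; the file name
# and the 'NAME' attribute are placeholders for any polygon layer.
import geopandas as gpd

countries = gpd.read_file("countries.shp")
print(get_bounding_shape(48.85, 2.35, countries, 'NAME'))  # e.g. 'France'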
def spatial_join(gdf_amenity, gdf_nuts):
    gdf_amenity = gpd.GeoDataFrame(
        gdf_amenity,
        geometry=gpd.points_from_xy(gdf_amenity.lon, gdf_amenity.lat),
        crs="epsg:4326").drop(columns=['lon', 'lat'])
    gdf_amenity.sindex  # build the spatial index up front
    gdf = sjoin(gdf_nuts, gdf_amenity, how='left')
    s_counts = gdf.groupby(['nuts_id', 'amenity'])['geometry'].count()
    s_counts.name = "counts"
    return s_counts.reset_index()
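# Hedged usage sketch for spatial_join above (not part of the original
# source): counts amenity points per NUTS region. The column names follow
# the snippet; the region file is a placeholder and must carry 'nuts_id'.
import pandas as pd
import geopandas as gpd

amenities = pd.DataFrame({
    "lon": [13.40, 13.41], "lat": [52.52, 52.53],
    "amenity": ["cafe", "cafe"],
})
nuts = gpd.read_file("nuts_regions.geojson")
print(spatial_join(amenities, nuts).head())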
def pts_poly_join(pts, poly, poly_id_col):
    """
    Simple function to join the attributes of the polygon to the points.
    Specifically for an ID field in the polygon.
    """
    poly2 = poly[[poly_id_col, 'geometry']]
    poly3 = poly2.dissolve(by=poly_id_col)

    join1 = sjoin(pts, poly3, how='inner', op='within')
    join1.rename(columns={join1.columns[-1]: poly_id_col}, inplace=True)

    return [join1, poly3]
def occupation(dataframe):
    # column used to group the data
    data_centroid = gpd.GeoDataFrame(dataframe.copy())
    # only keep the polygon centroids (low threshold)
    data_centroid['geometry'] = dataframe['geometry'].centroid
    try:
        occup = sjoin(maillage[['ID', 'geometry']], data_centroid[['geometry']])
        occup = occup.drop_duplicates(['geometry'])  # drop duplicates
    except ValueError:
        print("sjoin error")
        occup = gpd.GeoDataFrame()
        occup["geometry"] = [""]
    return occup
def spatial_join_pt(pt_file, poly_file, lat='lat', lon='lon'):
    """Spatially join polygon attributes to point data.

    'pt_file' is a csv file with latitude and longitude attributes that
    can be interpreted as points.

    'poly_file' is a geojson file that contains polygon data.

    lat --> latitude field in the point df
    lon --> longitude field in the point df

    Both layers must use the same CRS.

    This function returns a DataFrame, not a GeoDataFrame.
    """
    logging.info('Loading point file')
    df = pd.read_csv(pt_file, low_memory=False)
    logging.info('Starting with {} rows in point file'.format(df.shape[0]))
    df = df.reset_index(drop=True)
    df_cols = df.columns.values.tolist()

    logging.info('Converting point file to geodf')
    pt = df_to_geodf_pt(df, lat, lon)

    logging.info('Loading poly file as geodf')
    poly = geojson_to_geodf(poly_file)
    pt.crs = poly.crs

    logging.info('Operating spatial join.')
    pt_join = sjoin(pt, poly, how='left')
    pt_join = pt_join.drop(['geometry', 'index_right'], axis=1)
    logging.info('Successfully spatially joined data.')

    join_cols = pt_join.columns.values.tolist()
    new_cols = [x for x in join_cols if x not in df_cols]

    # We will not keep the results for points that join to multiple polygons
    pt_join = (pt_join.reset_index()
                      .drop_duplicates(subset="index", keep=False)
                      .set_index("index"))

    # We must join the result back to the original dataframe to keep all rows
    final = pd.merge(df, pt_join[new_cols], left_index=True,
                     right_index=True, how="left")
    logging.info('Finished with {} rows'.format(final.shape[0]))
    return final
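# Hedged illustration of the dedupe step above (not part of the original
# source): after a left sjoin, a point that falls in two overlapping polygons
# appears twice. Resetting the index and dropping duplicated index values with
# keep=False removes both copies, and the later left merge restores the row
# with NaNs for the polygon attributes.
import pandas as pd

pt_join = pd.DataFrame({"zone": ["A", "B", "C"]}, index=[0, 0, 1])
deduped = (pt_join.reset_index()
                  .drop_duplicates(subset="index", keep=False)
                  .set_index("index"))
print(deduped)  # only index 1 survives; index 0 joined two zones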
def addNeighborhoods(data, utm10n):
    hoods = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/_main_/yvr-open-data-neighborhoods/'
        'csg_neighborhood_areas.shp')
    print(hoods.crs)
    hoods.crs = utm10n
    output = data.copy()
    output.is_copy = False
    print(len(output))
    output = sjoin(output, hoods, how="left")
    output['temp'] = [str(i.bounds) for i in output.geometry]
    print(output['temp'].head())
    output = output.drop_duplicates('temp', keep="last")
    print(len(output))
    # output.index = [i for i in range(len(output))]
    for i in range(len(output)):
        if output['NAME'].iloc[i] is None:
            output['NAME'].iloc[i] = "Stanley Park"
        if output["MAPID"].iloc[i] is None:
            output['MAPID'].iloc[i] = "SP1"
    # output = output[pd.isnull(output.co2_avg_e)]
    print(len(output))
    return output
def rarete_espece(datasp, maille, seuil):
    if len(datasp) < 1:  # no data
        return gpd.pd.DataFrame()
    try:
        occupation = sjoin(grille[['ID', 'geometry']], datasp[['geometry']])
    except Exception:
        print("join error (not in Picardie?)")
        return gpd.pd.DataFrame()
    occupation = occupation.drop_duplicates(['geometry'])  # drop duplicates
    nb_mailles = len(occupation)
    rr = (1 - (float(len(occupation)) / nbMaillesTotal)) * 100
    # find which index applies
    for indice, seuil in seuil_orig.items():
        if rr >= seuil[0] and rr < seuil[1]:
            indice_base = indice
            break
    else:
        indice_base = 'TTC'  # if we exceed 100% (min citations > 1)
    # find which adjusted index applies
    for indice, seuil in seuil_ajust.items():
        if rr >= seuil[0] and rr < seuil[1]:
            indice_ajust = indice
            break
    else:
        indice_ajust = 'TTC'  # if we exceed 100% (min citations > 1)
    rapport = gpd.pd.DataFrame()
    rapport["id_esp"] = [datasp.id_esp.values[0]]
    rapport["nom_s"] = [datasp.nom_s.values[0]]
    rapport["nb_mailles"] = [nb_mailles]
    rapport["rr"] = [rr]
    rapport["indiceBrute"] = indice_base
    rapport["indicePondere"] = indice_ajust
    return rapport
def makeGrid(ipoints, experiment, gridsize):
    # Projections
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum': 'WGS84', 'no_defs': True, 'proj': 'longlat'}

    # import grid script
    sys.path.insert(0, os.getcwd() + '/mapping/libs/')
    import grid as g

    opath = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-grid'
    if os.path.isdir(opath):
        print("already a folder!")
    else:
        os.mkdir(opath)

    # gridsize = 200
    ogridname = "grid_" + str(gridsize) + "m.shp"
    ofile = opath + "/" + ogridname
    print("making grid")
    g.main(ofile, ipoints.total_bounds[0], ipoints.total_bounds[2],
           ipoints.total_bounds[1], ipoints.total_bounds[3],
           gridsize, gridsize)
    print("grid complete!")

    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj

    # create grid id to groupby
    grid['id'] = [i for i in range(len(grid))]

    # Read in transect to spatially subset grids in transect
    transect = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/_main_/study-area/' + 'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid
    # transectgrid = grid[grid.geometry.intersects(transect.geometry)]
    sagrid = []
    for i in range(len(grid)):
        if np.array(transect.intersects(grid.geometry[i]))[0] != False:
            sagrid.append(grid.geometry[i])

    transectgrid = GeoDataFrame(sagrid)
    transectgrid.columns = ['geometry']
    transectgrid['id'] = [i for i in range(len(transectgrid))]
    transectgrid.crs = gridproj
    transectgrid.to_file(ofile[:-4] + "_transect.shp")
    # transectgrid.to_file(ofile[:-4] + "_transect.geojson", driver="GeoJSON")

    # !!! Some weird thing with reading in data makes the sjoin work !!! :(
    transectgrid = GeoDataFrame.from_file(ofile[:-4] + "_transect.shp")
    transectgrid.crs = gridproj
    print(transectgrid.head())

    ipoints = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/campaigns/' + experiment +
        '/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print(ipoints.head())
    # ipoints['id'] = [i for i in range(len(ipoints))]

    # Spatial join points to grid
    oname = "gridjoin_" + str(gridsize) + "m.shp"
    # join_inner_df = sjoin(transectgrid, ipoints, how="inner")
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    # join_inner_df.to_file(opath + "/" + oname)
    return join_inner_df
# selection over the reference period
subdata = gpd.GeoDataFrame(data[data["date_obs"].dt.year.isin(years)])
mcp_ref = subdata['geometry'].unary_union.convex_hull  # total MCP over the reference period
mcp_ref = gpd.GeoSeries(mcp_ref)
mcp_ref.to_file(path + '/out/occurence_ref.shp')

##### Occupancy zone #####
## By year and by period
occup_an = occupation('annee')
occup_an.to_file(path + '/out/occup_an.shp')
occup_per = occupation('periode')
occup_per.to_file(path + '/out/occup_per.shp')

# Compute the zone for the reference period
data_centroid = gpd.GeoDataFrame(data.copy())
data_centroid['geometry'] = data['geometry'].centroid
data_centroid_ref = data_centroid[data_centroid["date_obs"].dt.year.isin(years)]
occup_ref = sjoin(maillage[['ID', 'geometry']], data_centroid_ref[['geometry']])
occup_ref = occup_ref.drop_duplicates(['geometry'])
occup_ref.to_file(path + '/out/occupation.shp')

##### Stats #####
ref = pd.DataFrame()
ref["annee"] = years  # for the joins

## MCP area per year
mcp_an["mcp_area"] = mcp_an.area / 1000000
stats = pd.merge(ref, mcp_an[["annee", "mcp_area"]], on="annee", how="left")
# mcp_ref.area.sum()  # over the reference period

## Grid-cell area per year and over the reference period
occup_an["occup_area"] = occup_an.area / 1000000
occup_grp = pd.DataFrame(occup_an.groupby('annee')["occup_area"].sum())
occup_grp["annee"] = occup_grp.index  # I should learn to use pandas indexes
def occupation(col):
    # column used to group the data
    data_centroid = gpd.GeoDataFrame(data[data[col].notnull()].copy())
    # only keep the polygon centroids (low threshold)
    data_centroid['geometry'] = data['geometry'].centroid
    occup = sjoin(maillage[['ID', 'geometry']], data_centroid[[col, 'geometry']])
    occup = occup.drop_duplicates(['geometry', col])  # drop duplicates
    return occup
census_blocks_file = 'nycb2010_15b/nycb2010.shp'
pluto_file = 'Update/NYC_PLUTO.shp'

print("Reading files...")
pluto = gp.GeoDataFrame.from_file(os.path.join(dpath, pluto_file))
dsny_sections = gp.GeoDataFrame.from_file(os.path.join(dpath, dsny_section_file))
census_blocks = gp.GeoDataFrame.from_file(os.path.join(dpath, census_blocks_file))

census_blocks.to_crs(dsny_sections.crs, inplace=True)
pluto.crs = dsny_sections.crs
pluto['geometry'] = pluto['geometry'].centroid

print("Begin spatial joins...")
census_pluto = sjoin(pluto, census_blocks, how="left", op="within")
census_pluto.drop(['geometry', 'CB2010_right', 'CT2010_right', 'Shape_Area',
                   'Shape_Leng', 'index_right'], axis=1, inplace=True)

# --
# Re-merge the old file to use the Polygon instead of the point
census_pluto = census_pluto.merge(census_blocks[['geometry', 'BCTCB2010']])
census_pluto['geometry'] = census_pluto['geometry'].centroid
dsny_pluto_census = sjoin(census_pluto, dsny_sections, how="left", op="within")

writePath = dpath + 'Output/'
writePathFile = writePath + 'nyc.shp'
dsny_pluto_census['AssessTot'] = dsny_pluto_census['AssessTot'].astype(str)
#### DECLARE FILE PATHS
utility = '/home/kircheis/data/shp/Electric_Retail_Service_Ter.shp'
util = gpd.read_file(utility)

urbarea = '/home/kircheis/data/shp/census/cb_2013_us_ua10_500k/cb_2013_us_ua10_500k.shp'
ua = gpd.read_file(urbarea)
ua = ua.to_crs(util.crs)

urbpop = '/home/kircheis/data/census/ua/ua_list_all.txt'
uapop = pd.read_fwf(urbpop,
                    colspecs=[(0, 5), (10, 70), (75, 84), (89, 98), (103, 117),
                              (122, 131), (136, 150), (155, 164), (169, 178),
                              (183, 185)],
                    names=['UACE', 'NAME', 'POP', 'HU', 'AREALAND',
                           'AREALANDSQMI', 'AREAWATER', 'AREAWATERSQMI',
                           'POPDEN', 'LSADC'],
                    skiprows=1)
uapop['UACE'] = uapop['UACE'].astype(str).str.pad(5, side='left', fillchar='0')
uapop = uapop.set_index('UACE')

#### FIND WHICH URBAN AREAS ARE IN WHICH UTILITY SERVICE AREAS
j = tools.sjoin(util, ua)

#### ALLOCATE GRID FOR TEMPERATURE FORCINGS
g = rect_grid((-130, 24, -65, 50), 0.125)
coords = g.centroid.apply(lambda x: x.coords[0])
coordstr = coords.apply(lambda x: 'data_%s_%s' % (x[1], x[0]))
g = gpd.GeoDataFrame(geometry=g.geometry, index=g.index)
g.crs = util.crs
g['coordstr'] = coordstr

#### JOIN UTILITY SERVICE AREAS WITH TEMPERATURE FORCINGS
ua_g = tools.sjoin(ua, g)
rapport_list = []
for file in os.listdir(pathData):
    if file.endswith('.shp') and file.startswith("espace_"):
        shapes.append(file)
for shape in shapes:
    listData.append(gpd.GeoDataFrame.from_file(pathData + '/' + shape))

data = gpd.GeoDataFrame(gpd.pd.concat(listData, ignore_index=True))
data.geometry = data.geometry.centroid
data.crs = lamb93
listSp = set(data.nom_s.values)

# count the number of surveyed grid cells
occup_total = sjoin(grille[['ID', 'geometry']], data[['geometry']])
occup_total_agreg = occup_total.groupby(['geometry']).count()  # here, geometry is the index
occup_total_agreg = gpd.GeoDataFrame(occup_total_agreg)
occup_total_agreg.geometry = occup_total_agreg.index.values.copy()  # copy the index into the geometry column
occup_total_agreg.reset_index(drop=True, inplace=True)  # replace with a plain index
occup_total_agreg["nb_cit"] = occup_total_agreg["index_right"].values.copy()
del occup_total_agreg["ID"]
del occup_total_agreg["index_right"]
nbMaillesProsp = len(occup_total_agreg[occup_total_agreg["nb_cit"] >= citation_mini])
P = 100 * float((nbMaillesTotal - nbMaillesProsp)) / nbMaillesTotal

# adjust the thresholds of the rarity indices
for indice, seuil in seuil_orig.items():
    seuil_ajust[indice][0] = seuil[0] + P - (seuil[0] * P / 100)
    seuil_ajust[indice][1] = seuil[1] + P - (seuil[1] * P / 100)

def rarete_espece(datasp, maille, seuil):
def test_sjoin_outer(self):
    df = sjoin(self.pointdf, self.polydf, how="outer")
    self.assertEquals(df.shape, (21, 8))
        # finish
        new_regions.append(new_region.tolist())

    return new_regions, np.asarray(new_vertices)

b = gpd.GeoDataFrame.from_file('/home/akagi/Desktop/electricity_data/Electric_Retail_Service_Ter.shp')

#### LOOP THROUGH UTILITY SERVICE AREAS
sub = gpd.read_file(substations)
util = gpd.read_file(utility)

invalid_util = util[~util['geometry'].apply(lambda x: x.is_valid)]
util.loc[invalid_util.index, 'geometry'] = util.loc[invalid_util.index, 'geometry'].apply(lambda x: x.buffer(0))

sub_util = tools.sjoin(sub, util, op='within', how='left')
sub_xy = np.vstack(sub['geometry'].apply(lambda u: np.concatenate(u.xy)).values)
# util_poly = b.set_index('UNIQUE_ID')['geometry']

vor = spatial.Voronoi(sub_xy)
reg, vert = voronoi_finite_polygons_2d(vor, 1)
v_poly = gpd.GeoSeries(pd.Series(reg).apply(lambda x: geometry.Polygon(vert[x])))
v_gdf = gpd.GeoDataFrame(pd.concat([sub.drop('geometry', axis=1), v_poly],
                                   axis=1)).rename(columns={0: 'geometry'})
v_gdf.crs = sub.crs

j = tools.sjoin(util, v_gdf, op='intersects')
j['right_geom'] = j['UNIQUE_ID_right'].map(v_gdf.set_index('UNIQUE_ID')['geometry'])
j = j.dropna(subset=['geometry', 'right_geom']).set_index('UNIQUE_ID_left')
def test_sjoin_duplicate_column_name(self):
    pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
    df = sjoin(pointdf2, self.polydf, how="left")
    self.assertTrue('Shape_Area_left' in df.columns)
    self.assertTrue('Shape_Area_right' in df.columns)
output["occup"]="" output["citations"]="" for file in os.listdir(path): if file.endswith('.shp') and file.startswith("espace_"): shapes.append(file) for shape in shapes: listData.append(gpd.GeoDataFrame.from_file(path+'/'+shape)) data=gpd.GeoDataFrame(pd.concat(listData,ignore_index=True)) ###Travail sur lot de donnees data=data[data["nb"] >= 0] #filtre des obs négative data=data[data.geometry.area < 5000000] #filtre des polygon sup a 5km2 data["date_obs"]=pd.to_datetime(data["date_obs"]) #convertire la date data["annee"]=data["date_obs"].dt.year for debut in range(annee_min,annee_max+1-4): for fin in range(debut+5-1,annee_max+1): print str(debut)+" - "+str(fin) data_centroid=gpd.GeoDataFrame(data[data["annee"].between(debut,fin)].copy()) #On ne prend que les centroid des data qui entre dans la periode data_centroid['geometry']=data['geometry'].centroid count=len(data_centroid.index) occup=sjoin(maillage[['ID','geometry']],data_centroid[['geometry']]) occup=occup.drop_duplicates(['geometry']) #On vire les doublons line=pd.DataFrame({'debut':debut,'fin':fin,'occup':occup.area.sum()/1000000,'citations':count},index=[0]) output=output.append(line,ignore_index=True) output.to_csv(path+"data_grp.csv",index=False)
def test_sjoin_inner(self):
    df = sjoin(self.pointdf, self.polydf, how="inner")
    self.assertEquals(df.shape, (11, 8))
# line_shp = gpd.read_file(network_shp_file)
country_shp = gpd.read_file(country_shp_file)
myanmar = country_shp[country_shp.ADMIN == 'Myanmar']
nigeria = country_shp[country_shp.ADMIN == 'Nigeria']

osm_places_mmr = osm.query_osm(typ='node', bbox=myanmar.total_bounds,
                               recurse='down', tags='place')
osm_power_mmr = osm.query_osm(typ='node', bbox=myanmar.total_bounds,
                              recurse='down', tags='power')
osm_places_nga = osm.query_osm(typ='node', bbox=nigeria.total_bounds,
                               recurse='down', tags='place')
osm_power_nga = osm.query_osm(typ='node', bbox=nigeria.total_bounds,
                              recurse='down', tags='power')
osm_power_way_nga = osm.query_osm(typ='way', bbox=nigeria.total_bounds,
                                  recurse='down', tags='power')

# find places that have a population
places_nga = osm_places_nga[['geometry', 'population', 'name']]
places_nga = sjoin(places_nga, nigeria, how="inner", op="within")
places_nga = places_nga[~pd.isnull(places_nga.population)]

# find length of power line data in Nigeria
power_lines_nga = osm_power_way_nga[osm_power_way_nga.geom_type == 'LineString'][['geometry']]
power_lines_nga = sjoin(power_lines_nga, nigeria, how="inner", op="within")

# assign line distances back to geopandas df and add styling to display via geojson.io
power_lines_nga['line_length_m'] = power_lines_nga.geometry.apply(
    lambda linestring: sum(su.linestring_distances(linestring)))
# style green
power_lines_nga['stroke'] = "#00ff00"

# output as geojson
power_line_json = "/home/cjn/geodata/nga_power_lines_osm.json"
json_file = open(power_line_json, mode='w')
json_file.write(power_lines_nga.to_json())
from shapely.geometry import Point

here = os.path.dirname(os.path.abspath('__file__'))
data_dir = os.path.join(here, '..', 'data')
vector_file = os.path.join(here, 'nybb_15b', 'nybb.shp')

boros = read_file(vector_file)
xmin, ymin, xmax, ymax = boros.total_bounds
N = 1000
X = np.random.uniform(low=xmin, high=xmax, size=N)
Y = np.random.uniform(low=ymin, high=ymax, size=N)
points = GeoDataFrame(geometry=GeoSeries([Point(x, y) for x, y in zip(X, Y)]))
points.crs = boros.crs

joined = sjoin(points, boros, how='inner')
joined.geometry = joined.buffer(2000)

ax = plt.subplot(121)
boros.plot(column='BoroCode', categorical=True, axes=ax)
points.plot(axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

ax = plt.subplot(122)
joined.plot(column='BoroCode', categorical=True, axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
plt.tight_layout()
    con = sqlite3.connect('test.db')
    df = pd.read_sql_query('select * from telemetry where boat_id = 5', con,
                           parse_dates=['received'], index_col=['received'])
    # geopandas requires this to be called 'geometry'
    df['geometry'] = df.apply(lambda y: Point(y.lat, y.lon), axis=1)
    return df

def label_dockings(data):
    """add a label to indicate when the bus was stopped at one of the docks"""
    for row in data:
        import pdb
        pdb.set_trace()
    return data

def label_arrive(data):
    """add a label to indicate when the bus arrives at one of the docks"""
    pass

def label_depart(data):
    """add a label to indicate when the bus departs one of the docks"""

if __name__ == '__main__':
    print('creating bounds')
    spots = GeoDataFrame({'geometry': [bounds['WATERFRONT'],
                                       bounds['LONSDALE'],
                                       bounds['PARKING']]})
    print('loading bus data')
    data = loadbus()
    gdf = gpd.GeoDataFrame(data)
    print('joining')
    joined = sjoin(gdf, spots, how='left', op='contains')
    import pdb
    pdb.set_trace()
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import tools

census_old = '/home/kircheis/data/shp/census/census_tracts_all/census_tracts_1990.shp'
census_new = '/home/kircheis/data/shp/census/census_tracts_all/census_tracts_2014.shp'

df_90 = gpd.read_file(census_old)
df_14 = gpd.read_file(census_new)

df_14_c = df_14.copy()
df_14_c['geometry'] = df_14_c.centroid

j = tools.sjoin(df_90, df_14_c, op='contains')

#### FORMAT CENSUS TRACT NAMES
#### NONDECIMAL ENTRIES
j['TRACT_NAME'][~j['TRACT_NAME'].str.contains(r'\.')] = (
    j['TRACT_NAME'][~j['TRACT_NAME'].str.contains(r'\.')] + '00'
).str.pad(6, side='left', fillchar='0')
#### DECIMAL ENTRIES (replace the literal dot, not the regex wildcard)
j['TRACT_NAME'][j['TRACT_NAME'].str.contains(r'\.')] = (
    j['TRACT_NAME'][j['TRACT_NAME'].str.contains(r'\.')]
    .str.replace('.', '', regex=False)
    .str.pad(6, side='left', fillchar='0')
)
#### CREATE FIPS
j['GEOID_1990'] = j['ST'].astype(str).str.cat(j['CO'].astype(str)).str.cat(j['TRACT_NAME'])

j_cross = j.rename(columns={'GEOID': 'GEOID_2014'})[['GEOID_1990', 'GEOID_2014']].sort_values('GEOID_1990')