# In[16]:
# Count the number of occurrences for each zip code in the data frame,
# then convert the resulting Series to a data frame for merging.
#
# FIX: the original value_counts().to_frame(name='count').reset_index()
# only yields a column literally named 'index' on pandas 1.x; on pandas 2.x
# the reset column takes the index's name ('Zip Code') and the merge below
# on 'index' raises KeyError.  rename_axis('index') pins the column name
# on every pandas version (and is a no-op change on 1.x).
counts = data['Zip Code'].value_counts()
counts = counts.rename_axis('index')
counts = counts.to_frame(name='count')
counts = counts.reset_index()

# In[22]:
# Merge the number of occurrences for each zip code with the corresponding
# zip code polygon, keeping every counted zip code (left join).
counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))

# Drop rows that found no matching polygon (NaN geometry) -- they cannot
# be plotted.
counts = counts.dropna()  # drop null values

# Plot a choropleth of permit counts by zip code.
fig, ax = plt.subplots(figsize=(8, 8))
counts.plot(column='count', cmap='Blues', alpha=1, linewidth=0.1, ax=ax)
plt.title('Building Permits by Zipcode', size=20)
plt.axis('off')
plt.show()

# In[ ]:
class Source:
    """Source

    Normalizes the different inputs a layer can be built from: a table
    name, a SQL query, a ``DataFrame`` or a ``GeoDataFrame``.

    Args:
        source (str, pandas.DataFrame, geopandas.GeoDataFrame): a table name,
            SQL query, DataFrame, GeoDataFrame instance.
        credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
            A Credentials instance. If not provided, the credentials will be
            automatically obtained from the default credentials if available.
        geom_col (str, optional): string indicating the geometry column name
            in the source `DataFrame`.
        encode_data (bool, optional): Indicates whether the data needs to be
            encoded. Default is True.

    Raises:
        ValueError: if `source` is neither a str nor a DataFrame, or if no
            valid geometry column can be resolved from a DataFrame source.

    Example:
        Table name.

        >>> Source('table_name')

        SQL query.

        >>> Source('SELECT * FROM table_name')

        DataFrame object.

        >>> Source(df, geom_col='my_geom')

        GeoDataFrame object.

        >>> Source(gdf)

        Setting the credentials.

        >>> Source('table_name', credentials)

    """

    def __init__(self, source, credentials=None, geom_col=None, encode_data=True):
        self.credentials = None
        self.datetime_column_names = None
        self.encode_data = encode_data

        if isinstance(source, str):
            # Table, SQL query
            self.type = SourceType.QUERY
            self.manager = ContextManager(credentials)
            self.query = self.manager.compute_query(source)
            self.credentials = self.manager.credentials
        elif isinstance(source, DataFrame):
            if isinstance(source, GeoDataFrame):
                # Reproject before anything else when the frame's CRS needs it.
                if is_reprojection_needed(source):
                    source = reproject(source)

            # DataFrame, GeoDataFrame
            self.type = SourceType.GEOJSON
            # Copy so the in-place cleanup below never mutates the caller's frame.
            self.gdf = GeoDataFrame(source, copy=True)

            # Datetime columns are serialized to RFC 2822 strings up front.
            self.set_datetime_columns()

            # Resolve the active geometry column: an explicit geom_col wins,
            # then the frame's own geometry; otherwise the input is unusable.
            if geom_col in self.gdf:
                set_geometry(self.gdf, geom_col, inplace=True)
            elif has_geometry(source):
                self.gdf.set_geometry(source.geometry.name, inplace=True)
            else:
                raise ValueError('No valid geometry found. Please provide an input source with ' +
                                 'a valid geometry or specify the "geom_col" param with a geometry column.')

            # Remove nan geometries
            self.gdf.dropna(subset=[self.gdf.geometry.name], inplace=True)

            # Remove empty geometries
            self.gdf = self.gdf[~self.gdf.geometry.is_empty]

            # Checking the uniqueness of the geometry type
            # NOTE(review): this membership test compares the whole set of
            # geom types against the entries of VALID_GEOMETRY_TYPES, so that
            # constant presumably holds sets of compatible types -- confirm.
            geometry_types = set(self.gdf.geom_type.unique()).difference({None})
            if geometry_types not in VALID_GEOMETRY_TYPES:
                raise ValueError('No valid geometry column types ({}), it has '.format(geometry_types) +
                                 'to be one of the next type sets: {}.'.format(VALID_GEOMETRY_TYPES))
        else:
            raise ValueError('Wrong source input. Valid values are str and DataFrame.')

    def get_credentials(self):
        """Return the credentials dict for a remote query source, None for local data."""
        if self.type == SourceType.QUERY:
            if self.credentials:
                return {
                    # CARTO VL requires a username but CARTOframes allows passing only the base_url.
                    # That's why 'user' is used by default if username is empty.
                    'username': self.credentials.username or 'user',
                    'api_key': self.credentials.api_key,
                    'base_url': self.credentials.base_url
                }
        elif self.type == SourceType.GEOJSON:
            return None

    def set_datetime_columns(self):
        """Convert every datetime column of the local frame to an RFC 2822 string.

        Only applies to GEOJSON (local DataFrame) sources; also records the
        affected column names in self.datetime_column_names.
        """
        if self.type == SourceType.GEOJSON:
            self.datetime_column_names = get_datetime_column_names(self.gdf)
            if self.datetime_column_names:
                for column in self.datetime_column_names:
                    self.gdf[column] = self.gdf[column].dt.strftime(RFC_2822_DATETIME_FORMAT)

    def get_datetime_column_names(self):
        # None unless set_datetime_columns found datetime columns in a local frame.
        return self.datetime_column_names

    def get_geom_type(self):
        """Return the source geometry type ('point' fallback for queries)."""
        if self.type == SourceType.QUERY:
            return self.manager.get_geom_type(self.query) or 'point'
        elif self.type == SourceType.GEOJSON:
            return get_geodataframe_geom_type(self.gdf)

    def compute_metadata(self, columns=None):
        """Populate self.data and self.bounds, optionally restricting to `columns`.

        Args:
            columns (list, optional): columns to keep in a local frame; the
                geometry column is always appended. NOTE: the passed-in list
                is mutated (`+=`).
        """
        if self.type == SourceType.QUERY:
            self.data = self.query
            self.bounds = self.manager.get_bounds(self.query)
        elif self.type == SourceType.GEOJSON:
            if columns is not None:
                columns += [self.gdf.geometry.name]
                self.gdf = self.gdf[columns]
            self.data = get_geodataframe_data(self.gdf, self.encode_data)
            self.bounds = get_geodataframe_bounds(self.gdf)

    def is_local(self):
        # Local sources are the ones built from a (Geo)DataFrame.
        return self.type == SourceType.GEOJSON

    def is_public(self):
        """Return whether the data is publicly readable (local data always is)."""
        if self.type == SourceType.QUERY:
            return self.manager.is_public(self.query)
        elif self.type == SourceType.GEOJSON:
            return True

    def schema(self):
        # Only remote query sources live in a database schema.
        if self.type == SourceType.QUERY:
            return self.manager.get_schema()
        elif self.type == SourceType.GEOJSON:
            return None

    def get_table_names(self):
        """Return the table names referenced by the query (empty for local data)."""
        if self.type == SourceType.QUERY:
            return self.manager.get_table_names(self.query)
        elif self.type == SourceType.GEOJSON:
            return []
def orig_dest(fd: gpd.GeoDataFrame, taz: gpd.GeoDataFrame, taz_name="tazce", plc_name="placefp", cnt_name="countyfp") -> gpd.GeoDataFrame:
    r"""
    Add County and Place codes to origin-destination data

    Attach the County and Census Designated Place codes found in the
    GeoDataFrame `taz` to the origin-destination (flow) GeoDataFrame `fd`,
    for both the origin and the destination of every flow. The relevant
    column names in `taz` are given by `taz_name`, `plc_name`, and
    `cnt_name`. The output GeoDataFrame carries the columns "orig_taz",
    "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and "dest_cnt".

    Parameters
    ----------
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis Zone (TAZ) codes, Census
        Designated Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes. Defaults
        to "tazce".
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated
        Place codes. Defaults to "placefp".
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes.
        Defaults to "countyfp".

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place
        codes. The column names are "orig_taz", "dest_taz", "orig_plc",
        "dest_plc", "orig_cnt", and "dest_cnt".

    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place

    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lay inside a place and which part
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # County lookup table: keep only TAZ rows that lie within a county and
    # drop the geometry, which plays no role in the attribute merges below.
    county_tab = taz.dropna(subset=[cnt_name]).drop(columns="geometry")

    # Place lookup table: TAZ rows that overlap a place. The distribution of
    # origins/destinations within a TAZ is unknown, so a TAZ is assigned to a
    # place only when more than 0.5 of its surface area falls inside it.
    place_tab = taz.dropna(subset=[plc_name]).drop(columns="geometry")
    place_tab = place_tab.loc[place_tab["area"] > 0.5]

    # Attach county codes for both ends of every flow.
    for side in ("orig", "dest"):
        fd = fd.merge(county_tab, how="left", left_on=side + "_taz", right_on=taz_name)
        fd = fd.rename(columns={cnt_name: side + "_cnt"})
        fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Attach place codes for both ends of every flow.
    for side in ("orig", "dest"):
        fd = fd.merge(place_tab, how="left", left_on=side + "_taz", right_on=taz_name)
        fd = fd.rename(columns={plc_name: side + "_plc"})
        fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Flows outside any place get an empty string instead of NaN.
    fd = fd.fillna({"orig_plc": "", "dest_plc": ""})

    return fd
# Quick distribution checks on the combined data frame.
concat_df.zip_code.value_counts()
concat_df['water_debt_only'].value_counts()

# Mapping NYC Zip Codes
zip_codes = GeoDataFrame.from_file(
    'C:/Users/ghodg/Desktop/Project Data/Tax Lien/Zip Code Data/ZIP_CODE_040114/'
    'ZIP_CODE_040114.shp')

# Cast both sides of the join key to int so the merge keys match.
zip_codes['zip_code'] = zip_codes['ZIPCODE'].astype(int)
concat_df['zip_code'] = concat_df['zip_code'].astype(int)

# FIX: value_counts().to_frame().reset_index() only yields a column literally
# named 'index' on pandas 1.x; on pandas 2.x the reset column takes the
# index's name ('zip_code') and the merge below on 'index' raises KeyError.
# rename_axis('index') pins the column name on every pandas version.
counts = concat_df['zip_code'].value_counts()
counts = counts.rename_axis('index')
counts = counts.to_frame(name='count_buildings')
counts = counts.reset_index()

# Merge counts with zip-code polygons and drop rows with no match.
counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))
counts = counts.dropna()

# Plotting the map and colorbar
norm = colors.Normalize(vmin=counts.count_buildings.min(),
                        vmax=counts.count_buildings.max())
cbar = plt.cm.ScalarMappable(norm=norm, cmap='Blues')
# FIX: older matplotlib raises if a ScalarMappable has no array when passed
# to fig.colorbar(); setting an empty array is harmless on newer versions.
cbar.set_array([])
fig, ax = plt.subplots(figsize=(10, 10))
counts.plot(column='count_buildings', cmap='Blues', legend=False, alpha=1,
            linewidth=0.5, edgecolor='black', ax=ax)
ax_cbar = fig.colorbar(cbar, ax=ax, fraction=0.046, pad=0.04)