# In[16]:
# Count the number of occurrences for each zip code in the data frame,
# then convert the resulting Series to a data frame for merging.
#
# FIX: the original value_counts().to_frame(name='count').reset_index()
# only yields a column literally named 'index' on pandas 1.x; on pandas 2.x
# the reset column takes the index's name ('Zip Code') and the merge below
# on 'index' raises KeyError.  rename_axis('index') pins the column name
# on every pandas version (and is a no-op change on 1.x).
counts = data['Zip Code'].value_counts()
counts = counts.rename_axis('index')
counts = counts.to_frame(name='count')
counts = counts.reset_index()

# In[22]:
# Merge the number of occurrences for each zip code with the corresponding
# zip code polygon, keeping every counted zip code (left join).
counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))

# Drop rows that found no matching polygon (NaN geometry) -- they cannot
# be plotted.
counts = counts.dropna()  # drop null values

# Plot a choropleth of permit counts by zip code.
fig, ax = plt.subplots(figsize=(8, 8))
counts.plot(column='count', cmap='Blues', alpha=1, linewidth=0.1, ax=ax)
plt.title('Building Permits by Zipcode', size=20)
plt.axis('off')
plt.show()

# In[ ]:
class Source:
    """Source

    Normalizes the different inputs a layer can be built from: a table
    name, a SQL query, a ``DataFrame`` or a ``GeoDataFrame``.

    Args:
        source (str, pandas.DataFrame, geopandas.GeoDataFrame): a table name,
            SQL query, DataFrame, GeoDataFrame instance.
        credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
            A Credentials instance. If not provided, the credentials will be
            automatically obtained from the default credentials if available.
        geom_col (str, optional): string indicating the geometry column name
            in the source `DataFrame`.
        encode_data (bool, optional): Indicates whether the data needs to be
            encoded. Default is True.

    Raises:
        ValueError: if `source` is neither a str nor a DataFrame, or if no
            valid geometry column can be resolved from a DataFrame source.

    Example:
        Table name.

        >>> Source('table_name')

        SQL query.

        >>> Source('SELECT * FROM table_name')

        DataFrame object.

        >>> Source(df, geom_col='my_geom')

        GeoDataFrame object.

        >>> Source(gdf)

        Setting the credentials.

        >>> Source('table_name', credentials)

    """

    def __init__(self, source, credentials=None, geom_col=None, encode_data=True):
        self.credentials = None
        self.datetime_column_names = None
        self.encode_data = encode_data

        if isinstance(source, str):
            # Table, SQL query
            self.type = SourceType.QUERY
            self.manager = ContextManager(credentials)
            self.query = self.manager.compute_query(source)
            self.credentials = self.manager.credentials
        elif isinstance(source, DataFrame):
            if isinstance(source, GeoDataFrame):
                # Reproject before anything else when the frame's CRS needs it.
                if is_reprojection_needed(source):
                    source = reproject(source)

            # DataFrame, GeoDataFrame
            self.type = SourceType.GEOJSON
            # Copy so the in-place cleanup below never mutates the caller's frame.
            self.gdf = GeoDataFrame(source, copy=True)

            # Datetime columns are serialized to RFC 2822 strings up front.
            self.set_datetime_columns()

            # Resolve the active geometry column: an explicit geom_col wins,
            # then the frame's own geometry; otherwise the input is unusable.
            if geom_col in self.gdf:
                set_geometry(self.gdf, geom_col, inplace=True)
            elif has_geometry(source):
                self.gdf.set_geometry(source.geometry.name, inplace=True)
            else:
                raise ValueError('No valid geometry found. Please provide an input source with ' +
                                 'a valid geometry or specify the "geom_col" param with a geometry column.')

            # Remove nan geometries
            self.gdf.dropna(subset=[self.gdf.geometry.name], inplace=True)

            # Remove empty geometries
            self.gdf = self.gdf[~self.gdf.geometry.is_empty]

            # Checking the uniqueness of the geometry type
            # NOTE(review): this membership test compares the whole set of
            # geom types against the entries of VALID_GEOMETRY_TYPES, so that
            # constant presumably holds sets of compatible types -- confirm.
            geometry_types = set(self.gdf.geom_type.unique()).difference({None})
            if geometry_types not in VALID_GEOMETRY_TYPES:
                raise ValueError('No valid geometry column types ({}), it has '.format(geometry_types) +
                                 'to be one of the next type sets: {}.'.format(VALID_GEOMETRY_TYPES))
        else:
            raise ValueError('Wrong source input. Valid values are str and DataFrame.')

    def get_credentials(self):
        """Return the credentials dict for a remote query source, None for local data."""
        if self.type == SourceType.QUERY:
            if self.credentials:
                return {
                    # CARTO VL requires a username but CARTOframes allows passing only the base_url.
                    # That's why 'user' is used by default if username is empty.
                    'username': self.credentials.username or 'user',
                    'api_key': self.credentials.api_key,
                    'base_url': self.credentials.base_url
                }
        elif self.type == SourceType.GEOJSON:
            return None

    def set_datetime_columns(self):
        """Convert every datetime column of the local frame to an RFC 2822 string.

        Only applies to GEOJSON (local DataFrame) sources; also records the
        affected column names in self.datetime_column_names.
        """
        if self.type == SourceType.GEOJSON:
            self.datetime_column_names = get_datetime_column_names(self.gdf)
            if self.datetime_column_names:
                for column in self.datetime_column_names:
                    self.gdf[column] = self.gdf[column].dt.strftime(RFC_2822_DATETIME_FORMAT)

    def get_datetime_column_names(self):
        # None unless set_datetime_columns found datetime columns in a local frame.
        return self.datetime_column_names

    def get_geom_type(self):
        """Return the source geometry type ('point' fallback for queries)."""
        if self.type == SourceType.QUERY:
            return self.manager.get_geom_type(self.query) or 'point'
        elif self.type == SourceType.GEOJSON:
            return get_geodataframe_geom_type(self.gdf)

    def compute_metadata(self, columns=None):
        """Populate self.data and self.bounds, optionally restricting to `columns`.

        Args:
            columns (list, optional): columns to keep in a local frame; the
                geometry column is always appended. NOTE: the passed-in list
                is mutated (`+=`).
        """
        if self.type == SourceType.QUERY:
            self.data = self.query
            self.bounds = self.manager.get_bounds(self.query)
        elif self.type == SourceType.GEOJSON:
            if columns is not None:
                columns += [self.gdf.geometry.name]
                self.gdf = self.gdf[columns]
            self.data = get_geodataframe_data(self.gdf, self.encode_data)
            self.bounds = get_geodataframe_bounds(self.gdf)

    def is_local(self):
        # Local sources are the ones built from a (Geo)DataFrame.
        return self.type == SourceType.GEOJSON

    def is_public(self):
        """Return whether the data is publicly readable (local data always is)."""
        if self.type == SourceType.QUERY:
            return self.manager.is_public(self.query)
        elif self.type == SourceType.GEOJSON:
            return True

    def schema(self):
        # Only remote query sources live in a database schema.
        if self.type == SourceType.QUERY:
            return self.manager.get_schema()
        elif self.type == SourceType.GEOJSON:
            return None

    def get_table_names(self):
        """Return the table names referenced by the query (empty for local data)."""
        if self.type == SourceType.QUERY:
            return self.manager.get_table_names(self.query)
        elif self.type == SourceType.GEOJSON:
            return []
def orig_dest(fd: gpd.GeoDataFrame, taz: gpd.GeoDataFrame, taz_name="tazce", plc_name="placefp", cnt_name="countyfp") -> gpd.GeoDataFrame:
    r"""
    Add County and Place codes to origin-destination data

    Attach the County and Census Designated Place codes found in the
    GeoDataFrame `taz` to the origin-destination (flow) GeoDataFrame `fd`,
    for both the origin and the destination of every flow. The relevant
    column names in `taz` are given by `taz_name`, `plc_name`, and
    `cnt_name`. The output GeoDataFrame carries the columns "orig_taz",
    "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and "dest_cnt".

    Parameters
    ----------
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis Zone (TAZ) codes, Census
        Designated Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes. Defaults
        to "tazce".
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated
        Place codes. Defaults to "placefp".
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes.
        Defaults to "countyfp".

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place
        codes. The column names are "orig_taz", "dest_taz", "orig_plc",
        "dest_plc", "orig_cnt", and "dest_cnt".

    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place

    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lay inside a place and which part
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # County lookup table: keep only TAZ rows that lie within a county and
    # drop the geometry, which plays no role in the attribute merges below.
    county_tab = taz.dropna(subset=[cnt_name]).drop(columns="geometry")

    # Place lookup table: TAZ rows that overlap a place. The distribution of
    # origins/destinations within a TAZ is unknown, so a TAZ is assigned to a
    # place only when more than 0.5 of its surface area falls inside it.
    place_tab = taz.dropna(subset=[plc_name]).drop(columns="geometry")
    place_tab = place_tab.loc[place_tab["area"] > 0.5]

    # Attach county codes for both ends of every flow.
    for side in ("orig", "dest"):
        fd = fd.merge(county_tab, how="left", left_on=side + "_taz", right_on=taz_name)
        fd = fd.rename(columns={cnt_name: side + "_cnt"})
        fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Attach place codes for both ends of every flow.
    for side in ("orig", "dest"):
        fd = fd.merge(place_tab, how="left", left_on=side + "_taz", right_on=taz_name)
        fd = fd.rename(columns={plc_name: side + "_plc"})
        fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Flows outside any place get an empty string instead of NaN.
    fd = fd.fillna({"orig_plc": "", "dest_plc": ""})

    return fd
# Quick distribution checks on the combined data frame.
concat_df.zip_code.value_counts()
concat_df['water_debt_only'].value_counts()

# Mapping NYC Zip Codes
zip_codes = GeoDataFrame.from_file(
    'C:/Users/ghodg/Desktop/Project Data/Tax Lien/Zip Code Data/ZIP_CODE_040114/'
    'ZIP_CODE_040114.shp')

# Cast both sides of the join key to int so the merge keys match.
zip_codes['zip_code'] = zip_codes['ZIPCODE'].astype(int)
concat_df['zip_code'] = concat_df['zip_code'].astype(int)

# FIX: value_counts().to_frame().reset_index() only yields a column literally
# named 'index' on pandas 1.x; on pandas 2.x the reset column takes the
# index's name ('zip_code') and the merge below on 'index' raises KeyError.
# rename_axis('index') pins the column name on every pandas version.
counts = concat_df['zip_code'].value_counts()
counts = counts.rename_axis('index')
counts = counts.to_frame(name='count_buildings')
counts = counts.reset_index()

# Merge counts with zip-code polygons and drop rows with no match.
counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))
counts = counts.dropna()

# Plotting the map and colorbar
norm = colors.Normalize(vmin=counts.count_buildings.min(),
                        vmax=counts.count_buildings.max())
cbar = plt.cm.ScalarMappable(norm=norm, cmap='Blues')
# FIX: older matplotlib raises if a ScalarMappable has no array when passed
# to fig.colorbar(); setting an empty array is harmless on newer versions.
cbar.set_array([])
fig, ax = plt.subplots(figsize=(10, 10))
counts.plot(column='count_buildings', cmap='Blues', legend=False, alpha=1,
            linewidth=0.5, edgecolor='black', ax=ax)
ax_cbar = fig.colorbar(cbar, ax=ax, fraction=0.046, pad=0.04)