Example #1
def prune_columns(gds: GeoDataFrame):
    """Remove unneeded columns."""

    columns_remove = gds.columns.difference(['osmid', 'geometry', 'main_tag'])
    if len(columns_remove) == 0:
        return
    gds.drop(columns=columns_remove, inplace=True)
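
A minimal usage sketch for prune_columns; the sample rows and the 'scratch' column are hypothetical:

import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame({
    'osmid': [1, 2],
    'main_tag': ['shop', 'amenity'],
    'scratch': ['a', 'b'],  # not in the keep-list, so it gets pruned
    'geometry': [Point(0, 0), Point(1, 1)],
})
prune_columns(gdf)
print(list(gdf.columns))  # ['osmid', 'main_tag', 'geometry']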
Example #2
def building_density_per_block(bldgs: gpd.GeoDataFrame,
                               blocks: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    '''
    Adds columns to the blocks dataframe containing the total
    building area and the building density per block
    '''

    assert 'block_id' in bldgs.columns, "ERROR: bldgs dataframe does not have block_id"

    bldgs['bldg_area'] = bldgs.area
    bldgs['bldg_count'] = 1
    bldg_area_by_block = bldgs[['block_id', 'bldg_area',
                                'bldg_count']].groupby('block_id').sum()
    bldg_area_by_block.reset_index(inplace=True)

    for c in ['bldg_area', 'bldg_count']:
        if c in blocks.columns:
            blocks.drop(columns=[c], inplace=True)
    blocks = blocks.merge(bldg_area_by_block, how='left', on='block_id')
    blocks['block_area'] = blocks.area
    blocks['bldg_density'] = blocks['bldg_area'] / blocks['block_area']

    blocks.fillna(value=0.0, inplace=True)

    return blocks
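
A quick sanity check of building_density_per_block with toy geometries in an arbitrary planar CRS (two buildings covering half of one block):

import geopandas as gpd
from shapely.geometry import box

blocks = gpd.GeoDataFrame({'block_id': ['b1'],
                           'geometry': [box(0, 0, 1, 1)]})
bldgs = gpd.GeoDataFrame({'block_id': ['b1', 'b1'],
                          'geometry': [box(0, 0, 0.5, 0.5),
                                       box(0.5, 0.5, 1, 1)]})
blocks = building_density_per_block(bldgs, blocks)
print(blocks.loc[0, 'bldg_density'])  # 0.5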
Example #3
def from_lines(
    gdf: gpd.GeoDataFrame,
    attrs: Optional[List[str]] = None,  # avoid a mutable default argument
    tolerance: float = 0.001,
) -> nx.DiGraph:
    if attrs:
        gdf.drop(
            [
                col for col in gdf.columns
                if col not in attrs and col != 'geometry'
            ],
            inplace=True,
            axis=1,
        )
    gdf['_source'] = gdf.geometry.map(lambda geom: geom.coords[0])
    gdf['_target'] = gdf.geometry.map(lambda geom: geom.coords[-1])
    if tolerance > 0:
        rounding = int(np.ceil(-np.log10(tolerance)))

        def rounder(tup):
            return tuple(round(value, rounding) for value in tup)

        gdf['_source'] = gdf['_source'].map(rounder)
        gdf['_target'] = gdf['_target'].map(rounder)
    return nx.from_pandas_edgelist(gdf,
                                   '_source',
                                   '_target',
                                   edge_attr=True,
                                   create_using=nx.DiGraph)
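
A sketch of from_lines on two toy segments (the 'weight' attribute is made up). With the default tolerance of 0.001, the endpoints (1, 0) and (1, 0.0004) round to the same node, so the two edges share it:

import geopandas as gpd
from shapely.geometry import LineString

edges = gpd.GeoDataFrame({
    'weight': [1.0, 2.0],
    'geometry': [LineString([(0, 0), (1, 0)]),
                 LineString([(1, 0.0004), (2, 0)])],
})
g = from_lines(edges, attrs=['weight'])
print(g.number_of_nodes(), g.number_of_edges())  # 3 2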
Example #4
def get_pair_footprints(pair_ids: List, plot=True, save_path=None):
    """
    Plot or save overlapping areas between pairs of NAC images

    :param save_path: Path to output a vector file of the geometries, or None for no output. File based on extension.
    :param plot: Create a figure
    :param pair_ids: Pair ids, a list given like ['M106761561LExxM1101080055RE', 'M1096364254RExxM1142334242LE']
    :return: GeoDataFrame containing the pair footprints
    """
    pairs = [pair_id.split('xx') for pair_id in pair_ids]
    df = DataFrame(pairs, index=pair_ids, columns=['prod_id_0', 'prod_id_1'])
    df = df.applymap(get_geometry_from_ODE).applymap(wkt.loads)
    df['intersection'] = df.apply(lambda a: a[0].intersection(a[1]),
                                  axis='columns')
    gdf = GeoDataFrame(df, geometry='intersection')
    gdf['pair_ids'] = gdf.index.values
    if save_path:
        out = gdf.drop(['prod_id_0', 'prod_id_1'], axis='columns')
        if save_path.endswith('.json'):
            out.to_file(save_path, driver='GeoJSON')
        else:
            # let geopandas infer the driver from the file extension
            out.to_file(save_path)
    if plot:
        gdf.geometry = gdf.geometry.boundary
        intersection_plot = gdf.plot(column='pair_ids',
                                     legend=True,
                                     legend_kwds={
                                         'loc': 'center left',
                                         'bbox_to_anchor': (1, 0.5)
                                     })
        pyplot.xlabel('Longitude, degrees E')
        pyplot.ylabel('Latitude, degrees N')
    return gdf
Example #5
def paginate(
    pop_by_plot: gpd.GeoDataFrame,
    order: Sequence,
    page_distribution: Distribution,
    page_col: str,
    plot_number_col: str,
    district_number_col: str,
    n_plots: Optional[int] = None,
):
    if not n_plots:
        n_plots = len(pop_by_plot.index)

    if order:
        if n_plots % len(order) != 0:
            raise ValueError(
                'number of plots is not a multiple of the order length')

        pop_by_plot['order'] = order
        pop_by_plot.sort_values(by=['district', 'order'], inplace=True)
        pop_by_plot.drop(columns='order', inplace=True)

    pages = _get_simulated_plots_by_page(page_distribution, n_plots)
    pop_by_page = _get_simulated_pop_by_page(
        pop_by_plot,
        pages,
        page_col=page_col,
        plot_number_col=plot_number_col,
        district_number_col=district_number_col,
    )

    return pop_by_page
Example #6
    def remove_erroneous_pv_polygons(
        self,
        raw_PV_installations_on_rooftop: gpd.GeoDataFrame = None
    ) -> gpd.GeoDataFrame:
        """
        Removes PV polygons whose aggregated intersected area is larger than their original raw area

        Parameters
        ----------
        raw_PV_installations_on_rooftop: GeoPandas.GeoDataFrame
            GeoDataFrame which must contain the columns "area_inter", "raw_area", and "identifier"
        Returns
        -------
        GeoPandas.GeoDataFrame
            Input GeoDataFrame where erroneous PV polygons have been removed
        """

        # Compute share of raw area that the intersected pv polygon covers
        raw_PV_installations_on_rooftop["percentage_intersect"] = (
            raw_PV_installations_on_rooftop["area_inter"] /
            raw_PV_installations_on_rooftop["raw_area"])

        # Group intersection by polygon identifier and sum percentage
        group_intersection_id = raw_PV_installations_on_rooftop.groupby(
            "identifier").agg({
                "area_inter": "sum",
                "Street": "first",
                "Street_Address": "first",
                "raw_area": "first",
                "City": "first",
                "PostalCode": "first",
                "percentage_intersect": "sum",
            })

        # Find erroneous polygons whose area after intersection is larger than their original (raw) area
        erroneous_ids = group_intersection_id[
            group_intersection_id["percentage_intersect"] > 1.1].index.tolist()

        # Filter out erroneous polygons identified above and all their respective sub-parts
        raw_PV_installations_on_rooftop = raw_PV_installations_on_rooftop.drop(
            raw_PV_installations_on_rooftop.index[
                (raw_PV_installations_on_rooftop["identifier"].isin(erroneous_ids))
                & (raw_PV_installations_on_rooftop["percentage_intersect"] < 1)])

        # Drop duplicate identifiers for erroneous polygons
        raw_PV_installations_on_rooftop = raw_PV_installations_on_rooftop.drop(
            raw_PV_installations_on_rooftop.index[
                (raw_PV_installations_on_rooftop["identifier"].isin(erroneous_ids))
                & (raw_PV_installations_on_rooftop["identifier"].duplicated())])

        return raw_PV_installations_on_rooftop
Example #7
def delete_small_polygons(polygons: geopandas.GeoDataFrame, area=1e-6):
    """Deletion is in place; returns the number of polygons dropped."""
    # collect index labels (not positions) of polygons below the threshold
    todrop = [idx for idx, p in polygons.geometry.items()
              if p.area < area]

    polygons.drop(todrop, inplace=True)
    return len(todrop)
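
Usage sketch: one sliver below the default area threshold is dropped in place:

import geopandas
from shapely.geometry import box

polys = geopandas.GeoDataFrame(geometry=[box(0, 0, 1, 1),
                                         box(0, 0, 1e-4, 1e-4)])
n_dropped = delete_small_polygons(polys)
print(n_dropped, len(polys))  # 1 1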
Example #8
File: core.py Project: amauryval/OsmGT
    def _clean_attributes(self,
                          input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        for col_name in input_gdf.columns:
            if col_name in self.__USELESS_COLUMNS:
                input_gdf.drop(columns=[col_name], inplace=True)

        if self._ID_DEFAULT_FIELD not in input_gdf.columns:
            input_gdf.loc[:,
                          self._ID_DEFAULT_FIELD] = input_gdf.index.astype(str)

        return input_gdf
Example #9
def prepare_parcels(bldgs: gpd.GeoDataFrame, blocks: gpd.GeoDataFrame,
                    parcels: gpd.GeoDataFrame) -> pd.DataFrame:
    '''
    For a single GADM, this script (1) creates the PlanarGraph associated
    with each respective parcel and (2) maps all buildings to their corresponding
    parcel. The buildings are converted to centroids and then to Node types so
    they can just be added to the PlanarGraph
    '''

    # Convert buildings to centroids
    bldgs['centroids'] = bldgs['geometry'].centroid
    bldgs.set_geometry('centroids', inplace=True)

    # We want to map each building to a given block to then map the buildings to a parcel
    bldgs = gpd.sjoin(bldgs, blocks, how='left', op='within')
    bldgs.drop(columns=['index_right'], inplace=True)

    # Now, join the parcels with the buildings
    parcels = parcels.merge(bldgs[['block_id', 'centroids']],
                            how='left',
                            on='block_id')
    parcels.rename(columns={
        'geometry': 'parcel_geometry',
        'centroids': 'buildings'
    },
                   inplace=True)

    # Now collapse on the block and clean
    parcels = parcels.groupby('block_id').agg(list)
    parcels['parcel_geometry'] = parcels['parcel_geometry'].apply(
        lambda x: x[0])
    parcels['buildings'] = parcels['buildings'].apply(
        lambda x: [] if x == [np.nan] else x)

    # Checks
    assert blocks.shape[0] == parcels.shape[0]  # We should maintain block count
    parcels['buildings_count'] = parcels['buildings'].apply(lambda x: len(x))
    #assert parcels['buildings_count'].sum() == bldgs.shape[0]  # We should maintain bldgs count

    parcels.reset_index(inplace=True)

    # Now, create the graph for each parcel
    parcels['planar_graph'] = parcels['parcel_geometry'].apply(
        PlanarGraph.multilinestring_to_planar_graph)

    # And convert the buildings from shapely.Points -> topology.Nodes
    parcels['buildings'] = parcels['buildings'].apply(
        lambda x: [point_to_node(p) for p in x])

    return parcels
Example #10
def get_flat_priority(pou_src, out_file):
    flat = GeoDataFrame(columns=['id', 'DT', 'geo', 'obj'])
    pou = read_file(pou_src)
    pou = pou[(pou['PURPOSE'] == 'IRRIGATION') & (pou['WRSTATUS'] == 'ACTIVE')]
    pou['ENFRPRIDAT'] = [to_datetime(x) for x in pou['ENFRPRIDAT']]
    pou = pou.rename(columns={'geometry': 'geo', 'ENFRPRIDAT': 'dt'})
    pou = pou.sort_values(by='dt')
    pou = pou[['dt', 'geo', 'OBJECTID']]
    pou = pou.reset_index(drop=True)
    good_rows = [
        i for i, x in enumerate(pou['dt']) if isinstance(x, Timestamp)
    ]
    pou = pou.loc[good_rows]
    pou = pou.astype({'OBJECTID': int})

    first, covered = True, None
    ct = 0
    for i, (dt, g, obj) in tqdm(pou.iterrows(), total=pou.shape[0]):
        if first:
            flat.loc[ct] = [ct, dt, g, obj]
            ct += 1
            first = False
        else:
            equal = [
                i for i, x in enumerate(flat['geo']) if g.almost_equals(x)
            ]
            # test list emptiness, not any(): an index of 0 is falsy
            if equal:
                continue
            inter = [i for i, x in enumerate(flat['geo']) if g.intersects(x)]
            if not inter:
                flat.loc[ct] = [ct, dt, g, obj]
                ct += 1
            else:
                for ix in inter:
                    g = g.difference(flat.loc[ix]['geo'])
                if g.area > 0:
                    flat.loc[ct] = [ct, dt, g, obj]
                    ct += 1

    good_rows = [
        i for i, x in enumerate(flat['geo'])
        if isinstance(x, Polygon) or isinstance(x, MultiPolygon)
    ]
    flat = flat.loc[good_rows]
    geo = flat['geo']
    flat['DT'] = [str(x)[:10] for x in flat['DT']]
    flat['dt_int'] = [int(''.join(x.split('-'))) for x in flat['DT']]
    flat.drop(columns=['geo'], inplace=True)
    gdf = GeoDataFrame(flat, geometry=geo, crs='EPSG:32100')
    gdf.to_file(out_file)
Example #11
def write_outputs(
    cfg: dict,
    bin_gdf: GeoDataFrame,
    eq_gdf: GeoDataFrame,
    write_index: bool = False,
) -> None:
    """
    Writes output GIS files and plots (e.g., maps or MFD plots).

    All of the options for what to write are specified in the `cfg`.

    :param cfg:
        Configuration for the evaluations, such as that parsed from the YAML
        config file.

    :param bin_gdf:
        :class:`GeoDataFrame` with the spatial bins for testing

    :param eq_gdf:
        :class:`GeoDataFrame` with the observed earthquake catalog.
    """

    logger.info("writing outputs")

    if "plots" in cfg["output"].keys():
        write_mfd_plots_to_gdf(bin_gdf, **cfg["output"]["plots"]["kwargs"])

    if "map_epsg" in cfg["config"]:
        out_gdf = out_gdf.to_crs(cfg["config"]["map_epsg"])

    if "bin_gdf" in cfg["output"].keys():
        outfile = cfg["output"]["bin_gdf"]["file"]
        out_format = outfile.split(".")[-1]
        bin_gdf["bin_index"] = bin_gdf.index
        bin_gdf.index = np.arange(len(bin_gdf))

        if out_format == "csv":
            write_bin_gdf_to_csv(outfile, bin_gdf)

        else:
            try:
                bin_gdf.drop("SpacemagBin", axis=1).to_file(
                    outfile,
                    driver=OUTPUT_FILE_MAP[out_format],
                    index=write_index,
                )
            except KeyError:
                raise Exception(f"No writer for {out_format} format")
Example #12
def add_block_id(bldg_pop: gpd.GeoDataFrame,
                 block: Union[gpd.GeoDataFrame, str],
                 ) -> gpd.GeoDataFrame:
    """
    Step 2: some building files don't have the block_id, so it may need
    to be joined on. Joins the block_id column onto the building geodf.
    NOTE: block can be a path to the block GeoDataFrame, or the already
    loaded GeoDataFrame.
    """
    block = flex_load(block)
    bldg_pop = utils.join_block_building(block, bldg_pop)
    if 'index_right' in bldg_pop.columns:
        bldg_pop.drop(columns=['index_right'], inplace=True)
    return bldg_pop
Example #13
def add_block_bldg_area(bldg_pop: gpd.GeoDataFrame,
                        block: gpd.GeoDataFrame,
                        ) -> gpd.GeoDataFrame:
    """
    Calculates the total building area (in sq. km) per block and adds it
    to the bldg_pop geodf as 'block_bldg_area'
    """
    bldg_pop = bldg_pop.to_crs("EPSG:3395")
    bldg_pop['bldg_area'] = (bldg_pop.area * 1e-6)
    block_bldg_area = bldg_pop[['block_id', 'bldg_area']].groupby('block_id').sum().reset_index()
    block_bldg_area.rename(columns={'bldg_area': 'block_bldg_area'}, inplace=True)

    bldg_pop = bldg_pop.merge(block_bldg_area, how='left', on='block_id')
    bldg_pop = bldg_pop.to_crs("EPSG:4326")
    bldg_pop.drop(columns=["bldg_area"], inplace=True)
    return bldg_pop
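
A sketch of the call with toy footprints in EPSG:4326. Note that the block argument is accepted but never used by the body above, so None is passed here:

import geopandas as gpd
from shapely.geometry import box

bldg_pop = gpd.GeoDataFrame(
    {'block_id': ['b1', 'b1']},
    geometry=[box(0, 0, 0.001, 0.001), box(0.002, 0, 0.003, 0.001)],
    crs='EPSG:4326')
bldg_pop = add_block_bldg_area(bldg_pop, block=None)
print(bldg_pop['block_bldg_area'])  # per-block footprint area in sq. km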
Example #14
def convert_GeoPandas_to_Bokeh_format(
    gdf: gpd.GeoDataFrame
) -> ColumnDataSource:
    """
    Convert a GeoPandas GeoDataFrame to a Bokeh ColumnDataSource object.

    :param gdf: GeoPandas GeoDataFrame with polygon(s) under the column
                name 'geometry'.

    :return: ColumnDataSource for Bokeh.
    """
    gdf_new = gdf.drop('geometry', axis=1).copy()
    gdf_new['x'] = gdf.apply(getGeometryCoords, 
                             geom='geometry', 
                             coord_type='x', 
                             shape_type='polygon', 
                             axis=1)
    
    gdf_new['y'] = gdf.apply(getGeometryCoords, 
                             geom='geometry', 
                             coord_type='y', 
                             shape_type='polygon', 
                             axis=1)
    
    return ColumnDataSource(gdf_new)
Example #15
    def __geometrize_gdf(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        try:
            geometry = gdf['geometry'].map(shapely.wkt.loads)
            geometrized = gdf.drop(columns='geometry')
            return gpd.GeoDataFrame(geometrized, geometry=geometry)
        except Exception:
            # fall back to the input unchanged if the column is not WKT
            return gdf
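
The same WKT-parsing pattern as a standalone sketch with toy data; the method above simply wraps this in a try/except:

import geopandas as gpd
import pandas as pd
import shapely.wkt

df = pd.DataFrame({'name': ['a', 'b'],
                   'geometry': ['POINT (0 0)', 'POINT (1 1)']})
geometry = df['geometry'].map(shapely.wkt.loads)
gdf = gpd.GeoDataFrame(df.drop(columns='geometry'), geometry=geometry)
print(gdf.geometry.iloc[1])  # POINT (1 1)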
Example #16
def dissolve(
    gdf: gpd.GeoDataFrame,
    by: Iterable[str],
    func: Union[Callable, str, list, dict],
    how: Union[Literal["union", "first"], Callable[[gpd.GeoSeries],
                                                   BaseGeometry]] = "union",
) -> gpd.GeoDataFrame:
    """
    Dissolve layer by aggregating features based on common attributes.

    Args:
        gdf: GeoDataFrame with non-empty (Multi)Polygon geometries.
        by: Names of columns to group features by.
        func: Aggregation function for data columns (see :meth:`pd.DataFrame.groupby`).
        how: Aggregation function for geometry column.
            Either 'union' (:meth:`gpd.GeoSeries.unary_union`),
            'first' (first geometry in group),
            or a function aggregating multiple geometries into one.

    Returns:
        GeoDataFrame with dissolved geometry and data columns,
        and grouping columns set as the index.

    """
    check_gdf(gdf)
    merges = {"union": lambda x: x.unary_union, "first": lambda x: x.iloc[0]}
    data = gdf.drop(columns=gdf.geometry.name).groupby(by=by).aggregate(func)
    geometry = gdf.groupby(by=by,
                           group_keys=False)[gdf.geometry.name].aggregate(
                               merges.get(how, how))
    return gpd.GeoDataFrame(geometry, geometry=gdf.geometry.name,
                            crs=gdf.crs).join(data)
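
A usage sketch, assuming the module's check_gdf validation passes for this toy frame: the two adjacent squares in zone 'a' dissolve into one 2x1 rectangle while their populations are summed.

import geopandas as gpd
from shapely.geometry import box

gdf = gpd.GeoDataFrame({
    'zone': ['a', 'a', 'b'],
    'pop': [10, 20, 30],
    'geometry': [box(0, 0, 1, 1), box(1, 0, 2, 1), box(3, 0, 4, 1)],
})
out = dissolve(gdf, by=['zone'], func={'pop': 'sum'}, how='union')
print(out.loc['a', 'pop'], out.loc['a', 'geometry'].area)  # 30 2.0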
Example #17
def get_aggregate_locations_by_district(
        population_data: pd.DataFrame,
        location_data: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:

    len_pop = len(population_data.index)
    len_loc = len(location_data.index)

    if len_loc == 0 or len_pop == 0:
        return gpd.GeoDataFrame()

    elif len_loc < len_pop:
        sample_index = interval_sample(
            population_data.index,
            len_loc,
        )
        new_geom = gpd.GeoDataFrame(
            {'geometry': location_data.geometry},
            index=sample_index,
        )

        try:
            # DataFrame.align returns a (left, right) tuple; keep the left side
            new_geom, _ = new_geom.align(
                population_data,
                join='outer',
                method='pad',
            )
        except NotImplementedError:
            return gpd.GeoDataFrame()

        location_data = new_geom

    elif len_pop < len_loc:
        sample_index = interval_sample(
            location_data.index,
            len_pop,
        )
        location_data = location_data.loc[sample_index]

    location_data = location_data.reset_index()
    location_data = location_data.drop(
        columns=['level_0',  'index'],
        errors='ignore',
    )
    population_data = population_data.reset_index()
    population_data = population_data.drop(
        columns=['plot_number', 'district'],
        errors='ignore',
    )

    geodata = pd.concat(
        [location_data, population_data],
        axis=1,
    )
    geodata = geodata.drop(
        columns=['index', 'Unnamed: 0'],
        errors='ignore',
    )

    return geodata
Example #18
def save_selection(df: gpd.GeoDataFrame, name: str, project_path: str) -> None:
    '''
    Saves the selection as GeoJSON under <project_path>/exported,
    auto-incrementing a version suffix to avoid overwriting
    '''
    out_dir = Path(project_path) / 'exported'
    out_dir.mkdir(parents=True, exist_ok=True)
    v = 0
    out_path = out_dir / "{}.v{}.geojson".format(name, v)
    while out_path.is_file():
        v += 1
        out_path = out_dir / "{}.v{}.geojson".format(name, v)
    if 'x' in df.columns:
        df = df.drop(columns=['x'])
    if 'y' in df.columns:
        df = df.drop(columns=['y'])
    df.to_file(str(out_path), driver='GeoJSON')
    print('Saved to: {}'.format(out_path.resolve()))
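
A usage sketch (the project path is hypothetical); repeated calls bump the version suffix instead of overwriting:

import geopandas as gpd
from shapely.geometry import Point

df = gpd.GeoDataFrame({'name': ['site-1']},
                      geometry=[Point(0, 0)], crs='EPSG:4326')
save_selection(df, name='sites', project_path='/tmp/demo')
# Saved to: /tmp/demo/exported/sites.v0.geojson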
Example #19
def clip_bands_to_polygon(bands, out_bands, mask):
    with fiona.open(mask, 'r') as src:
        feat = [f for f in src]
    bounds = shape(feat[0]['geometry'])
    df = read_csv(bands)
    gdf = GeoDataFrame(df, geometry=points_from_xy(y=df['LAT_GCS'], x=df['Lon_GCS']))
    gdf = clip(gdf, mask=bounds)
    df = DataFrame(gdf.drop(columns='geometry'))
    df.to_csv(out_bands)
Example #20
def map_choropleth(
    gdf: geopandas.GeoDataFrame,
    color_field,
    *,
    highlight_polygon: str = "",
    min_color: str = "#F4D2D2",
    max_color: str = "#CC0000",
    color_steps: int = 5,
    legend_title: Optional[str] = None,
) -> alt.Chart:
    """
    Creates a choropleth map of covid data from a geopandas dataframe.

    Args:
        gdf (geopandas.GeoDataFrame): geodataframe of covid data.
        color_field (str): Column from gdf that will be used for the choropleth map.
        highlight_polygon (str, optional): Creates a border around a selected polygon to emphasise it.
        min_color (str, optional): HSL, RGB, HEX, WEB of min color of choropleth range. Defaults to
            "#F4D2D2"
        max_color (str, optional): HSL, RGB, HEX, WEB of  max color of choropleth range. Defaults to "#CC0000"
        color_steps (int, optional): Number of steps between min and max for final choropleth color range.
            Defaults to 5.
        legend_title (str, optional): Title for legend. Defaults to color_field value.

    Returns:
        Altair chart instance.
    """

    gdf = gdf.drop(
        ["id"],
        axis=1)  # dropping ID col to avoid warning message from gpdvega/altair
    data = convert_gfp_to_alt(gdf)
    color_range = list(
        Color(min_color).range_to(Color(max_color), color_steps))
    color_range = [x.hex for x in color_range]
    legend_title = color_field if not legend_title else legend_title

    chart = (
        alt.Chart(data).mark_geoshape(
            strokeWidth=1,
            stroke="#fff",
            # width=300,height=200
        ).project().encode(color=alt.Color(
            f"properties.{color_field}:Q",
            scale=alt.Scale(
                type="quantize",
                nice=True,
                range=color_range,
            ),
            legend=alt.Legend(orient="top", title=legend_title,
                              titleLimit=200),
        )).properties(width=600, height=460))

    if highlight_polygon:
        gdf_highlight = gdf[gdf["NAME"].str.contains(highlight_polygon,
                                                     case=False)]
Example #21
def _import_gdf(
    gdf: GeoDataFrame, sql_tablename: str, geom_type: str, uri: str = DEFAULT_DB_URI
) -> None:
    """
    Import a geopandas GeoDataFrame to SQL
    """

    gdf.columns = [x.lower() for x in gdf.columns]
    epsg_code = gdf.crs.to_epsg()

    gdf["geom"] = gdf["geometry"].apply(lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop(columns="geometry", inplace=True)

    engine = sqlalchemy.create_engine(uri)
    gdf.to_sql(
        sql_tablename,
        engine,
        dtype={"geom": Geometry(geom_type.upper(), srid=epsg_code)},
        if_exists="replace",
    )
    engine.dispose()
Example #22
    def __init__(
            self,
            data: gpd.GeoDataFrame,
            variable: str,
            kernel: Kernel,
            cell_size,
            polygon=None,
    ):
        if not isinstance(data, gpd.GeoDataFrame):
            raise TypeError('data should be a geopandas GeoDataFrame')

        if 'geometry' not in data.columns:
            data['geometry'] = data[data._geometry_column_name]
            data = data.drop([data._geometry_column_name], axis=1)
            data = data.set_geometry('geometry')

        self.polygon = polygon

        data = data.rename(columns={
            variable: 'variable',
        })

        self.cell_size = cell_size
        # use centroids so the convex hull works for any geometry type
        centroids = data.geometry.centroid
        convex = MultiPoint([(p.x, p.y) for p in centroids]).convex_hull

        if not self.polygon:
            self.polygon = convex.buffer(kernel.bandwidth)

        xmin, ymin, xmax, ymax = self.bbox = self.polygon.bounds
        x = np.arange(xmin, xmax, self.cell_size)
        y = np.arange(ymin, ymax, self.cell_size)
        y = np.flipud(y)
        x, y = np.meshgrid(x, y)
        self.shape = x.shape
        flat = x.flatten()[:, np.newaxis], y.flatten()[:, np.newaxis]
        df = pd.DataFrame(np.hstack(flat), columns=['x', 'y'])
        outside = [row.Index for row in df.itertuples() if not self.polygon.contains(Point(row.x, row.y))]
        self.df = df.drop(outside)

        self.kernel = kernel
        x1, x2 = np.meshgrid(self.df['x'], data.geometry.x)
        y1, y2 = np.meshgrid(self.df['y'], data.geometry.y)
        self.d = np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
        self.w = self.kernel(self.d)
        vals = data['variable'].values.reshape(len(data), 1)
        self.df['density'] = np.sum(self.w * vals, axis=0)
        zeros = pd.Series(np.zeros(len(outside)), index=outside)
        grid = pd.concat([self.df['density'], zeros],
                         verify_integrity=True).sort_index()
        self.grid = grid.values.reshape(self.shape)
Example #23
def to_geojson(gdf: gpd.GeoDataFrame, file_name, crs="EPSG:4326"):
    r"""
    Write GeoDataFrame to GeoJson file

    Write GeoDataFrame to a GeoJson file with the default coordinate reference
    system (crs) "EPSG:4326". If a GeoDataFrame has multiple columns containing
    geometries, only the column `GeoDataFrame.geometry.name` is kept.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame to write to file.
    file_name : str
        Name of the output GeoJson file.
    crs : str, defaults to "EPSG:4326"
        The coordinate reference system (crs) of the output GeoJson file.

    See Also
    --------
    ~stplanpy.geo.read_shp
    
    Examples
    --------
    The example data file, "`tl_2011_06_taz10.zip`_", can be downloaded from github.
 
    .. code-block:: python

        from stplanpy import geo

        # Read taz data from zip file
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Write to file
        taz.to_geojson("taz.GeoJson")

    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    """
    gdf.drop(gdf.loc[:, (gdf.dtypes == "geometry") &
                     (gdf.columns != gdf.geometry.name)].columns,
             axis=1).to_crs(crs).to_file(file_name, driver="GeoJSON")
Example #24
def load_crime_stats(population_group=None, crime_list=None, provence=None):
    # lowercase the province name
    if provence is not None:
        provence = provence.lower()
    # get data set dir
    data_path = get_work_path()
    # load and clean the police stats
    police_stats = clean_police_stats(
        data_path.joinpath('Police_Statistics___2005_-_2017.csv'))
    if crime_list is not None:
        police_stats = police_stats[police_stats['Crime'].isin(crime_list)]
    if provence is not None:
        police_stats = police_stats.query(f"Province == '{provence}'")
    # population shape file
    pop_stats = clean_popluation_stats(
        data_path.joinpath(
            'population/geo_export_3ec3ac74-ddff-4220-8007-b9b5643f79af.shp'))
    base_group = ['sal_code_i', 'pr_name', 'sp_name', 'geometry']
    if population_group is not None:
        # filter out columns
        pop_stats = pop_stats[pop_groups[population_group] + base_group]
    if provence is not None:
        pop_stats = pop_stats.query(f"pr_name == '{provence}'")
    # shape id to weights
    precinct = clean_area_2_precint(
        data_path.joinpath('Precinct_to_small_area_weights.csv'))
    # munge data
    df = merge(precinct,
               pop_stats,
               left_on='small_area',
               right_on='sal_code_i')
    df = merge(df, police_stats, left_on='precinct', right_on='Police Station')
    # calculate crime per shape as a proportion of precinct weight
    df['total_crime'] = df.weight * df.Incidents
    # keep as geo-dataframe
    df = GeoDataFrame(df, crs=pop_stats.crs)
    # clean data frame
    df = df.drop([
        'sal_code_i', 'pr_name', 'sp_name', 'Police Station', 'Incidents',
        'weight'
    ],
                 axis=1)
    # agg precinct back into shapes
    temp_df = df.groupby(['small_area', 'Year',
                          'Crime'])[['total_crime']].sum().round()
    df = df.drop_duplicates(subset=['small_area', 'Year', 'Crime']).drop(
        ['total_crime'], axis=1)
    df = merge(df, temp_df, on=['small_area', 'Year', 'Crime'])
    return df
Example #25
def geocode_dataframe(address_dataframe, address_column):
    df = address_dataframe.copy()
    df['geocode'] = df[address_column].apply(geolocator.geocode)

    df['geometry'] = df['geocode'].apply(
        lambda x: Point(x.longitude, x.latitude))  # create a geometry column

    gdf = GeoDataFrame(
        df,
        geometry='geometry',
        crs='EPSG:4326',
    )

    return gdf.drop(columns=['geocode'])
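
The geolocator global is assumed to be a geopy geocoder such as Nominatim (network access required; geocode returns None for unmatched addresses, which this function does not guard against):

import pandas as pd
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent='example-app')
addresses = pd.DataFrame({'address': ['175 5th Avenue, New York, NY']})
gdf = geocode_dataframe(addresses, 'address')
print(gdf.geometry.iloc[0])  # POINT (-73.98... 40.74...)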
Example #26
def geocode_dataframe(address_dataframe, address_column):
    df = address_dataframe.copy()

    # geocode address column by apply-ing geolocator.geocode
    df['geocode'] = df[address_column].apply(geolocator.geocode)

    # create a geometry column
    df['geometry'] = df['geocode'].apply(
        lambda x: Point(x.longitude, x.latitude))

    # create a geodataframe called gdf from df
    # with the crs set to 'EPSG:4326'
    gdf = GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

    # drop the geocode column from the gdf
    gdf = gdf.drop(['geocode'], axis=1)

    return gdf
Example #27
def surface_dissim(
        data: gpd.GeoDataFrame,
        group_1_pop_var: str,
        group_2_pop_var: str,
        w: Kernel = None,
):
    if not isinstance(data, gpd.GeoDataFrame):
        raise TypeError('data should be a geopandas GeoDataFrame')

    if 'geometry' not in data.columns:
        data['geometry'] = data[data._geometry_column_name]
        data = data.drop([data._geometry_column_name], axis=1)
        data = data.set_geometry('geometry')

    data = data.rename(columns={
        group_1_pop_var: 'group_1_pop_var',
        group_2_pop_var: 'group_2_pop_var',
    })
    sum_1 = data['group_1_pop_var'].sum()
    data['group_1_pop_var_norm'] = data['group_1_pop_var'] / sum_1
    sum_2 = data['group_2_pop_var'].sum()
    data['group_2_pop_var_norm'] = data['group_2_pop_var'] / sum_2

    if not w:
        points = [(p.x, p.y) for p in data.centroid]
        w = Kernel(points)

    w_, _ = w.full()

    density_1 = w_ * data['group_1_pop_var_norm'].values
    density_2 = w_ * data['group_2_pop_var_norm'].values
    densities = np.vstack([density_1.sum(axis=1), density_2.sum(axis=1)])
    v_union = densities.max(axis=0).sum()
    v_intersect = densities.min(axis=0).sum()

    s = 1 - v_intersect / v_union

    core_data = data[['group_1_pop_var', 'group_2_pop_var', 'geometry']]

    return s, core_data
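
A toy sketch, assuming Kernel here is libpysal.weights.Kernel as the signature suggests; the two groups occupy disjoint cells of a 2x2 checkerboard:

import geopandas as gpd
from shapely.geometry import box

cells = gpd.GeoDataFrame({
    'g1': [10, 0, 0, 10],
    'g2': [0, 10, 10, 0],
    'geometry': [box(i, j, i + 1, j + 1) for i in (0, 1) for j in (0, 1)],
})
s, core = surface_dissim(cells, 'g1', 'g2')
print(s)  # dissimilarity in [0, 1]; higher means more segregated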
Example #28
def get_all_connections(roads: gpd.GeoDataFrame, points: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    find long roads and split them and merge them

    Parameters
    ----------
    roads
    points

    Returns
    -------

    """
    long_roads = find_long_roads(roads=roads, points=points)
    smaller_parts = split_long_roads(long_roads, roads=roads, points=points)

    small_roads = roads.drop(index=list(long_roads.keys()))
    gdf_long_roads_split = gpd.GeoDataFrame(crs=roads.crs, geometry=smaller_parts)
    gdf_small_roads = gpd.GeoDataFrame(crs=roads.crs, geometry=small_roads.geometry)
    connections = pd.concat([gdf_small_roads, gdf_long_roads_split], axis=0, ignore_index=True, sort=False)
    print('prepared all connections')
    return connections
Example #29
def fill_hex_grid(gdf: gpd.GeoDataFrame,
                  geom_column: str = "geometry") -> gpd.GeoDataFrame:
    bbox = gdf.total_bounds
    # Pandas somehow mangles Geopandas geometry column types so that the types
    # become mixed after concatenation and may cause TypeErrors, i.e. some
    # Shapely geometries may be cast as strings in the process. We have to
    # concatenate regular dataframes instead and reconstruct a geodataframe
    # from the hex indices afterwards. Utterly stupid.
    df = gdf.drop(columns=[geom_column])

    bbox_polygon = box(*bbox)
    hex_column = next((col for col in df.columns if col.startswith("hex")),
                      False)
    if not hex_column:
        raise AssertionError(
            "Cannot calculate clusters, hex column not found.")
    resolution = int(hex_column.replace("hex", ""))
    # H3 polyfill needs geojson-like stuff. geo_json_conformant switches coordinate order
    hexes_in_bbox = h3.polyfill(mapping(bbox_polygon),
                                resolution,
                                geo_json_conformant=True)
    # Add only missing hexes here
    missing_hexes = set(hexes_in_bbox).difference(df[hex_column])
    missing_df = pd.DataFrame(list(missing_hexes),
                              columns=[hex_column]).set_index(hex_column,
                                                              drop=False)
    columns_to_add = df.columns.difference(missing_df.columns)
    for column in columns_to_add:
        # Just add zeroes for missing index values
        missing_df.insert(0, column, 0)
    combined_df = pd.concat((df, missing_df))

    # Add centroid geometries and reconstruct the geodataframe
    centroid_lat_lon = [h3.h3_to_geo(hex) for hex in combined_df[hex_column]]
    centroids = [Point(geom[1], geom[0]) for geom in centroid_lat_lon]
    combined_gdf = gpd.GeoDataFrame(combined_df)
    combined_gdf = combined_gdf.set_geometry(centroids)
    return combined_gdf
Example #30
    def _remove_overlapping(
            df: geopandas.GeoDataFrame,
            overlapping_threshold: float) -> geopandas.GeoDataFrame:

        rows_to_remove: Set[int] = set()

        org_len = len(df)
        # reset the index so the positional iloc slice below lines up with labels
        df = df[df.geometry.is_valid].reset_index(drop=True)
        print(f"Removed {org_len - len(df)} invalid geometries")

        for idx_1, row_1 in tqdm(df.iterrows(), total=len(df)):
            for idx_2, row_2 in df.iloc[idx_1 + 1:].iterrows():
                if row_1.geometry.intersects(row_2.geometry):
                    max_intersection_area = overlapping_threshold * min(
                        row_1.geometry.area, row_2.geometry.area)
                    if row_1.geometry.intersection(
                            row_2.geometry).area >= max_intersection_area:
                        rows_to_remove.add(idx_1)
                        rows_to_remove.add(idx_2)

        cleaned_df = df.drop(rows_to_remove)

        print(f"New df has len {len(cleaned_df)}, from {len(df)}")
        return cleaned_df
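
Calling the helper directly with toy boxes (it takes no self, so presumably a @staticmethod; tqdm is assumed to be imported as the loop requires). The first two boxes overlap by 25% of their area, above the 0.2 threshold, so both are removed:

import geopandas
from shapely.geometry import box

df = geopandas.GeoDataFrame(geometry=[box(0, 0, 2, 2),
                                      box(1, 1, 3, 3),
                                      box(10, 10, 11, 11)])
cleaned = _remove_overlapping(df, overlapping_threshold=0.2)
print(len(cleaned))  # 1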
Example #31
def assert_geodataframe_equal(left, right,
                              check_dtype=True,
                              check_index_type='equiv',
                              check_column_type='equiv',
                              check_frame_type=True,
                              check_like=False,
                              check_less_precise=False,
                              check_geom_type=False,
                              check_crs=True):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If true, ignore the order of rows & columns
    check_less_precise : bool, default False
        If True, use geom_almost_equals. if False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_frame_type` is True, then also check that the
        crs matches.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # no crs can be either None or {}
            if not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        'GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n'
        'Left columns: {lcols!r}, right columns: {rcols!r}'.format(
            lshape=left.shape, rshape=right.shape,
            lcols=left.columns, rcols=right.columns))

    if check_like:
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(left.columns, right.columns, exact=check_column_type,
                       obj='GeoDataFrame.columns')

    # geometry comparison
    assert_geoseries_equal(
        left.geometry, right.geometry, check_dtype=check_dtype,
        check_less_precise=check_less_precise,
        check_geom_type=check_geom_type, check_crs=False)

    # drop geometries and check remaining columns
    left2 = left.drop([left._geometry_column_name], axis=1)
    right2 = right.drop([right._geometry_column_name], axis=1)
    assert_frame_equal(left2, right2, check_dtype=check_dtype,
                       check_index_type=check_index_type,
                       check_column_type=check_column_type,
                       obj='GeoDataFrame')
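
A usage sketch; assert_geoseries_equal is assumed to be available from the same module:

from geopandas import GeoDataFrame
from shapely.geometry import Point

left = GeoDataFrame({'v': [1]}, geometry=[Point(0, 0)], crs='EPSG:4326')
right = left.copy()
assert_geodataframe_equal(left, right)    # passes silently
right['v'] = 2
# assert_geodataframe_equal(left, right)  # now raises an AssertionError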
Example #32
class TestSpatialJoinNYBB(unittest.TestCase):

    def setUp(self):
        nybb_filename, nybb_zip_path = download_nybb()
        self.polydf = read_file(nybb_zip_path, vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
        self.pointdf = GeoDataFrame([
            {'geometry' : Point(x, y), 'pointattr1': x + y, 'pointattr2': x - y}
            for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                            range(b[1], b[3], int((b[3]-b[1])/N)))], crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_geometry_name(self):
        # test sjoin is working with other geometry name
        polydf_original_geom_name = self.polydf.geometry.name
        self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                                  .set_geometry('new_geom'))
        self.assertNotEqual(polydf_original_geom_name, self.polydf.geometry.name)
        res = sjoin(self.polydf, self.pointdf, how="left")
        self.assertEqual(self.polydf.geometry.name, res.geometry.name)

    def test_sjoin_left(self):
        df = sjoin(self.pointdf, self.polydf, how='left')
        self.assertEquals(df.shape, (21,8))
        for i, row in df.iterrows():
            self.assertEquals(row.geometry.type, 'Point')
        self.assertTrue('pointattr1' in df.columns)
        self.assertTrue('BoroCode' in df.columns)

    def test_sjoin_right(self):
        # the inverse of left
        df = sjoin(self.pointdf, self.polydf, how="right")
        df2 = sjoin(self.polydf, self.pointdf, how="left")
        self.assertEquals(df.shape, (12, 8))
        self.assertEquals(df.shape, df2.shape)
        for i, row in df.iterrows():
            self.assertEquals(row.geometry.type, 'MultiPolygon')
        for i, row in df2.iterrows():
            self.assertEquals(row.geometry.type, 'MultiPolygon')

    def test_sjoin_inner(self):
        df = sjoin(self.pointdf, self.polydf, how="inner")
        self.assertEquals(df.shape, (11, 8))

    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        self.assertEquals(df.shape, (21,8))
        self.assertEquals(df.ix[1]['BoroName'], 'Staten Island')

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        self.assertEquals(df.shape, (21, 8))
        self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))

    def test_sjoin_bad_op(self):
        # AttributeError: 'Point' object has no attribute 'spandex'
        self.assertRaises(ValueError, sjoin,
            self.pointdf, self.polydf, how="left", op="spandex")

    def test_sjoin_duplicate_column_name(self):
        pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
        df = sjoin(pointdf2, self.polydf, how="left")
        self.assertTrue('Shape_Area_left' in df.columns)
        self.assertTrue('Shape_Area_right' in df.columns)

    def test_sjoin_values(self):
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        df = sjoin(self.pointdf, self.polydf, how='left')
        self.assertEquals(df.shape, (21,8))
        df = sjoin(self.polydf, self.pointdf, how='left')
        self.assertEquals(df.shape, (12,8))

    @unittest.skipIf(str(pd.__version__) < LooseVersion('0.19'), pandas_0_18_problem)
    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how='inner')
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how='left')
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how='right')

        # Recent Pandas development has introduced a new way of handling merges
        # this change has altered the output when no overlapping geometries
        if str(pd.__version__) > LooseVersion('0.18.1'):
            right_idxs = pd.Series(range(0,5), name='index_right',dtype='int64')
        else:
            right_idxs = pd.Series(name='index_right',dtype='int64')

        expected_inner_df = pd.concat([self.pointdf.iloc[:0],
                                       pd.Series(name='index_right', dtype='int64'),
                                       self.polydf.drop('geometry', axis = 1).iloc[:0]], axis = 1)

        expected_inner = GeoDataFrame(expected_inner_df, crs = {'init': 'epsg:4326', 'no_defs': True})

        expected_right_df = pd.concat([self.pointdf.drop('geometry', axis = 1).iloc[:0],
                                       pd.concat([pd.Series(name='index_left',dtype='int64'), right_idxs], axis=1),
                                       self.polydf], axis = 1)

        expected_right = GeoDataFrame(expected_right_df, crs = {'init': 'epsg:4326', 'no_defs': True})\
                            .set_index('index_right')

        expected_left_df = pd.concat([self.pointdf.iloc[17:],
                                      pd.Series(name='index_right', dtype='int64'),
                                      self.polydf.iloc[:0].drop('geometry', axis=1)], axis = 1)

        expected_left = GeoDataFrame(expected_left_df, crs = {'init': 'epsg:4326', 'no_defs': True})

        self.assertTrue(expected_inner.equals(df_inner))
        self.assertTrue(expected_right.equals(df_right))
        self.assertTrue(expected_left.equals(df_left))

    @unittest.skip("Not implemented")
    def test_sjoin_outer(self):
        df = sjoin(self.pointdf, self.polydf, how="outer")
        self.assertEquals(df.shape, (21,8))
Example #33
class TestSpatialJoinNYBB:

    def setup_method(self):
        nybb_filename = geopandas.datasets.get_path('nybb')
        self.polydf = read_file(nybb_filename)
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
        self.pointdf = GeoDataFrame(
            [{'geometry': Point(x, y),
              'pointattr1': x + y, 'pointattr2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=self.crs)

    def test_geometry_name(self):
        # test sjoin is working with other geometry name
        polydf_original_geom_name = self.polydf.geometry.name
        self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                                  .set_geometry('new_geom'))
        assert polydf_original_geom_name != self.polydf.geometry.name
        res = sjoin(self.polydf, self.pointdf, how="left")
        assert self.polydf.geometry.name == res.geometry.name

    def test_sjoin_left(self):
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        for i, row in df.iterrows():
            assert row.geometry.type == 'Point'
        assert 'pointattr1' in df.columns
        assert 'BoroCode' in df.columns

    def test_sjoin_right(self):
        # the inverse of left
        df = sjoin(self.pointdf, self.polydf, how="right")
        df2 = sjoin(self.polydf, self.pointdf, how="left")
        assert df.shape == (12, 8)
        assert df.shape == df2.shape
        for i, row in df.iterrows():
            assert row.geometry.type == 'MultiPolygon'
        for i, row in df2.iterrows():
            assert row.geometry.type == 'MultiPolygon'

    def test_sjoin_inner(self):
        df = sjoin(self.pointdf, self.polydf, how="inner")
        assert df.shape == (11, 8)

    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert df.shape == (21, 8)
        assert df.loc[1]['BoroName'] == 'Staten Island'

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert df.shape == (21, 8)
        assert np.isnan(df.loc[1]['Shape_Area'])

    def test_sjoin_bad_op(self):
        # AttributeError: 'Point' object has no attribute 'spandex'
        with pytest.raises(ValueError):
            sjoin(self.pointdf, self.polydf, how="left", op="spandex")

    def test_sjoin_duplicate_column_name(self):
        pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
        df = sjoin(pointdf2, self.polydf, how="left")
        assert 'Shape_Area_left' in df.columns
        assert 'Shape_Area_right' in df.columns

    @pytest.mark.parametrize('how', ['left', 'right', 'inner'])
    def test_sjoin_named_index(self, how):
        # original index names should be unchanged
        pointdf2 = self.pointdf.copy()
        pointdf2.index.name = 'pointid'
        df = sjoin(pointdf2, self.polydf, how=how)
        assert pointdf2.index.name == 'pointid'
        assert self.polydf.index.name is None

    def test_sjoin_values(self):
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        df = sjoin(self.polydf, self.pointdf, how='left')
        assert df.shape == (12, 8)

    @pytest.mark.skipif(str(pd.__version__) < LooseVersion('0.19'),
                        reason=pandas_0_18_problem)
    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how='inner')
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how='left')
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how='right')

        # Recent Pandas development has introduced a new way of handling merges
        # this change has altered the output when no overlapping geometries
        if str(pd.__version__) > LooseVersion('0.18.1'):
            right_idxs = pd.Series(range(0, 5), name='index_right',
                                   dtype='int64')
        else:
            right_idxs = pd.Series(name='index_right', dtype='int64')

        expected_inner_df = pd.concat(
            [self.pointdf.iloc[:0],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.drop('geometry', axis=1).iloc[:0]],
            axis=1)

        expected_inner = GeoDataFrame(
            expected_inner_df, crs={'init': 'epsg:4326', 'no_defs': True})

        expected_right_df = pd.concat(
            [self.pointdf.drop('geometry', axis=1).iloc[:0],
             pd.concat([pd.Series(name='index_left', dtype='int64'),
                        right_idxs],
                       axis=1),
             self.polydf],
            axis=1)

        expected_right = GeoDataFrame(
            expected_right_df, crs={'init': 'epsg:4326', 'no_defs': True})\
            .set_index('index_right')

        expected_left_df = pd.concat(
            [self.pointdf.iloc[17:],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.iloc[:0].drop('geometry', axis=1)],
            axis=1)

        expected_left = GeoDataFrame(
            expected_left_df, crs={'init': 'epsg:4326', 'no_defs': True})

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)

    @pytest.mark.skip("Not implemented")
    def test_sjoin_outer(self):
        df = sjoin(self.pointdf, self.polydf, how="outer")
        assert df.shape == (21, 8)