Example No. 1
# Assumed imports for this snippet; catch_net is a helper expected to live in the same package.
from geopandas import read_file, GeoDataFrame, GeoSeries
from numpy import append, in1d
from pandas import concat


def agg_catch(catch_del_shp,
              catch_sites_csv,
              catch_sites_col=['GRIDCODE', 'SITE'],
              catch_col='GRIDCODE'):
    """
    Function to take the output of the ArcGIS catchment delineation polygon shapefile and the catchment sites csv and return a shapefile with appropriately delineated polygons.
    """

    ## Catchment areas shp
    catch = read_file(catch_del_shp)[[catch_col, 'geometry']]

    ## dissolve the polygon
    catch3 = catch.dissolve(catch_col)

    ## Determine upstream catchments
    catch_df, singles_df = catch_net(catch_sites_csv, catch_sites_col)

    base1 = catch3[in1d(catch3.index, singles_df)].geometry
    for i in catch_df.index:
        t1 = append(catch_df.loc[i, :].dropna().values, i)
        t2 = GeoSeries(catch3[in1d(catch3.index, t1)].unary_union, index=[i])
        base1 = GeoSeries(concat([base1, t2]))

    ## Convert to GeoDataFrame (so that all functions can be applied to it)
    base2 = GeoDataFrame(base1.index,
                         geometry=base1.geometry.values,
                         crs=catch.crs)
    base2.columns = ['site', 'geometry']
    return (base2)
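A minimal usage sketch with hypothetical file paths; catch_net must be available in the same module:

site_catch = agg_catch('catchment_delineation.shp', 'catch_sites.csv')  # hypothetical inputs
site_catch.to_file('catchments_by_site.shp')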
Example No. 2
    def _standardize(cls, geo_df: gpd.GeoDataFrame, source: Union[str, Path, gpd.GeoDataFrame], feature_type_str: str):
        """
        Standardize the format of a given catchment/nexus geodataframe.

        Method first makes all column names lower case.  Then, unless the initial index already has a name of ``id`` (or
        some case-insensitive equivalent), the ``id`` column is set as the index.  An error is raised if this cannot be
        done because there is no ``id`` column.

        In the case of the index name already being a case-insensitive equivalent of ``id``, but not ``id`` precisely
        (e.g., ``ID``), the index's name is also standardized to ``id``.

        Parameters
        ----------
        geo_df : gpd.GeoDataFrame
            The geodataframe in question.
        source : Union[str, Path, gpd.GeoDataFrame]
            Either the source file for the data or a passed "base" geodataframe (when a file, it is included in error messages).
        feature_type_str : str
            A string describing the type of feature for this data.
        """
        geo_df.columns = geo_df.columns.astype(str).str.lower()
        # Standardize capitalization if it looks like this is already set properly
        if geo_df.index.name != 'id' and str(geo_df.index.name).lower() == 'id':
            geo_df.index.name = 'id'
        # Otherwise, set the index as the 'id' column
        elif geo_df.index.name != 'id':
            # This requires 'id' column to be present of course
            if 'id' not in geo_df.columns:
                # Adjust error message depending on whether the source was an existing dataframe or a data file
                if not isinstance(source, gpd.GeoDataFrame):
                    msg = 'Bad format of {} file {}: no \'id\' or \'ID\' column'.format(feature_type_str, source)
                else:
                    msg = 'Bad format of {} dataframe: no \'id\' or \'ID\' column'.format(feature_type_str)
                raise RuntimeError(msg)
            geo_df.set_index('id', inplace=True)
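A standalone sketch of the same standardization pattern on a toy GeoDataFrame (not the class method itself; the column and values are made up):

import geopandas as gpd
from shapely.geometry import Point

toy = gpd.GeoDataFrame({'ID': ['cat-1', 'cat-2'],
                        'geometry': [Point(0, 0), Point(1, 1)]})
toy.columns = toy.columns.astype(str).str.lower()  # 'ID' -> 'id'
toy = toy.set_index('id')                          # index is now named 'id'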
Example No. 3
# Assumed imports for this snippet; PROJECTION is taken to be a module-level geoplot projection constant.
import numpy as np
import geoplot as gplt
import matplotlib.pyplot as plt
from geopandas import GeoDataFrame


def pretty_plot(gg: GeoDataFrame, islands: GeoDataFrame, poly_viewsheds: GeoDataFrame, save_figure_to: str,
                proj=PROJECTION):
    x = gg[gg.apply(lambda x: not x.is_empty and x.area > 1e-9)]
    xa = GeoDataFrame(x.centroid, geometry=0, crs=islands.crs)
    xa.columns = ['geometry']
    xa_tmp = xa.reset_index()
    xa_tmp['idx'] = xa_tmp.apply(lambda y: (y.idx_a, y.idx_b), axis=1)
    xa_tmp['idx_other'] = xa_tmp.apply(lambda y: (y.idx_b, y.idx_a), axis=1)
    xa_tmp = xa_tmp.set_index('idx')
    paths = xa_tmp.join(xa_tmp, on='idx_other', lsuffix='_ab', rsuffix='_ba')
    paths = paths[paths.apply(lambda y: y.geometry_ab is not np.nan and y.geometry_ba is not np.nan, axis=1)]

    ax = gplt.polyplot(
        islands,
        projection=proj,
        figsize=(20, 20),
        color='darkgray'
    )
    gplt.polyplot(
        poly_viewsheds,
        projection=proj,
        ax=ax,
        linewidth=0,
        facecolor='lightgray',
        alpha=0.3
    )
    gplt.polyplot(
        x,
        projection=proj,
        ax=ax,
        linewidth=0,
        facecolor='red',
        alpha=0.3
    )
    gplt.sankey(
        paths,
        start='geometry_ab',
        end='geometry_ba',
        ax=ax,
        projection=proj,
        alpha=0.05,
        rasterized=False
    )

    plt.savefig(save_figure_to)
Example No. 4
# Assumed imports for this snippet.
from pandas import DataFrame
from geopandas import GeoDataFrame
from shapely.geometry import Polygon


def data_frame_2_geo_data_frame(data_frame: DataFrame) -> GeoDataFrame:
    """
    Creates a ``GeoDataFrame`` from the given ``DataFrame``

    Args:
        data_frame (DataFrame): ``DataFrame`` that is used to create a ``GeoDataFrame`` with geometric information.

    Returns:
        GeoDataFrame: Created ``GeoDataFrame``.
    """

    data_frame_sorted = data_frame["x"].sort_values()
    tile_size = data_frame_sorted[1] - data_frame_sorted[0]

    # Coordinate system source:
    # https://www.stadtentwicklung.berlin.de/geoinformation/landesvermessung/atkis/de/dgm.shtml
    geo_data_frame = GeoDataFrame(
        [[x, y, height,
          Polygon(((x, y), (x + tile_size, y), (x + tile_size, y + tile_size),
                   (x, y + tile_size), (x, y)))]
         for x, y, height in data_frame.values],
        crs={"init": "epsg:25833"})
    geo_data_frame.columns = ["x", "y", "height", "geometry"]

    return geo_data_frame
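A usage sketch with a tiny two-tile DataFrame (the column order x, y, height is assumed, matching the iteration over data_frame.values):

import pandas as pd

points = pd.DataFrame({'x': [0.0, 1.0], 'y': [0.0, 0.0], 'height': [34.2, 35.1]})
tiles = data_frame_2_geo_data_frame(points)
print(tiles.geometry.iloc[0].bounds)  # (0.0, 0.0, 1.0, 1.0) with a 1 m tile size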
Example No. 5
    def convert_data_cols_to_datetime(
            self, gdf: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        """
        Convert all data columns to datetime with `pd.to_datetime`

        Args:
            gdf (geopandas.GeoDataFrame): Input GeoDataFrame

        Returns:
            geopandas.GeoDataFrame: GeoDataFrame with data columns converted to `Timestamp`
        """
        converted_column_names = []
        for col in gdf.columns:
            if col != "geometry":
                col = pd.to_datetime(col)

            converted_column_names.append(col)
        gdf.columns = converted_column_names
        return gdf
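A usage sketch assuming a hypothetical instance (here called prep) of the class that defines this method, applied to a wide GeoDataFrame whose non-geometry column names are date strings:

import geopandas as gpd
from shapely.geometry import Point

wide = gpd.GeoDataFrame({'2020-01-01': [1.2], '2020-02-01': [3.4],
                         'geometry': [Point(0, 0)]})
wide = prep.convert_data_cols_to_datetime(wide)  # columns become Timestamps plus 'geometry'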
Example No. 6
def format_gdf(gdf: gpd.GeoDataFrame,
               *,
               index_col: str = None) -> gpd.GeoDataFrame:
    """A function to prepare a GeoDataFrame for usage.

    Args:
        gdf (GeoDataFrame): The GeoDataFrame to format.
        index_col (str, optional): Defaults to ``None``. The name of the column in `gdf` to use as the index.

    Returns:
        gpd.GeoDataFrame
    """
    gdf = gdf.to_crs(epsg=26917)
    gdf.columns = gdf.columns.str.lower()
    if index_col is not None:
        gdf = gdf.set_index(index_col.lower()).sort_index()
    gdf['geometry'] = gdf['geometry'].apply(
        lambda x: loads(dumps(x, output_dimension=2)))  # flatten 3d to 2d

    return gdf
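A usage sketch with a hypothetical shapefile; loads and dumps are assumed to come from shapely.wkb (output_dimension=2 drops any Z values), and the source layer needs a valid CRS for the to_crs(epsg=26917) reprojection to work:

import geopandas as gpd

parcels = gpd.read_file('parcels.shp')            # hypothetical input
parcels = format_gdf(parcels, index_col='ParcelID')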
Example No. 7
def _import_gdf(
    gdf: GeoDataFrame, sql_tablename: str, geom_type: str, uri: str = DEFAULT_DB_URI
) -> None:
    """
    Import a geopandas GeoDataFrame to SQL
    """

    gdf.columns = [x.lower() for x in gdf.columns]
    epsg_code = int(str(gdf.crs).split(":")[1])

    gdf["geom"] = gdf["geometry"].apply(lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop("geometry", 1, inplace=True)

    engine = sqlalchemy.create_engine(uri)
    gdf.to_sql(
        sql_tablename,
        engine,
        dtype={"geom": Geometry(geom_type.upper(), srid=epsg_code)},
        if_exists="replace",
    )
    engine.dispose()
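A usage sketch with a hypothetical input file and connection string (WKTElement and Geometry come from geoalchemy2):

import geopandas as gpd

stops = gpd.read_file('bus_stops.geojson')        # hypothetical point layer
_import_gdf(stops, 'bus_stops', 'point',
            uri='postgresql://user:pass@localhost:5432/gis')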
Example No. 8
def func(arg):
    last_idterm_idx, idterm = arg

    # for last_track_idx, idterm in enumerate(idterms_cars):
    print(idterm)
    idterm = str(idterm)
    # print('VIASAT GPS track:', track_ID)
    viasat_data = pd.read_sql_query(
        '''
                SELECT * FROM public.routecheck_2019 
                WHERE idterm = '%s' ''' % idterm, conn_HAIG)
    if len(viasat_data) > 0:
        viasat_data = viasat_data.sort_values('timedate')
        ## add a field with the "NEXT timedate" in seconds
        viasat_data['next_totalseconds'] = viasat_data.totalseconds.shift(-1)
        viasat_data['next_timedate'] = viasat_data.timedate.shift(-1)
        viasat_data['next_totalseconds'] = viasat_data[
            'next_totalseconds'].astype('Int64')
        viasat_data['next_totalseconds'] = viasat_data[
            'next_totalseconds'].fillna(0)
        viasat_data['next_lon'] = viasat_data.longitude.shift(
            -1)  # longitude of the next trip
        viasat_data['next_lat'] = viasat_data.latitude.shift(
            -1)  # latitude of the next trip
        all_trips = list(viasat_data.idtrajectory.unique())
        ### initialize an empty dataframe
        # route_CATANIA = pd.DataFrame([])
        for idx, idtrajectory in enumerate(all_trips):
            # idtrajectory = 122344050
            # print(idtrajectory)
            ## filter data by idterm and by idtrajectory (trip)
            data = viasat_data[viasat_data.idtrajectory == idtrajectory]
            ## group by TRIP_ID, check numbers of line, if > 1 then only get the one with larger number of lines
            counts_TRIP_ID = data.groupby(
                data[['TRIP_ID']].columns.tolist(),
                sort=False).size().reset_index().rename(columns={0: 'counts'})
            data = data[data.TRIP_ID == counts_TRIP_ID[
                counts_TRIP_ID.counts == max(
                    counts_TRIP_ID.counts)].TRIP_ID[0]]
            ### zip the coordinates into a point object and convert to a GeoData Frame ####
            if len(data) > 3:
                geometry = [
                    Point(xy) for xy in zip(data.longitude, data.latitude)
                ]
                df = GeoDataFrame(data, geometry=geometry)
                # Aggregate these points with the GroupBy
                df = df.groupby([
                    'idtrajectory'
                ])['geometry'].apply(lambda x: LineString(x.tolist()))
                df = GeoDataFrame(df, geometry='geometry')
                # df.plot()
                df.columns = ['geometry']
                idtrace_o = data[data.segment == min(data.segment)][[
                    'id'
                ]].iloc[0][0]
                idtrace_d = data[data.segment == max(data.segment)][[
                    'id'
                ]].iloc[0][0]
                # latitude_o = data[data.segment == min(data.segment)][['latitude']].iloc[0][0]  ## at the ORIGIN
                # longitude_o = data[data.segment == min(data.segment)][['longitude']].iloc[0][0]  ## at the ORIGIN
                # latitude_d = data[data.segment == max(data.segment)][['latitude']].iloc[0][0]  ## at the DESTINATION
                # longitude_d = data[data.segment == max(data.segment)][['longitude']].iloc[0][0]  ## at the DESTINATION
                timedate = str(data[data.segment == min(data.segment)][[
                    'timedate'
                ]].iloc[0][0])  ## at the ORIGIN
                ## trip distance in meters (sum of the increment of the "progressive"
                ## add a field with the "previous progressive"
                data['last_progressive'] = data.progressive.shift()  # <-------
                data['last_progressive'] = data['last_progressive'].astype(
                    'Int64')
                data['last_progressive'] = data['last_progressive'].fillna(0)
                ## compute increments of the distance (in meters)
                data['increment'] = data.progressive - data.last_progressive
                ## sum all the increments
                tripdistance_m = sum(
                    data['increment'][1:len(data['increment'])][
                        data.increment > 0])
                ## trip time in seconds (duration)
                time_o = data[data.segment == min(data.segment)][['path_time'
                                                                  ]].iloc[0][0]
                time_d = data[data.segment == max(data.segment)][['path_time'
                                                                  ]].iloc[0][0]
                triptime_s = time_d - time_o
                # time_o = data[data.segment == min(data.segment)][['totalseconds']].iloc[0][0]
                # time_d = data[data.segment == max(data.segment)][['totalseconds']].iloc[0][0]
                # triptime_s = time_d - time_o
                checkcode = data[data.segment == min(data.segment)][[
                    'anomaly'
                ]].iloc[0][0]  ## at the ORIGIN
                ## time interval between the starts of two consecutive trips
                breaktime_s = data[data.segment == max(data.segment)][['next_timedate']].iloc[0][0] - \
                              data[data.segment == max(data.segment)][['timedate']].iloc[0][0]
                breaktime_s = breaktime_s.total_seconds()
                if breaktime_s < 0:
                    breaktime_s = None
                ### get distance between the position of two consecutive TRIPS (from END of a TRIP to START of a NEW TRIP)
                lon_end = data[data.segment == max(data.segment)][[
                    'longitude'
                ]].iloc[0][0]  # longitude at the END of a TRIP
                lat_end = data[data.segment == max(data.segment)][[
                    'latitude'
                ]].iloc[0][0]
                lon_start = data[data.segment == max(data.segment)][[
                    'next_lon'
                ]].iloc[0][0]  # longitude at the START of a NEW TRIP
                lat_start = data[data.segment == max(data.segment)][[
                    'next_lat'
                ]].iloc[0][0]
                ### find distance between coordinates of two consecutive TRIPS in METERS!!!
                ### end = (37.571518, 14.895852)
                ### start = (37.570873, 14.896243)
                deviation_pos = great_circle_track_node(
                    lon_end, lat_end, lon_start, lat_start)
                ### build the final dataframe ("route" table)
                if tripdistance_m > 0:
                    df_ROUTE = pd.DataFrame({
                        'idtrajectory': [idtrajectory],
                        'idterm': [idterm],
                        'idtrace_o': [idtrace_o],
                        'idtrace_d': [idtrace_d],
                        # 'latitude_o': [latitude_o],
                        # 'longitude_o': [longitude_o],
                        # 'latitude_d': [latitude_d],
                        # 'longitude_d': [longitude_d],
                        'timedate_o': [timedate],
                        'tripdistance_m': [tripdistance_m],
                        'triptime_s': [triptime_s],
                        'checkcode': [checkcode],
                        'breaktime_s': [breaktime_s]
                    })
                    geom = df['geometry'].apply(wkb_hexer)
                    df_ROUTE['geom'] = geom.iloc[0]
                    df_ROUTE['deviation_pos_m'] = deviation_pos
                    # route_CATANIA = route_CATANIA.append(df_ROUTE)
                    connection = engine.connect()
                    df_ROUTE.to_sql("PROVA_route_2019",
                                    con=connection,
                                    schema="public",
                                    if_exists='append')
                    connection.close()
Example No. 9
# Making 2000 random locations
# (assumes the bounding coordinates a, b, c, d, an initial DataFrame `df`, and a `date` string are defined earlier)
lat = []
long = []
for x, y in zip(range(2000), range(2000)):
    lat.append(random.uniform(a, b))
    long.append(random.uniform(c, d))

bar = progressbar.ProgressBar()
for x, y, i in zip(long, lat, bar(range(len(long)))):
    response = requests.get(
        'https://mobile.o.bike/api/v1/bike/list?longitude=' + str(x) +
        '&latitude=' + str(y))
    Lil_data = response.json()

    df_new = pd.DataFrame(Lil_data['data']['list'])
    frames = [df, df_new]
    df = pd.concat(frames)

df = df.drop_duplicates(keep='first')
df = df.reset_index()

geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
df = df.drop(['longitude', 'latitude'], axis=1)
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(df, crs=crs, geometry=geometry)

geo_df.columns = [['index', 'countyId', 'helmet', 'id', 'imei', date]]

geo_df.to_csv(date + '2000obikesZH.csv')
Example No. 10
def prepare_nhdplus(
    flw: gpd.GeoDataFrame,
    min_network_size: float,
    min_path_length: float,
    min_path_size: float = 0,
    purge_non_dendritic: bool = False,
    verbose: bool = False,
) -> gpd.GeoDataFrame:
    """Clean up and fix common issues of NHDPlus flowline database.

    Ported from `nhdplusTools <https://github.com/USGS-R/nhdplusTools>`__

    Parameters
    ----------
    flw : geopandas.GeoDataFrame
        NHDPlus flowlines with at least the following columns:
        COMID, LENGTHKM, FTYPE, TerminalFl, FromNode, ToNode, TotDASqKM,
        StartFlag, StreamOrde, StreamCalc, TerminalPa, Pathlength,
        Divergence, Hydroseq, LevelPathI
    min_network_size : float
        Minimum size of drainage network in sqkm
    min_path_length : float
        Minimum length of terminal level path of a network in km.
    min_path_size : float, optional
        Minimum size of outlet level path of a drainage basin in km.
        Drainage basins with an outlet drainage area smaller than
        this value will be removed. Defaults to 0.
    purge_non_dendritic : bool, optional
        Whether to remove non dendritic paths, defaults to False
    verbose : bool, optional
        Whether to show a message about the removed features, defaults to False.

    Returns
    -------
    geopandas.GeoDataFrame
        Cleaned up flowlines. Note that all column names are converted to lower case.
    """
    flw.columns = flw.columns.str.lower()
    nrows = flw.shape[0]

    req_cols = [
        "comid",
        "terminalfl",
        "terminalpa",
        "hydroseq",
        "streamorde",
        "streamcalc",
        "divergence",
        "fromnode",
        "ftype",
    ]

    _check_requirements(req_cols, flw)
    flw[req_cols[:-1]] = flw[req_cols[:-1]].astype("Int64")

    if not any(flw.terminalfl == 1):
        if all(flw.terminalpa == flw.terminalpa.iloc[0]):
            flw.loc[flw.hydroseq == flw.hydroseq.min(), "terminalfl"] = 1
        else:
            raise ZeroMatched("No terminal flags were found in the dataframe.")

    if purge_non_dendritic:
        flw = flw[((flw.ftype != "Coastline") | (flw.ftype != 566))
                  & (flw.streamorde == flw.streamcalc)]
    else:
        flw = flw[(flw.ftype != "Coastline") | (flw.ftype != 566)]
        flw.loc[flw.divergence == 2, "fromnode"] = pd.NA

    flw = _remove_tinynetworks(flw, min_path_size, min_path_length,
                               min_network_size)

    if verbose:
        print(f"Removed {nrows - flw.shape[0]} paths from the flowlines.")

    if flw.shape[0] > 0:
        flw = _add_tocomid(flw)

    return flw
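A usage sketch assuming an NHDPlus flowline extract with the columns listed in the docstring:

import geopandas as gpd

flowlines = gpd.read_file('NHDFlowline.shp')      # hypothetical NHDPlus extract
clean = prepare_nhdplus(flowlines, min_network_size=10, min_path_length=1,
                        purge_non_dendritic=True, verbose=True)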
Example No. 11
def geodataframe_to_postgis(
        geodataframe: gpd.GeoDataFrame,
        output_table_name: str,
        uri: str,
        src_epsg: Union[bool, int] = None,
        output_epsg: Union[bool, int] = None,
        debug: bool = False
):
    """
    Write a ``geopandas.GeoDataFrame`` to a PostGIS table in a SQL database.

    Assumes that the geometry column has already been named 'geometry'

    :param geodataframe: geopandas.GeoDataFrame
    :param output_table_name: 'name_of_the_output_table'
    :param uri: connection string
    :param src_epsg: if not None, assigns this EPSG code (e.g. 2227) to the geodataframe before writing
    :param output_epsg: if not None, reprojects the data from the input EPSG to the specified EPSG
    :param debug: if True, prints progress messages
    :return: None
    """
    start_time = time.time()

    # Get the geometry type
    # It's possible there are both MULTIPOLYGONS and POLYGONS. This grabs the MULTI variant
    geom_types = list(geodataframe.geometry.geom_type.unique())
    geom_typ = max(geom_types, key=len).upper()

    if debug:
        print(f'## PROCESSING {geom_typ} geodataframe to {output_table_name} in SQL')

    # Manually set the EPSG if the user passes one
    if src_epsg:
        geodataframe.crs = f"epsg:{src_epsg}"
        epsg_code = src_epsg

    # Otherwise, try to get the EPSG value directly from the geodataframe
    else:
        try:
            # Older geodataframes has CRS stored as a dict: {'init': 'epsg:4326'}
            if type(geodataframe.crs) == dict:
                epsg_code = int(geodataframe.crs['init'].split(" ")[0].split(':')[1])
            # Now geopandas has a different approach:
            else:
                epsg_code = int(str(geodataframe.crs).split(':')[1])
        except:
            print('This geodataframe does not have a valid EPSG. Aborting.')
            print(geodataframe.crs)
            return

    # Sanitize the columns before writing to the database
    # Make all column names lower case
    geodataframe.columns = [x.lower() for x in geodataframe.columns]

    # Replace the 'geom' column with 'geometry'
    if 'geom' in geodataframe.columns:
        geodataframe['geometry'] = geodataframe['geom']
        geodataframe.drop('geom', 1, inplace=True)

    # Drop the 'gid' column
    if 'gid' in geodataframe.columns:
        geodataframe.drop('gid', 1, inplace=True)

    # Rename 'uid' to 'old_uid'
    if 'uid' in geodataframe.columns:
        geodataframe['old_uid'] = geodataframe['uid']
        geodataframe.drop('uid', 1, inplace=True)

    # Build a 'geom' column using geoalchemy2 and drop the source 'geometry' column
    geodataframe['geom'] = geodataframe['geometry'].apply(lambda x: WKTElement(x.wkt, srid=epsg_code))
    geodataframe.drop('geometry', 1, inplace=True)

    # write geodataframe to SQL database
    if debug:
        print(f'## -> WRITING TO {uri}')

    engine = sqlalchemy.create_engine(uri)
    geodataframe.to_sql(output_table_name, engine,
                        if_exists='replace', index=True, index_label='gid',
                        dtype={'geom': Geometry(geom_typ, srid=epsg_code)})
    engine.dispose()

    if debug:
        runtime = round((time.time() - start_time), 2)
        print(f'\t FINISHED IN {runtime} seconds')

    log_activity("pGIS.geodataframe_to_postgis",
                 uri=uri,
                 query_text=f"Wrote geopandas.GeoDataFrame to {output_table_name}",
                 debug=debug)

    # If provided an EPSG, alter whatever the native projection was to the output_epsg
    if output_epsg:
        project_spatial_table(output_table_name, geom_typ, epsg_code, output_epsg, uri=uri, debug=debug)

    # Add a unique_id column and do a spatial index
    prep_spatial_table(output_table_name, uri=uri, debug=debug)
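A usage sketch with hypothetical connection details and placeholder EPSG codes; src_epsg is only needed when the layer's CRS is missing or unreliable:

import geopandas as gpd

uri = 'postgresql://user:pass@localhost:5432/gis'   # hypothetical database
trails = gpd.read_file('trails.shp')
geodataframe_to_postgis(trails, 'trails', uri, output_epsg=26918, debug=True)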
Example No. 12
#################################################
#### Plotting

### Extract x and y data for plotting

print('Creating the plot')

zones1 = multipoly_to_poly(view_zones)

zones1['x'] = zones1.apply(getPolyCoords, coord_type='x', axis=1)
zones1['y'] = zones1.apply(getPolyCoords, coord_type='y', axis=1)

zones2 = zones1.drop('geometry', axis=1)

cant1 = GeoDataFrame(['Canterbury'], geometry=[zones1.unary_union])
cant1.columns = ['site', 'geometry']
cant1['x'] = cant1.apply(getPolyCoords, coord_type='x', axis=1)
cant1['y'] = cant1.apply(getPolyCoords, coord_type='y', axis=1)

cant2 = cant1.drop('geometry', axis=1)

## Catchments
catch1 = multipoly_to_poly(site_catch2)
catch1['x'] = catch1.apply(getPolyCoords, coord_type='x', axis=1)
catch1['y'] = catch1.apply(getPolyCoords, coord_type='y', axis=1)
catch2 = catch1.drop('geometry', axis=1)

### Combine with time series data
data1 = merge(cat1.unstack('time').reset_index(), zones2, on=['zone'])
time_index = hy_summ2.time.unique().tolist()
data1['cat'] = data1[time_index[-1]]
Example No. 13
    def import_geodataframe(
        self,
        gdf: gpd.GeoDataFrame,
        table_name: str,
        src_epsg: Union[int, bool] = False,
        if_exists: str = "replace",
        schema: str = None,
        uid_col: str = "uid",
    ):
        """
        Import an in-memory ``geopandas.GeoDataFrame`` to the SQL database.

        :param gdf: geodataframe with data you want to save
        :type gdf: gpd.GeoDataFrame
        :param table_name: name of the table that will get created
        :type table_name: str
        :param src_epsg: The source EPSG code can be passed as an integer.
                         By default this function will try to read the EPSG
                         code directly, but some spatial data is funky and
                         requires that you explicitly declare its projection.
                         Defaults to False
        :type src_epsg: Union[int, bool], optional
        :param if_exists: pandas argument to handle overwriting data,
                          defaults to "replace"
        :type if_exists: str, optional
        """
        if not schema:
            schema = self.ACTIVE_SCHEMA

        # Read the geometry type. It's possible there are
        # both MULTIPOLYGONS and POLYGONS. This grabs the MULTI variant

        geom_types = list(gdf.geometry.geom_type.unique())
        geom_typ = max(geom_types, key=len).upper()

        print(f"\t -> SQL tablename: {schema}.{table_name}")
        print(f"\t -> Geometry type: {geom_typ}")
        print(f"\t -> Beginning DB import...")

        start_time = datetime.now()

        # Manually set the EPSG if the user passes one
        if src_epsg:
            gdf.crs = f"epsg:{src_epsg}"
            epsg_code = src_epsg

        # Otherwise, try to get the EPSG value directly from the geodataframe
        else:
            # Older gdfs have CRS stored as a dict: {'init': 'epsg:4326'}
            if type(gdf.crs) == dict:
                epsg_code = int(gdf.crs["init"].split(" ")[0].split(":")[1])
            # Now geopandas has a different approach
            else:
                epsg_code = int(str(gdf.crs).split(":")[1])

        # Sanitize the columns before writing to the database
        # Make all column names lower case
        gdf.columns = [x.lower() for x in gdf.columns]

        # Replace the 'geom' column with 'geometry'
        if "geom" in gdf.columns:
            gdf["geometry"] = gdf["geom"]
            gdf.drop("geom", 1, inplace=True)

        # Drop the 'gid' column
        if "gid" in gdf.columns:
            gdf.drop("gid", 1, inplace=True)

        # Rename 'uid' to 'old_uid'
        if uid_col in gdf.columns:
            gdf[f"old_{uid_col}"] = gdf[uid_col]
            gdf.drop(uid_col, 1, inplace=True)

        # Build a 'geom' column using geoalchemy2
        # and drop the source 'geometry' column
        gdf["geom"] = gdf["geometry"].apply(
            lambda x: WKTElement(x.wkt, srid=epsg_code))
        gdf.drop("geometry", 1, inplace=True)

        # Write geodataframe to SQL database
        self.add_schema(schema)

        engine = sqlalchemy.create_engine(self.uri())
        gdf.to_sql(
            table_name,
            engine,
            if_exists=if_exists,
            # index=True,
            # index_label=uid_col,
            schema=schema,
            dtype={"geom": Geometry(geom_typ, srid=epsg_code)},
        )
        engine.dispose()

        end_time = datetime.now()

        runtime = end_time - start_time
        print(f"\t -> ... import completed in {runtime}")

        self.table_add_uid_column(table_name, schema=schema, uid_col=uid_col)
        self.table_add_spatial_index(table_name, schema=schema)
Example No. 14
def makeGrid(ipoints, experiment, gridsize):
    # Projections 
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum':'WGS84', 'no_defs':True, 'proj':'longlat'}
    # import grid script
    sys.path.insert(0, os.getcwd()+'/mapping/libs/')
    import grid as g

    opath =  os.getcwd() + '/diysco2-db/campaigns/'+experiment+'/diysco2-grid'
    if(os.path.isdir(opath)):
        print "already a folder!"
    else:
        os.mkdir(opath)

    # gridsize = 200
    ogridname = "grid_"+str(gridsize)+"m.shp"
    ofile = opath + "/" + ogridname
    print "making grid"
    g.main(ofile, ipoints.total_bounds[0], ipoints.total_bounds[2], 
        ipoints.total_bounds[1], ipoints.total_bounds[3],
        gridsize, gridsize)

    print "grid complete! "
    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj
    # create grid id to groupby
    grid['id'] = [i for i in range(len(grid))]

    # Read in transect to spatial subset grids in transect
    transect = GeoDataFrame.from_file(os.getcwd()+'/diysco2-db/_main_/study-area/' +'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid
    # transectgrid = grid[grid.geometry.intersects(transect.geometry)]; print transectgrid
    sagrid = []
    for i in range(len(grid)):
        if np.array(transect.intersects(grid.geometry[i]))[0] != False:
            sagrid.append(grid.geometry[i])

    transectgrid = GeoDataFrame(sagrid)
    transectgrid.columns = ['geometry']
    transectgrid['id'] = [i for i in range(len(transectgrid))]
    transectgrid.crs = gridproj

    

    transectgrid.to_file(ofile[:-4]+"_transect.shp")
    # transectgrid.to_file(ofile[:-4]+"_transect.geojson",driver="GeoJSON")

    ## !!!Some weird things with reading in data makes the sjoin work !!! :(
    transectgrid = GeoDataFrame.from_file(ofile[:-4]+"_transect.shp")
    transectgrid.crs = gridproj
    print(transectgrid.head())

    ipoints = GeoDataFrame.from_file( os.getcwd() + '/diysco2-db/campaigns/'+experiment+'/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print(ipoints.head())

    # ipoints['id'] = [i for i in range(len(ipoints))]
    # Spatial join points to grid
    oname = "gridjoin_"+str(gridsize)+"m.shp"
    # join_inner_df = sjoin(transectgrid, ipoints, how="inner")
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    # join_inner_df.to_file(opath+ "/"+oname)

    return join_inner_df
Example No. 15
    def heatmap(self, a, poly_list):
        "provide density of agents positions as a heatmap"
        "!! add poly list not working yet"
        #sample_agents = [self.base_model.agents[j] for j in self.index]
        #swap if restricting observed agents
        filter_class = self.filter_class
        bin_size = filter_class.filter_params["bin_size"]
        width = filter_class.model_params["width"]
        height = filter_class.model_params["height"]
        os.mkdir(self.save_dir + "output_heatmap")

        "cmap set up. defining bottom value (0) to be black"
        cmap = cm.cividis
        cmaplist = [cmap(i) for i in range(cmap.N)]
        cmaplist[0] = (0.0, 0.0, 0.0, 1.0)
        cmap = col.LinearSegmentedColormap("custom_cmap", cmaplist, N=cmap.N)
        cmap = cmap.from_list("custom", cmaplist)
        "split norm for better vis"
        n = self.filter_class.model_params["pop_total"]
        norm = DivergingNorm(1, n / 3, 0.1, 0.9, 1e-8, n)

        for i in range(a.shape[0]):
            locs = a[i, :]

            counts = self.filter_class.poly_count(poly_list, locs)
            if np.nansum(counts) != 0:
                densities = np.array(counts) / np.nansum(counts)  #density
            else:
                densities = np.array(counts)
            #counts[np.where(counts==0)]=np.nan
            frame = GeoDataFrame([densities, counts, poly_list]).T
            frame.columns = ["densities", "counts", "geometry"]
            #norm =col.DivergingNorm(0.2)

            f = plt.figure(figsize=(12, 8))
            ax = f.add_subplot(111)
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            "plot density histogram and locations scatter plot assuming at least one agent available"
            if np.nansum(counts) != 0:
                #ax.scatter(locs[0::2],locs[1::2],color="cyan",label="True Positions")
                ax.set_ylim(0, height)
                ax.set_xlim(0, width)
                column = frame["counts"].astype(float)
                im = frame.plot(column=column,
                                ax=ax,
                                cax=cax,
                                cmap=cmap,
                                norm=norm,
                                vmin=0,
                                vmax=n)

                for k, count in enumerate(counts):
                    if count > 0:
                        ax.annotate(s=count,
                                    xy=poly_list[k].centroid.coords[0],
                                    ha='center',
                                    va="center",
                                    color="w")

            else:
                """
                dummy frame if no locations present e.g. at the start. 
                prevents divide by zero error in hist2d
                """
                ax.set_ylim(0, height)
                ax.set_xlim(0, width)
                column = frame["densities"].astype(float)
                im = frame.plot(column=column,
                                ax=ax,
                                cax=cax,
                                cmap=cmap,
                                norm=norm,
                                vmin=0,
                                vmax=1)

            "set up cbar. colouration proportional to number of agents"
            ax.text(0,
                    101,
                    s="Total Agents: " + str(np.sum(counts)),
                    color="k")

            sm = cm.ScalarMappable(norm=norm, cmap=cmap)
            cbar = plt.colorbar(sm, cax=cax, spacing="proportional")
            cbar.set_label("Agent Counts")
            cbar.set_alpha(1)
            #cbar.draw_all()

            "set legend to bottom centre outside of plot"
            box = ax.get_position()
            ax.set_position([
                box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9
            ])

            "labels"
            ax.set_xlabel("Corridor width")
            ax.set_ylabel("Corridor height")
            #ax.set_title("Agent Densities vs True Positions")
            cbar.set_label(f"Agent Counts (out of {n})")
            """
            frame number and saving. padded zeroes to keep frames in order.
            padded to nearest upper order of 10 of number of iterations.
            """
            number = str(i).zfill(ceil(log10(a.shape[0])))
            file = self.save_dir + f"output_heatmap/{number}"
            f.savefig(file)
            plt.close()

        animations.animate(
            self, self.save_dir + "output_heatmap",
            self.save_dir + f"heatmap_{filter_class.pop_total}_", 12)