Пример #1
0
    def test_geo_getitem(self):
        """Exercise column access on a GeoDataFrame and check which columns
        come back as a GeoSeries."""
        diagonal = [Point(i, i) for i in range(5)]
        data = {"A": range(5), "B": range(-5, 0), "location": diagonal}

        # "location" is the declared geometry column.
        frame = GeoDataFrame(data, crs=self.crs, geometry="location")
        assert isinstance(frame.geometry, GeoSeries)

        # Adding a column that is merely *named* "geometry" must leave the
        # active geometry ("location") untouched.
        frame["geometry"] = frame["A"]
        active = frame.geometry
        assert isinstance(active, GeoSeries)
        assert active[0] == diagonal[0]
        # good if this changed in the future
        assert not isinstance(frame["geometry"], GeoSeries)
        assert isinstance(frame["location"], GeoSeries)

        # Columns filled from a buffer result or a geometry array are
        # returned as GeoSeries as well.
        frame["buff"] = frame.buffer(1)
        assert isinstance(frame["buff"], GeoSeries)

        frame["array"] = from_shapely([Point(i, i) for i in range(5)])
        assert isinstance(frame["array"], GeoSeries)

        # Without an explicit geometry argument the "geometry" key is used.
        data["geometry"] = [Point(i + 1, i - 1) for i in range(5)]
        frame = GeoDataFrame(data, crs=self.crs)
        assert isinstance(frame.geometry, GeoSeries)
        assert isinstance(frame["geometry"], GeoSeries)
        # good if this changed in the future
        assert not isinstance(frame["location"], GeoSeries)
Пример #2
0
def calc_surrounding_area(zone_gdf, buffer_m):
    """Compute the area surrounding a zone's overall convex hull.

    Steps:
      1. take the convex hull of every feature in ``zone_gdf``;
      2. dissolve all hulls into a single feature and take its convex hull;
      3. buffer that hull by ``buffer_m``;
      4. overlay the hull and its buffered version with
         ``how='symmetric_difference'``.

    Parameters
    ----------
    zone_gdf :
        Geo frame holding the zone geometries.
    buffer_m :
        Buffer distance passed to ``buffer`` (presumably metres, going by
        the name -- depends on the CRS of ``zone_gdf``).

    Returns
    -------
    tuple
        ``(area, geometry_merged_final)``: the symmetric-difference overlay
        and the merged convex hull it was derived from.
    """
    # Per-feature hulls, all tagged with the same class so that dissolve
    # collapses them into one row.
    hulls_gdf = Gdf(geometry=zone_gdf.convex_hull.values)
    hulls_gdf["one_class"] = "buildings"
    dissolved = hulls_gdf.dissolve(by='one_class', aggfunc='sum')

    geometry_merged_final = Gdf(geometry=dissolved.convex_hull)
    buffered_hull = Gdf(geometry=geometry_merged_final.buffer(buffer_m))

    # NOTE: an alternative based on an alpha shape (concave hull) of the
    # polygon exterior nodes was tried here and was noted as not functional.
    area = overlay(geometry_merged_final,
                   buffered_hull,
                   how='symmetric_difference')
    return area, geometry_merged_final
Пример #3
0
def by_buffer(
    ref: gpd.GeoDataFrame,
    add: gpd.GeoDataFrame,
    name: str,
    buffer: float,
    sum_cols: list = None) -> gpd.GeoDataFrame:
    """
    Merge a reference and additional geo dataframe by summarising features of
    additional within a buffer of points in reference

    Parameters
    ----------
    ref : gpd.GeoDataFrame
        Reference spatial dataset - usually address base or gazetteer.
        Must contain a 'uprn' column, which is used as the grouping and
        merge key, and a 'geometry' column.
    add : gpd.GeoDataFrame
        New spatial dataset to add. Every column except 'geometry' is
        summarised.
    name : str
        Name to append to add columns.
    buffer : float
        Buffer around points in reference to summarise add in.
    sum_cols : list, optional
        What summary variables to produce.
        The default (None) means ['median', 'mean', 'max', 'min', 'sum'].

    Returns
    -------
    df : gpd.GeoDataFrame
        ref merged with a summary of add; summary columns are named
        ``<column>_<statistic>_<name>``.

    """
    if sum_cols is None:
        sum_cols = ['median', 'mean', 'max', 'min', 'sum']
    stats = list(sum_cols)

    add_agg = {col: stats for col in add.columns if col != 'geometry'}

    # Replace each reference geometry by its buffer and collect every `add`
    # feature that falls into it.
    ref_buff = ref[['uprn', 'geometry']].assign(geometry=ref.buffer(buffer))
    ref_join = gpd.sjoin(ref_buff, add, how='left').drop(
        columns=['index_right', 'geometry'])
    ref_sum = ref_join.groupby("uprn").agg(add_agg).reset_index()

    # After the aggregation the columns are (column, statistic) pairs; the
    # 'uprn' key carries an empty statistic and must keep its plain name so
    # the merge below can find it. Building the names explicitly avoids
    # depending on regex handling in ``str.replace``.
    ref_sum.columns = [
        f"{col}_{stat}_{name}" if stat else col
        for col, stat in ref_sum.columns
    ]

    return ref.merge(ref_sum)
Пример #4
0
    def _transform_and_buffer(self, aoi: GeoDataFrame) -> GeoDataFrame:
        """Reproject the aoi to ``self.crs`` and optionally buffer it.

        Returns None when no aoi is given, and the reprojected aoi unchanged
        when ``self.buffer_aoi`` is falsy. Otherwise the buffer distance is
        derived from half the chip extent (``feature_chip_size / 2 * res``):

        * mode "train": the negated half extent, floored (i.e. rounded away
          from zero), so chips always fall completely in the aoi;
        * any other mode: the half extent times sqrt(2), i.e. the distance
          from the center of a chip to any corner, ensuring that the full
          aoi is covered by all chips in a grid.

        The buffered result is a new GeoDataFrame holding only the geometry.
        """
        if aoi is None:
            return None

        reprojected = aoi.to_crs(self.crs)
        if not self.buffer_aoi:
            return reprojected

        half_extent = (self.feature_chip_size / 2) * self.res
        if self.mode == "train":
            distance = floor(-1 * half_extent)
        else:
            distance = half_extent * sqrt(2)

        return gpd.GeoDataFrame(geometry=reprojected.buffer(distance))
Пример #5
0
def clip_to_polygon(in_path: str, bounding_gdf: gpd.GeoDataFrame, buffer=None, out_path=None):
    """clips a raster, saves the clipped raster, opens, and returns
       a handle to it

    Arguments:
        in_path {str} -- Path of original raster
        bounding_gdf {gpd.GeoDataFrame} -- gdf where the first line is the bounding geometry

    Keyword Arguments:
        buffer -- optional distance to buffer the bounding geometry by before
            clipping; skipped when None or 0 (default: {None})
        out_path -- where to write the clipped raster; when None the input
            file name gets a "_clipped.tif" suffix (default: {None})

    Returns:
        the clipped raster, re-opened with rasterio.open (default mode);
        the caller is responsible for closing it
    """
    import os

    if buffer:
        bounding_gdf = bounding_gdf.buffer(buffer, join_style=2)

    # rasterio.mask.mask is given a list of GeoJSON-like geometry dicts;
    # only the first feature of the (possibly buffered) input is used.
    geometry = [json.loads(bounding_gdf.to_json())['features'][0]['geometry']]

    with rasterio.open(in_path) as src:
        out_image, out_transform = rasterio.mask.mask(src, geometry, crop=True)
        out_meta = src.meta

    # The clipped array has a new size and transform; write it as GeoTIFF.
    out_meta.update(
        {
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform,
        }
    )
    if out_path is None:
        # Strip only a trailing .tif/.tiff extension. Splitting on the first
        # ".tif" anywhere in the path would truncate paths whose directory
        # names happen to contain ".tif".
        root, ext = os.path.splitext(in_path)
        if ext.lower() not in (".tif", ".tiff"):
            root = in_path
        out_path = root + "_clipped.tif"

    with rasterio.open(out_path, "w", **out_meta) as dest:
        dest.write(out_image)

    return rasterio.open(out_path)
Пример #6
0
                        fill_color="black",
                        fill_opacity=1).add_to(my_map)
my_map.save("matched_route.html")


######################################################

# Build a GeoDataFrame from the VIASAT records: one Point per
# (longitude, latitude) pair, with the CRS declared as EPSG:4326.
geometry = [Point(xy) for xy in zip(viasat.longitude, viasat.latitude)]
# viasat = viasat.drop(['longitude', 'latitude'], axis=1)
crs = {'init': 'epsg:4326'}
viasat_gdf = GeoDataFrame(viasat, crs=crs, geometry=geometry)
# viasat_gdf.plot()

# Buffer every point by a fixed distance.
# NOTE(review): earlier comments gave the unit as kilometres and the radius
# as 50 meters; with the EPSG:4326 CRS declared above the distance is
# presumably expressed in degrees -- confirm the intended radius.
buffer = viasat_gdf.buffer(0.00025)  # this is a geoseries
# buffer.plot()
# Wrap the buffered geometries in a plain pandas DataFrame with a single
# 'geometry' column (note: pd.DataFrame, not a GeoDataFrame).
buffer_viasat = pd.DataFrame(buffer)
buffer_viasat.columns = ['geometry']
type(buffer_viasat)  # bare expression; its value is discarded in a script
# Reference on converting a GeoSeries into a GeoDataFrame:
# https://gis.stackexchange.com/questions/266098/how-to-convert-a-geoserie-to-a-geodataframe-with-geopandas

## circumscribe the area of the track (buffer zone)
# union = buffer.unary_union
# envelope = union.envelope
# rectangle_viasat = gpd.GeoDataFrame(geometry=gpd.GeoSeries(envelope))
# rectangle_viasat.plot()

# geodataframe with edges
Пример #7
0
def transform_islands(islands: GeoDataFrame) -> GeoDataFrame:
    """Return a trimmed copy of ``islands`` indexed by a zero-based FID.

    The result keeps only the 'Id' and 'geometry' columns, replaces the
    geometry with its zero-distance buffer (presumably to clean up invalid
    polygons -- confirm), adds ``FID = Id - 1`` and uses it as the index
    while also keeping it as a column.

    The input frame is left untouched; all changes are made on a copy, which
    also avoids assigning onto a column slice of the caller's frame.

    Raises
    ------
    ValueError
        From ``set_index(verify_integrity=True)`` if the FID values are not
        unique.
    """
    # Buffer first, while the frame still has its full original state.
    repaired = islands.buffer(0)

    result = islands[['Id', 'geometry']].copy()
    result['geometry'] = repaired
    result['FID'] = result['Id'] - 1
    return result.set_index('FID', drop=False, verify_integrity=True)
Пример #8
0
def request_netatmo(client_ID: str,
                    client_secret: str,
                    pw: str,
                    username: str,
                    bounding_gdf: gpd.GeoDataFrame,
                    areal_buffer: float,
                    output_crs: str = "EPSG:32632",
                    verbose=False):
    """Gets precipitation stations through netatmos api

    Requests an access token, queries the public-data endpoint for rain
    stations inside the bounding box of the buffered bounding geometry,
    keeps the stations whose country is "NO" and finally keeps only those
    lying within the (unbuffered) bounding geometry.

    Arguments:
        client_ID {str} -- client id sent with the token request
        client_secret {str} -- client secret sent with the token request
        pw {str} -- password
        username {str} -- username
        bounding_gdf {gpd.GeoDataFrame} -- gdf where first line has bounding
            geometry; it must have a CRS because it is reprojected
        areal_buffer {float} -- distance, in the units of bounding_gdf's CRS,
            by which the bounding geometry is buffered before its bounding
            box is requested

    Keyword Arguments:
        output_crs {str} -- Output coordinate reference system (default: {'EPSG:32632'})
        verbose {bool} -- print progress messages (default: {False})

    Raises:
        Exception: the token request did not return status 200
        Exception: the data request did not return status 200

    Returns:
        gpd.GeoDataFrame -- Georeferenced dataframe of the values
    """

    # Authentication
    auth_params = {
        "client_id": client_ID,
        "client_secret": client_secret,
        "grant_type": "password",
        "username": username,
        "password": pw,
        "scope": "read_station",
    }
    auth_endpoint = "https://api.netatmo.com/oauth2/token"
    auth = requests.post(auth_endpoint, auth_params)
    # Check the status before decoding the body, so that a non-JSON error
    # page is reported as a failed token request and not as a decode error.
    if auth.status_code != 200:
        raise Exception(f"token request failed, response: {auth.text}")
    if verbose:
        print("token request succeded.")
    token = auth.json()["access_token"]

    # Bounding box (in lon/lat) of the buffered bounding geometry.
    names = ("lon_sw", "lat_sw", "lon_ne", "lat_ne")
    coords = (bounding_gdf.buffer(areal_buffer).to_crs(
        "epsg:4326").geometry.iloc[0].bounds)
    bbox = dict(zip(names, coords))
    if verbose:
        print(
            f"after {areal_buffer}m buffer, requested bounding box was: {bbox}"
        )

    # Get data
    endpoint = "https://api.netatmo.com/api/getpublicdata"
    parameters = {
        **bbox,
        "required_data": "rain",
    }
    r = requests.get(endpoint,
                     parameters,
                     headers={"Authorization": "Bearer " + token})
    if r.status_code != 200:
        # Prefer the API's own error message, but fall back to the raw body
        # when the response is not the expected JSON structure.
        try:
            detail = r.json()["error"]["message"]
        except (ValueError, KeyError, TypeError):
            detail = r.text
        raise Exception(
            f"data request returned error code {r.status_code}. {detail}")
    if verbose:
        print("data resquest succeded")
    payload = r.json()

    # Initial processing
    df_raw = pd.DataFrame.from_dict(payload)
    df = pd.DataFrame.from_records(df_raw.body)
    df = df.drop(["measures", "modules", "module_types"], axis=1)

    # Flatten the nested "place" record into plain columns.
    df["lon"] = df.place.apply(lambda x: x["location"][0])
    df["lat"] = df.place.apply(lambda x: x["location"][1])
    df["masl"] = df.place.apply(lambda x: x["altitude"])
    df["country"] = df.place.apply(lambda x: x["country"])
    df = df.drop(["place"], axis=1)
    df = df[df.country == "NO"]  # filters by stations in norway
    df = df.rename(columns={"_id": "id"})

    # Generate GeoDataFrame in lon/lat, then change the crs. Building the
    # points column-wise also works when no station is left after filtering.
    gdf = gpd.GeoDataFrame(
        df.drop(["lon", "lat"], axis=1),
        geometry=gpd.points_from_xy(df["lon"], df["lat"]),
        crs=CRS.from_epsg(4326),
    )
    gdf = gdf.to_crs(output_crs)

    gdf["source"] = "NETATMO"
    gdf['owner'] = "PRIVATE"
    gdf["resolution"] = 60

    # Clip to the unbuffered study area. The polygon is brought into the
    # same CRS as the stations so the comparison is made on like coordinates.
    bound_polygon = bounding_gdf.to_crs(output_crs).geometry.iloc[0]
    clipped_gdf = gdf[gdf.geometry.within(bound_polygon)]
    removed_stations = len(gdf) - len(clipped_gdf)
    if verbose:
        print(
            f"{removed_stations} statinos exceeded study area and were removed."
        )

    return clipped_gdf
Пример #9
0
def populate_scan_environment():
    """Allocate proxies and accounts to scan regions and write them to CSV.

    Loads the available accounts and proxies, crops the city shapes to a
    hard-coded bounding box, subtracts the water shapes and covers what is
    left with scan regions. Every chunk of up to 40 * 45 regions gets one
    proxy, every chunk of up to 45 regions inside it gets one account, and
    one row per region is written to ``scanallocation.csv``:
    proxy URL, username, password, centroid x, centroid y.

    The function prints a summary and returns early as soon as either the
    proxies or the accounts run out; the counters in the summary only count
    proxies/accounts that were actually obtained.

    NOTE(review): the output file is opened in 'wb' mode for ``csv.writer``,
    which is the Python 2 convention -- confirm the target interpreter.
    NOTE(review): an earlier comment described the last two CSV columns as
    "region_lat, region_lng" while the code writes centroid.x then
    centroid.y -- confirm the order the consumer expects.
    """
    tally = "{} regions, using {} accounts, using {} proxies"

    print("[-] Loading accounts")
    accounts = get_available_accounts_queue()
    print("[-] Loading accounts - Done")

    print("[-] Loading proxies")
    proxies = get_available_proxies_queue()
    print("[-] Loading proxies - Done")

    print("[-] Loading shapefiles")
    cities = GeoDataFrame.from_file('./geodata/ca_cities/Cities2015.shp')
    water = GeoDataFrame.from_file('./geodata/baywater/bayarea_allwater.shp')
    print("[-] Loading shapefiles - Done")

    print("[-] Reduce + crop geometry")
    # Previously used, larger box: lng -122.6 .. -121.5, lat 37.1 .. 38
    lng1 = -122.457
    lat1 = 37.76
    lng2 = -122.402
    lat2 = 37.80

    # This is the cropped area - full geojson is all cities in CA.
    # The ring must visit all four corners; repeating the first corner as
    # the fourth vertex would collapse the box into a triangle.
    bayarea = Polygon([(lng1, lat1), (lng2, lat1), (lng2, lat2), (lng1, lat2)])
    bayarea_crop_frame = GeoDataFrame(geometry=GeoSeries([bayarea]), crs={'init': 'epsg:4269'})
    ca_cities = cities.intersection(bayarea_crop_frame.geometry.unary_union)
    ca_water = water.to_crs(ca_cities.crs)
    ca_cities_clean = ca_cities.difference(ca_water.geometry.unary_union)
    simple_ca = GeoDataFrame(geometry=GeoSeries(ca_cities_clean.buffer(0).geometry.unary_union),
                             crs={'init': 'epsg:4269'})

    # Buffer and reduce complexity of geometry - covers costal regions + improves performance
    simple_ca = simple_ca.buffer(0.003).simplify(0.01, preserve_topology=True)
    print("[-] Reduce + crop geometry - Done")

    print("[-] Generate coverings")
    circle_covering = create_circle_covering(70, 50, 65)
    norm_covering = transform_and_normalize_circle_covering(circle_covering, ca_cities_clean.crs)
    search_rects = chunked_rect_covering(lng1 - 0.05, lat1 - 0.1, 0.038, 30, 30, (simple_ca))
    print("[-] Generate coverings - Done")

    allocated_proxy_count = 0
    allocated_region_count = 0
    allocated_account_count = 0

    with open('scanallocation.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')

        for rect_index, rect in enumerate(tqdm(search_rects.geometry, position=0)):
            regions = get_scan_region_covering(rect_index, rect, ca_cities_clean, norm_covering).geometry
            # one ip for every 40*45 scans (40 workers per ip)
            for region_group_ips in chunks(regions, 40 * 45):
                # Any failure to obtain a proxy is treated as "ran out".
                try:
                    proxy = proxies.pop()
                except Exception:
                    print(("\nRan out of --- proxies --- " + tally).format(
                        allocated_region_count, allocated_account_count, allocated_proxy_count))
                    return
                allocated_proxy_count += 1

                # one worker for every 45 scans
                for region_group in chunks(region_group_ips, 45):
                    # Any failure to obtain an account is treated as "ran out".
                    try:
                        allocate_account = accounts.pop()
                    except Exception:
                        print(("\nRan out of --- accounts --- " + tally).format(
                            allocated_region_count, allocated_account_count, allocated_proxy_count))
                        return
                    allocated_account_count += 1
                    allocated_region_count += 1
                    for region in region_group:
                        # proxy, username, password, centroid x, centroid y
                        spamwriter.writerow(["https://"+proxy, allocate_account["username"], allocate_account["password"], region.centroid.x, region.centroid.y])
        print(("\nDone!: " + tally).format(
            allocated_region_count, allocated_account_count, allocated_proxy_count))
        fill=True,
        fill_color="black",
        fill_opacity=1).add_to(my_map)
my_map.save("matched_route.html")

######################################################

# Build a GeoDataFrame from the VIASAT records: one Point per
# (longitude, latitude) pair, with the CRS declared as EPSG:4326.
geometry = [Point(xy) for xy in zip(viasat.longitude, viasat.latitude)]
# viasat = viasat.drop(['longitude', 'latitude'], axis=1)
crs = {'init': 'epsg:4326'}
viasat_gdf = GeoDataFrame(viasat, crs=crs, geometry=geometry)
# viasat_gdf.plot()

# Buffer every point by `buffer_diam` (defined outside this section).
# NOTE(review): earlier comments gave the unit as kilometres and the value
# as 25 meters (0.00025); with the EPSG:4326 CRS declared above the distance
# is presumably expressed in degrees -- confirm the intended radius.
buffer = viasat_gdf.buffer(
    buffer_diam)  # this is a geoseries
# buffer.plot()
# Wrap the buffered geometries in a plain pandas DataFrame with a single
# 'geometry' column (note: pd.DataFrame, not a GeoDataFrame).
buffer_viasat = pd.DataFrame(buffer)
buffer_viasat.columns = ['geometry']

# geodataframe with edges (`gdf_edges` is defined outside this section)
type(gdf_edges)  # bare expression; its value is discarded in a script
# gdf_edges.plot()
'''
## circumscript the area of the track (buffer zone)
union = buffer.unary_union
envelope = union.envelope
rectangle_viasat = gpd.GeoDataFrame(geometry=gpd.GeoSeries(envelope))
rectangle_viasat.plot()