def test_query_bulk_with_prepared(tree):
    """``query_bulk`` must return the same result for prepared and unprepared inputs."""
    query_geoms = np.array([box(0, 0, 1, 1), box(3, 3, 5, 5)])
    baseline = tree.query_bulk(query_geoms, predicate="intersects")

    # test with array of partially prepared geometries (only the first one)
    pygeos.prepare(query_geoms[0])
    partially_prepared = tree.query_bulk(query_geoms, predicate="intersects")
    assert_array_equal(baseline, partially_prepared)

    # test with fully prepared geometries
    pygeos.prepare(query_geoms)
    fully_prepared = tree.query_bulk(query_geoms, predicate="intersects")
    assert_array_equal(baseline, fully_prepared)
def test_set_precision_intersection():
    """Operations should use the most precise precision grid size of the inputs."""
    first = pygeos.normalize(pygeos.box(0, 0, 0.9, 0.9))
    second = pygeos.normalize(pygeos.box(0.75, 0, 1.75, 0.75))

    # no grid size was set on either input: the result reports precision 0
    ungridded = pygeos.intersection(first, second)
    assert pygeos.get_precision(ungridded) == 0

    # GEOS will use and keep the most precise precision grid size
    first = pygeos.set_precision(first, 0.5)
    second = pygeos.set_precision(second, 1)
    gridded = pygeos.intersection(first, second)

    assert pygeos.get_precision(gridded) == 0.5
    assert pygeos.equals(gridded, pygeos.Geometry("LINESTRING (1 1, 1 0)"))
class _TestPoints(_TestSimilarity):
    # Fixture data for the checks inherited from _TestSimilarity:
    # a point paired with boxes.

    SIMILAR = [points(0, 1), box(-1, 0, 1, 2)]
    DISSIMILAR = [points(10, 10), box(0.5, 0.5, 3, 3)]

    # Two parallel rows; VALUE below holds one entry per column.
    VALUE_GEOMETRIES = [
        [points(10, 10), points(10, 10), points(10, 10)],
        [box(9, 8, 11, 14), box(8, 9, 14, 11), box(8, 8, 14, 14)],
    ]
    VALUE = [0, 0, 1 - np.sqrt(2)]

    DISSIMILAR_VALUE = -np.inf
def intersection(self, coordinates):
    """Query the tree with a rectangle or a point given as plain coordinates.

    Mimics the rtree-style ``intersection`` call: the coordinates are turned
    into a pygeos geometry which is handed to the parent class' ``query``.

    Parameters
    ----------
    coordinates : iterable
        ``(minx, miny, maxx, maxy)`` to query a rectangle or ``(x, y)`` to
        query a point. One-shot iterables (e.g. generators) are accepted.

    Returns
    -------
    Whatever ``super().query`` returns for the converted geometry.

    Raises
    ------
    TypeError
        If ``coordinates`` is not iterable or does not hold 2 or 4 values.
    """
    invalid_message = (
        "Invalid coordinates, must be iterable in format "
        "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
        "Got `coordinates` = {}."
    )

    # convert bounds to geometry
    # the old API uses tuples of bound, but pygeos uses geometries
    try:
        iter(coordinates)
    except TypeError:
        # likely not an iterable
        # this is a check that rtree does, we mimic it
        # to ensure a useful failure message
        raise TypeError(invalid_message.format(coordinates)) from None

    # Iterables without a length (generators, iterators) would otherwise fail
    # in len() with an unrelated message; materialise them once.
    if not hasattr(coordinates, "__len__"):
        coordinates = tuple(coordinates)

    # need to convert tuple of bounds to a geometry object
    n_values = len(coordinates)
    if n_values == 4:
        return super().query(pygeos.box(*coordinates))
    if n_values == 2:
        return super().query(pygeos.points(*coordinates))
    raise TypeError(invalid_message.format(coordinates))
def test_relate_pattern():
    """``relate_pattern`` accepts the exact ``relate`` string and wildcard patterns."""
    line = pygeos.linestrings([(0, 0), (1, 0), (1, 1)])
    square = pygeos.box(0, 0, 2, 2)

    assert pygeos.relate(line, square) == "11F00F212"

    # exact match and all-wildcard pattern both succeed
    for matching in ("11F00F212", "*********"):
        assert pygeos.relate_pattern(line, square, matching)

    # a pattern whose first entry disagrees must fail
    assert not pygeos.relate_pattern(line, square, "F********")
def summarize_raster_by_geometry(geometries, extract_func, outfilename, progress_label="", bounds=None, **kwargs):
    """Summarize values of input dataset by geometry and write results to a
    feather file, with one column for shape_mask and one for each raster value.

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 / marine block
    extract_func : function that extracts results for each geometry
        Called as ``extract_func([to_dict(geometry)], bounds=..., **kwargs)``
        where ``bounds`` is ``pg.total_bounds(geometry)``. Returning ``None``
        skips that geometry.
    outfilename : str
        Path handed to ``DataFrame.to_feather``.
    progress_label : str
        Label of the progress bar.
    bounds : sequence of 4 numbers, optional
        Unpacked into ``pg.box``; when given, only geometries returned by an
        ``STRtree`` query against that box are processed.
    **kwargs
        Forwarded unchanged to ``extract_func``.

    Returns
    -------
    None
        Nothing is written when there are no geometries or no results.
    """
    if bounds is not None:
        # select only those areas that overlap input area
        tree = pg.STRtree(geometries)
        hits = tree.query(pg.box(*bounds))
        geometries = geometries.iloc[hits].copy()

    if not len(geometries):
        return

    index = []
    results = []
    progress = Bar(progress_label, max=len(geometries))
    # Series.items() replaces Series.iteritems(), which pandas 2.0 removed.
    for geometry_id, geometry in progress.iter(geometries.items()):
        zone_results = extract_func([to_dict(geometry)], bounds=pg.total_bounds(geometry), **kwargs)
        if zone_results is None:
            continue

        index.append(geometry_id)
        results.append(zone_results)

    if not results:
        return

    df = pd.DataFrame(results, index=index)

    results = df[["shape_mask"]].copy()
    results.index.name = "id"

    avg_cols = [c for c in df.columns if c.endswith("_avg")]

    # every remaining column holds an array of counts per row; expand each
    # array into separate "<col>_<position>" columns
    for col in df.columns.difference(["shape_mask"] + avg_cols):
        s = df[col].apply(pd.Series).fillna(0)
        s.columns = [f"{col}_{c}" for c in s.columns]
        results = results.join(s)

    if len(avg_cols) > 0:
        # note: round() is applied to the whole joined frame, not only the averages
        results = results.join(df[avg_cols]).round()

    results.reset_index().to_feather(outfilename)
def test_coverage_union_reduce_1dim(n):
    """
    This is tested separately from other set operations as it differs in two ways:
      1. It expects only non-overlapping polygons
      2. It expects GEOS 3.8.0+
    """
    # three unit squares laid side by side along the x axis
    squares = [pygeos.box(x, 0, x + 1, 1) for x in range(3)]
    actual = pygeos.coverage_union_all(squares[:n])

    # perform the reduction in a python loop and compare
    expected = squares[0]
    for position in range(1, n):
        expected = pygeos.coverage_union(expected, squares[position])

    assert pygeos.equals(actual, expected)
def test_destroy_prepared():
    """``destroy_prepared`` zeroes the prepared pointers, skips None and can be repeated."""
    geoms = np.array([pygeos.points(1, 1), None, pygeos.box(0, 0, 1, 1)])

    pygeos.prepare(geoms)
    for position in (0, 2):
        assert geoms[position]._ptr_prepared != 0

    pygeos.destroy_prepared(geoms)
    assert geoms[0]._ptr_prepared == 0
    assert geoms[1] is None
    assert geoms[2]._ptr_prepared == 0

    # a second call on already-destroyed geometries does not error
    pygeos.destroy_prepared(geoms)
def convert_point_to_constant_box(input_array, box_size):
    """Replace the geometry in column 0 of every row by a square box.

    Args:
        input_array (numpy.ndarray): 2D array whose first column holds
            geometries; it is copied, not modified.
        box_size (float): Side length of the boxes.

    Returns:
        numpy.ndarray: A copy of ``input_array`` where each first-column entry
        is a ``box_size`` x ``box_size`` box centred on the centroid of the
        geometry that was there.
    """
    converted = np.copy(input_array)
    half = box_size / 2
    for row in range(converted.shape[0]):
        center_x, center_y = get_coordinates(centroid(converted[row, 0]))[0]
        converted[row, 0] = box(
            center_x - half,
            center_y - half,
            center_x + half,
            center_y + half,
        )
    return converted
class _TestPolygons(_TestSimilarity):
    # Fixture data for the checks inherited from _TestSimilarity:
    # polygons (plus one point in DISSIMILAR).

    SIMILAR = [
        box(0, 0, 1, 1),
        polygons([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]),
        polygons([[0.5, 0], [1, 0], [1, 1], [0, 1], [0, 0.5], [0, 0]]),
    ]

    DISSIMILAR = [
        box(10, 10, 11, 11),
        polygons([[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]),
        # square with a square hole
        polygons(
            [[8, 8], [8, 13], [13, 13], [13, 8], [8, 8]],
            holes=[[[9, 9], [9, 12], [12, 12], [12, 9], [9, 9]]],
        ),
        polygons([[4.5, 4], [5, 4], [5, 5], [4, 5], [4, 4.5], [4, 4]]),
        points([0.5, 0.5]),
    ]

    # Two parallel rows; VALUE below holds one entry per column.
    VALUE_GEOMETRIES = [
        [
            box(0, 0, 2, 2),
            polygons(
                [[8, 8], [8, 13], [13, 13], [13, 8], [8, 8]],
                holes=[[[9, 9], [9, 12], [12, 12], [12, 9], [9, 9]]],
            ),
        ],
        [
            box(-1, -1, 1, 1),
            box(8, 8, 13, 13),
        ],
    ]
    VALUE = [0.14285714285714285, 0.64]
def setup(self):
    """Build the random fixtures: one merged polygon, 100 bounds rows and 100 boxes."""
    # create irregular polygons by merging overlapping point buffers
    seed_points = pygeos.points(np.random.random((1000, 2)) * 500)
    buffered = pygeos.buffer(seed_points, 10)
    self.polygon = pygeos.union_all(buffered)

    # 100 squares of side 100 whose lower-left corner lies in [0, 100) x [0, 100)
    xmin = np.random.random(100) * 100
    xmax = xmin + 100
    ymin = np.random.random(100) * 100
    ymax = ymin + 100

    # the same rectangles twice: as an (n, 4) bounds array and as geometries
    stacked = np.array([xmin, ymin, xmax, ymax])
    self.bounds = stacked.T
    self.boxes = pygeos.box(xmin, ymin, xmax, ymax)
def intersection(self, coordinates, objects=False):
    """Wrapper for pygeos.query that uses the RTree API.

    Parameters
    ----------
    coordinates : sequence or array
        Sequence of the form (min_x, min_y, max_x, max_y)
        to query a rectangle or (x, y) to query a point.
        One-shot iterables (e.g. generators) are accepted as well.
    objects : boolean, default False
        Deprecated. If True, return a list of ``self.with_objects`` records
        (one per hit, built from ``self.ids`` and ``self.objects``).
        If False, the integer indexes returned by the query are returned.

    Raises
    ------
    TypeError
        If ``coordinates`` is not iterable or does not hold 2 or 4 values.
    """
    if objects:
        warn(
            "`objects` is deprecated and will be removed in a future version. "
            "Instead, use `iloc` to index your GeoSeries/GeoDataFrame using "
            "integer indexes returned by `intersection`.",
            FutureWarning,
            # attribute the warning to the caller rather than to this wrapper
            stacklevel=2,
        )

    invalid_message = (
        "Invalid coordinates, must be iterable in format "
        "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
        "Got `coordinates` = {}."
    )

    # convert bounds to geometry
    # the old API uses tuples of bound, but pygeos uses geometries
    try:
        iter(coordinates)
    except TypeError:
        # likely not an iterable
        # this is a check that rtree does, we mimic it
        # to ensure a useful failure message
        raise TypeError(invalid_message.format(coordinates)) from None

    # Iterables without a length (generators, iterators) would otherwise fail
    # in len() with an unrelated message; materialise them once.
    if not hasattr(coordinates, "__len__"):
        coordinates = tuple(coordinates)

    # need to convert tuple of bounds to a geometry object
    n_values = len(coordinates)
    if n_values == 4:
        indexes = super().query(pygeos.box(*coordinates))
    elif n_values == 2:
        indexes = super().query(pygeos.points(*coordinates))
    else:
        raise TypeError(invalid_message.format(coordinates))

    if not objects:
        return indexes

    objs = self.objects[indexes].values
    ids = self.ids[indexes]
    return [
        self.with_objects(id=id_, object=obj) for id_, obj in zip(ids, objs)
    ]
def test_coverage_union_reduce_axis():
    """``coverage_union_all`` reduces along the requested axis (axis 0 by default)."""
    # shape = (3, 2), all polygons - none of them overlapping
    grid = [[pygeos.box(i, j, i + 1, j + 1) for i in range(2)] for j in range(3)]

    # (keyword arguments, expected output shape)
    cases = [
        ({}, (2,)),
        ({"axis": 0}, (2,)),  # default
        ({"axis": 1}, (3,)),
        ({"axis": -1}, (3,)),
    ]
    for kwargs, expected_shape in cases:
        actual = pygeos.coverage_union_all(grid, **kwargs)
        assert actual.shape == expected_shape
def as_boxes(x):
    """Convert an array of geometries to an array of bounding box polygons.

    Args:
        x (numpy.ndarray): An array of geometries.

    Returns:
        numpy.ndarray: One box per input geometry, built from the four
        columns returned by ``bounds(x)``.

    """
    extents = bounds(x)
    # split the (n, 4) bounds array column by column
    xmin, ymin, xmax, ymax = (extents[:, column] for column in range(4))
    return box(xmin, ymin, xmax, ymax)
def test_prepare():
    """``prepare`` sets the prepared pointers, leaves None alone and is a no-op when repeated."""
    geoms = np.array([pygeos.points(1, 1), None, pygeos.box(0, 0, 1, 1)])
    for position in (0, 2):
        assert geoms[position]._ptr_prepared == 0

    pygeos.prepare(geoms)
    assert geoms[0]._ptr_prepared != 0
    assert geoms[1] is None
    assert geoms[2]._ptr_prepared != 0

    # preparing again actually does nothing
    pointer_before = geoms[0]._ptr_prepared
    pygeos.prepare(geoms)
    assert geoms[0]._ptr_prepared == pointer_before
def test_confidence_from_max_iou_bbox():
    """Each detection row gains a second column holding the expected value."""
    detections = np.array([[box(0, 0, 9, 5)], [box(23, 13, 29, 18)]])
    ground_truths = np.array([[box(5, 2, 15, 9)], [box(18, 10, 26, 15)]])

    expected = np.array(
        [
            [box(0, 0, 9, 5), 0.11650485436893204],
            [box(23, 13, 29, 18), 0.09375],
        ],
        dtype=object,
    )

    res = add_confidence_from_max_iou(detections, ground_truths)
    assert np.all(res == expected)
def classify(self):
    """Assign an integer code to every node square.

    Builds ``self.squares`` from the node extents and fills ``self.codes``:

    * 2 - square is disjoint from ``self.aecb.SH``
    * 3 - square is contained in ``self.aicb.SH``
    * 1 - square is contained in ``self.emb.SH``, disjoint from
      ``self.imb.SH`` and neither of the two cases above
    * 0 - everything else

    The mask of code-1 squares is also kept as ``self.test34``.
    """
    self.squares = pygeos.box(self.xmin, self.ymin, self.xmax, self.ymax)
    squares = self.squares

    # can maybe optimize by ordering tests and only doing tests where needed???
    outside_aecb = pygeos.disjoint(self.aecb.SH, squares)
    inside_aicb = pygeos.contains(self.aicb.SH, squares)
    inside_emb = pygeos.contains(self.emb.SH, squares)
    outside_imb = pygeos.disjoint(self.imb.SH, squares)

    # Squares already caught by the first two tests are excluded here; a plain
    # "inside_emb & outside_imb" was noted as wasteful, especially for
    # FullCoordinateTree.
    emb_only = np.logical_and.reduce(
        [inside_emb, outside_imb, ~outside_aecb, ~inside_aicb]
    )

    codes = np.zeros(self.n_node, dtype=int)
    codes[outside_aecb] = 2
    codes[inside_aicb] = 3
    codes[emb_only] = 1
    self.codes = codes

    self.test34 = emb_only  # useful for later
def mbr(self):
    """Computes the MBR of the dataset.

    The box spans the min/max of the latitude and longitude variables. For a
    'WGS 84' dataset whose longitudes start at or above 0 and exceed 180,
    both longitude limits are shifted by -180 before the box is built.

    Returns:
        (string) The WKT representation of the MBR, or None when the
        latitude or longitude variable is missing from the dataset.
    """
    available = self.dataset.variables.keys()
    if not (self._lat_attr in available and self._lon_attr in available):
        return None

    latitudes = self.dataset.variables[self._lat_attr][:]
    longitudes = self.dataset.variables[self._lon_attr][:]
    lat_min, lat_max = latitudes.min(), latitudes.max()
    lon_min, lon_max = longitudes.min(), longitudes.max()

    # NOTE(review): presumably maps a 0..360 longitude convention onto -180..180 - confirm
    if self._short_crs == 'WGS 84' and lon_min >= 0 and lon_max > 180.:
        lon_min -= 180.
        lon_max -= 180.

    return to_wkt(box(lon_min, lat_min, lon_max, lat_max))
def geojson(geojson, basemap_provider='OpenStreetMap', basemap_name='Mapnik', width='100%', height='100%', styled=False):
    """Plots into a Folium map.

    Parameters:
        geojson (dict): A geojson object.
        basemap_provider (string): The basemap provider.
        basemap_name: The basemap itself as named by the provider.
            List and preview of available providers and their basemaps can be found in https://leaflet-extras.github.io/leaflet-providers/preview/
        width (int|string): Width of the map in pixels or percentage (default: 100%).
        height (int|string): Height of the map in pixels or percentage (default: 100%).
        styled (bool): If True, follows the mapbox simple style, as proposed in https://github.com/mapbox/simplestyle-spec/tree/master/1.1.0.

    Returns:
        (object) A Folium Map object displaying the geoJSON.
    """
    features = gpd.GeoDataFrame.from_features(geojson['features'], crs="epsg:4326")

    # total_bounds is ordered (minx, miny, maxx, maxy); for lon/lat data that
    # is (west, south, east, north).
    west, south, east, north = features.geometry.total_bounds

    # The box is deliberately built with the axes swapped so its centroid and
    # the fit_bounds corners come out as (lat, lon) pairs.
    lat_lon_box = pg.box(south, west, north, east)
    map_center = pg.get_coordinates(pg.centroid(lat_lon_box))[0]

    tiles, attribution, max_zoom = get_provider_info(basemap_provider, basemap_name)
    m = folium.Map(
        location=map_center,
        tiles=tiles,
        attr=attribution,
        max_zoom=max_zoom,
        width=width,
        height=height,
    )
    m.fit_bounds([[south, west], [north, east]])

    if styled:
        def simple_style(feature):
            # translate simplestyle properties into folium style options
            props = feature['properties']
            return dict(
                color=props['stroke'],
                fillColor=props['fill'],
                fillOpacity=props['fill-opacity'],
                opacity=0.1,
                weight=props['stroke-width'],
            )

        layer = folium.GeoJson(features, name='geojson', style_function=simple_style)
    else:
        layer = folium.GeoJson(features, name='geojson')
    layer.add_to(m)

    return m
def intersection(self, coordinates, objects=False):
    """Wrapper for pygeos.query that uses the RTree API.

    Parameters
    ----------
    coordinates : sequence or array
        Sequence of the form (min_x, min_y, max_x, max_y)
        to query a rectangle or (x, y) to query a point.
        One-shot iterables (e.g. generators) are accepted as well.
    objects : bool, default False
        If True, return a list of ``self.with_objects`` records (one per hit,
        built from ``self.ids`` and ``self.objects``).
        If False, the integer indexes returned by the query are returned.

    Raises
    ------
    TypeError
        If ``coordinates`` is not iterable or does not hold 2 or 4 values.
    """
    invalid_message = (
        "Invalid coordinates, must be iterable in format "
        "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points)."
    )

    # convert bounds to geometry
    # the old API uses tuples of bound, but pygeos uses geometries
    try:
        iter(coordinates)
    except TypeError:
        # likely not an iterable
        # this is a check that rtree does, we mimic it
        # to ensure a useful failure message
        raise TypeError(invalid_message) from None

    # Iterables without a length (generators, iterators) would otherwise fail
    # in len() with an unrelated message; materialise them once.
    if not hasattr(coordinates, "__len__"):
        coordinates = tuple(coordinates)

    # need to convert tuple of bounds to a geometry object
    n_values = len(coordinates)
    if n_values == 4:
        indexes = super().query(box(*coordinates))
    elif n_values == 2:
        indexes = super().query(points(*coordinates))
    else:
        raise TypeError(invalid_message)

    if not objects:
        return indexes

    objs = self.objects[indexes].values
    ids = self.ids[indexes]
    return [
        self.with_objects(id=id_, object=obj) for id_, obj in zip(ids, objs)
    ]
def point_to_box(x, width=64., height=64.):
    """Convert an array of points to an array of constant size boxes.

    Each box is centred on its point (after ``enforce_point`` has been
    applied to the input).

    Args:
        x (numpy.ndarray): An array of points.
        width (float): The output boxes' width.
        height (float): The output boxes' height.

    Returns:
        numpy.ndarray: An array of boxes.

    """
    centers = get_coordinates(enforce_point(x))
    center_x = centers[:, 0]
    center_y = centers[:, 1]
    half_width = width / 2
    half_height = height / 2
    return box(
        center_x - half_width,
        center_y - half_height,
        center_x + half_width,
        center_y + half_height,
    )
def convert_to_bounding_box(input_array, trim_invalid_geometry=False, autocorrect_invalid_geometry=False):
    r"""Convert an input array to a BoundingBox array.

    Args:
        input_array (ndarray, list): A ndarray of BoundingBox optionally followed by a confidence value and/or a label
            where each row is: ``[xmin, ymin, xmax, ymax, (confidence), (label)]``
        trim_invalid_geometry (bool): Optional, default to ``False``. If set to ``True`` rows whose geometry is not
            valid (per ``is_valid``) are left out of ``output_array``, so that
            ``output_array.shape[0] <= input_array.shape[0]``. If set to ``False``, no validity check is performed
            here. NOTE(review): earlier documentation promised an ``InvalidGeometryError`` in that case, but this
            function does not raise it - confirm whether a caller does.
        autocorrect_invalid_geometry (Bool): Optional, default to ``False``. Doesn't do anything, introduced to unify
            convert functions interfaces.

    Returns:
        ndarray: A BoundingBox ndarray where each row contains a geometry followed by optionally confidence and a
        label e.g.: ``[BoundingBox, (confidence), (label)]``

        For an empty input the tuple ``('undefined', input_array)`` is returned instead.

    Raises:
        ValueError: If ``input_array`` is not 2D or has fewer than 4 columns.

    """
    input_array = np.array(input_array, dtype=np.dtype('O'))

    if input_array.size == 0:
        # NOTE(review): the empty case yields a (type, array) tuple rather than a bare array; kept as-is
        # because callers may rely on it.
        return 'undefined', input_array

    if input_array.ndim != 2:
        raise ValueError('Invalid array number of dimensions: '
                         'Expected a 2D array, found {}D.'.format(input_array.ndim))

    n_rows, n_columns = input_array.shape
    if n_columns < 4:
        # Without this guard a 3-column input surfaces as an IndexError further down.
        raise ValueError('Invalid array number of columns: '
                         'Expected at least 4 coordinates per row, found {}.'.format(n_columns))

    coordinates_array = input_array[:, :4].astype(np.float64)

    # The four coordinate columns collapse into one geometry column; anything after them is carried over.
    object_array = np.empty((n_rows, n_columns - 3), dtype=np.dtype('O'))
    object_array[:, 0] = box(coordinates_array[:, 0], coordinates_array[:, 1],
                             coordinates_array[:, 2], coordinates_array[:, 3])
    object_array[:, 1:] = input_array[:, 4:]

    if trim_invalid_geometry:
        object_array = object_array[is_valid(object_array[:, 0]), :]

    return object_array
def get_sample(self, n_obs=None, frac=None, method="first", bbox=None, random_state=None):
    """Creates a sample of the dataframe.

    Parameters:
        n_obs (int): The number of features contained in the sample. When neither n_obs nor frac is given,
            5% of the rows are used, capped at 100000.
        frac (float): The fraction of the total number of features contained in the sample. It overrides n_obs.
        method (string): The method it will be used to extract the sample. One of: first, last, random.
        bbox (list): The desired bounding box of the sample, unpacked into ``pg.box``. Ignored (with a warning)
            when the dataframe is not spatial.
        random_state (int): Seed or RandomState for reproducibility, when None a random seed is chosen.
            Only used by the random method.

    Returns:
        (object): A sample dataframe.

    Raises:
        ValueError: If ``method`` is not one of first, last, random.
    """
    df = self.df
    if bbox is not None:
        if not self._has_geometry:
            warnings.warn('DataFrame is not spatial.')
        else:
            # NOTE(review): presumably `within` returns the filtered frame here - confirm for the frame type in use
            df = self.df.within(pg.box(*bbox))

    length = len(df)
    if n_obs is None and frac is None:
        n_obs = min(round(0.05 * length), 100000)

    if method in ("first", "last"):
        if frac is not None:
            n_obs = round(frac * length)
        return df.head(n_obs) if method == "first" else df.tail(n_obs)

    if method == "random":
        return df.sample(n=n_obs, frac=frac, random_state=random_state)

    # ValueError is still an Exception, so existing `except Exception` handlers keep working.
    raise ValueError('ERROR: Method %s not supported' % (method))
def window(geometries, distance):
    """Return windows around geometries bounds +/- distance

    Parameters
    ----------
    geometries : Series or ndarray
        geometries to window
    distance : number or ndarray
        radius of window
        if ndarray, must match length of geometries

    Returns
    -------
    Series or ndarray
        polygon windows; a Series input yields a Series with the same index
    """
    left, bottom, right, top = pg.bounds(geometries).T
    expanded = pg.box(
        left - distance,
        bottom - distance,
        right + distance,
        top + distance,
    )

    if not isinstance(geometries, pd.Series):
        return expanded

    return pd.Series(expanded, index=geometries.index)
def time_box(self):
    """Build boxes spanning ``self.coords`` to ``self.coords + 100``; the result is discarded."""
    lower = self.coords
    upper = self.coords + 100
    # one row per box -> transpose so each coordinate column becomes one argument
    corners = np.hstack([lower, upper])
    pygeos.box(*corners.T)
import numpy as np import pygeos point_polygon_testdata = ( pygeos.points(np.arange(6), np.arange(6)), pygeos.box(2, 2, 4, 4), ) point = pygeos.points(2, 3) line_string = pygeos.linestrings([(0, 0), (1, 0), (1, 1)]) linear_ring = pygeos.linearrings([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]) polygon = pygeos.polygons([(0, 0), (2, 0), (2, 2), (0, 2), (0, 0)]) multi_point = pygeos.multipoints([(0, 0), (1, 2)]) multi_line_string = pygeos.multilinestrings([[(0, 0), (1, 2)]]) multi_polygon = pygeos.multipolygons([ [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)], [(2.1, 2.1), (2.2, 2.1), (2.2, 2.2), (2.1, 2.2), (2.1, 2.1)], ]) geometry_collection = pygeos.geometrycollections( [pygeos.points(51, -1), pygeos.linestrings([(52, -1), (49, 2)])]) point_z = pygeos.points(1.0, 1.0, 1.0) polygon_with_hole = pygeos.Geometry( "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0), (2 2, 2 4, 4 4, 4 2, 2 2))") all_types = ( point, line_string, linear_ring, polygon, multi_point, multi_line_string,
def test_box_multiple():
    """Array-like upper bounds produce one box per element."""
    boxes = pygeos.box(0, 0, [1, 2], [1, 2])
    expected_wkt = [
        "POLYGON ((1 0, 1 1, 0 1, 0 0, 1 0))",
        "POLYGON ((2 0, 2 2, 0 2, 0 0, 2 0))",
    ]
    for position, wkt in enumerate(expected_wkt):
        assert str(boxes[position]) == wkt
def test_box():
    """A single box is rendered with the expected ring order."""
    unit_square = pygeos.box(0, 0, 1, 1)
    expected_wkt = "POLYGON ((1 0, 1 1, 0 1, 0 0, 1 0))"
    assert str(unit_square) == expected_wkt
async def create_custom_report(ctx, zip_filename, dataset, layer, name=""):
    """Create a Blueprint report for a user-uploaded GIS file contained in a zip.
    Zip must contain either a shapefile or a file geodatabase.

    Progress is reported through ``set_progress`` using ``ctx["job_id"]``.

    Parameters
    ----------
    ctx : job context
    zip_filename : str
        full path to zip filename
    dataset : str
        full path to dataset within zip file
    layer : str
        name of layer within dataset
    name : str, optional (default: "")
        Name of area of interest (included in output report)

    Returns
    -------
    (str, list of str)
        path to output PDF file and the non-fatal error messages collected
        while rendering maps

    Raises
    ------
    DataError
        Raised if bounds are too large or if area of interest doesn't overlap SA region
    """
    errors = []

    await set_progress(ctx["job_id"], 0, "Loading data")

    # read straight out of the zip archive
    source_path = f"/vsizip/{zip_filename}/{dataset}"
    frame = read_dataframe(source_path, layer=layer)
    valid_geometries = pg.make_valid(frame.geometry.values.data)

    await set_progress(ctx["job_id"], 5, "Preparing area of interest")

    # dissolve everything into a single geometry (kept as a 1-element array)
    dissolved = pg.union_all(valid_geometries)
    geometry = np.asarray([dissolved])

    geo_geometry = to_crs(geometry, frame.crs, GEO_CRS)
    bounds = pg.total_bounds(geo_geometry)

    # estimate area from the bounding box in DATA_CRS
    data_bounds = pg.total_bounds(to_crs(geometry, frame.crs, DATA_CRS))
    extent_area = pg.area(pg.box(*data_bounds)) * M2_ACRES
    if extent_area >= CUSTOM_REPORT_MAX_ACRES:
        raise DataError(
            f"The bounding box of your area of interest is too large ({extent_area:,.0f} acres), it must be < {CUSTOM_REPORT_MAX_ACRES:,.0f} acres."
        )

    await set_progress(ctx["job_id"], 10, "Calculating results (this might take a while)")

    # calculate results, data must be in DATA_CRS
    print("Calculating results...")
    area = CustomArea(geometry, frame.crs, name)
    results = area.get_results()

    if results is None:
        raise DataError("area of interest does not overlap Southeast Blueprint")

    if name:
        results["name"] = name

    has_urban = "proj_urban" in results and results["proj_urban"][4] > 0
    has_slr = "slr" in results
    has_ownership = "ownership" in results
    has_protection = "protection" in results

    # compile indicator IDs across all inputs
    indicators = [
        indicator["id"]
        for input_area in results["inputs"]
        for ecosystem in input_area.get("ecosystems", [])
        for indicator in ecosystem["indicators"]
    ]

    await set_progress(ctx["job_id"], 25, "Creating maps (this might take a while)")

    print("Rendering maps...")
    maps, scale, map_errors = await render_maps(
        bounds,
        geometry=geo_geometry[0],
        input_ids=results["input_ids"],
        indicators=indicators,
        urban=has_urban,
        slr=has_slr,
        ownership=has_ownership,
        protection=has_protection,
    )

    if map_errors:
        log.error(f"Map rendering errors: {map_errors}")

        if "basemap" in map_errors:
            errors.append("Error creating basemap for all maps")

        if "aoi" in map_errors:
            errors.append("Error rendering area of interest on maps")

        # anything besides the two cases above is reported generically
        other_failures = set(map_errors.keys()) - {"basemap", "aoi"}
        if other_failures:
            errors.append("Error creating one or more maps")

    await set_progress(ctx["job_id"], 75, "Creating PDF (this might take a while)", errors=errors)

    results["scale"] = scale
    pdf = create_report(maps=maps, results=results)

    await set_progress(ctx["job_id"], 95, "Nearly done", errors=errors)

    # from here on `name` is the path of the temporary PDF, not the area name
    fp, name = tempfile.mkstemp(suffix=".pdf", dir=TEMP_DIR)
    with open(fp, "wb") as out:
        out.write(pdf)

    await set_progress(ctx["job_id"], 100, "All done!", errors=errors)

    log.debug(f"Created PDF at: {name}")

    return name, errors
assert tree.query(None).size == 0 def test_query_empty(tree): assert tree.query(empty).size == 0 @pytest.mark.parametrize( "geometry,expected", [ # points do not intersect (pygeos.points(0.5, 0.5), []), # points intersect (pygeos.points(1, 1), [1]), # box contains points (box(0, 0, 1, 1), [0, 1]), # box contains points (box(5, 5, 15, 15), [5, 6, 7, 8, 9]), # envelope of buffer contains points (pygeos.buffer(pygeos.points(3, 3), 1), [2, 3, 4]), # envelope of points contains points (pygeos.multipoints([[5, 7], [7, 5]]), [5, 6, 7]), ], ) def test_query_points(tree, geometry, expected): assert_array_equal(tree.query(geometry), expected) @pytest.mark.parametrize( "geometry,expected", [