def main():
    args = gen_args()
    if not args.geojson:
        raise ValueError("No geojson file specified")
    if not args.outfile:
        raise ValueError("No output file specified")

    print('Using tiling grid ' + args.grid)
    with fsspec.open(args.geojson) as fhin:
        data = json.load(fhin)

    geom = Geometry(data["features"][0]["geometry"],
                    crs=data["crs"]["properties"]["name"])
    africa = GRIDS[args.grid]

    task_df = pd.read_csv(args.task_csv)
    aez_tasks = []
    for row in task_df.itertuples():
        tmp_geom = africa.tile_geobox((row.X, row.Y)).extent
        if geom.contains(tmp_geom) or geom.overlaps(tmp_geom):
            aez_tasks.append(row)

    output_df = pd.DataFrame(aez_tasks)
    output_df.to_csv(args.outfile, index=False)
    print("Generated " + str(len(output_df)) + " tasks from geojson")

    if args.publish:
        tasks_slices = gen_slices(output_df)
        publish_task(tasks_slices, args.db, args.sqs)

def geom_from_file(filename, feature_id):
    """
    The geometry of a feature

    :param filename: name of shape file
    :param feature_id: the id of the wanted feature
    """
    import fiona
    with fiona.open(filename) as input_region:
        geom_list = []
        geopolygon_list = []
        feature_list = []
        crs = CRS(input_region.crs_wkt)
        for feature in input_region:
            if feature_id is not None and feature_id != {}:
                if feature['properties']['ID'] in feature_id:
                    geom = feature['geometry']
                    return (feature['properties'], geom,
                            input_region.crs_wkt, Geometry(geom, crs))
            else:
                geom = feature['geometry']
                feature_list.append(feature['properties'])
                geom_list.append(geom)
                geopolygon_list.append(Geometry(geom, crs))
        # Log before returning; in the original this line sat after the
        # return statement and was unreachable.
        if not geom_list:
            _LOG.info("No geometry found")
        return feature_list, geom_list, input_region.crs_wkt, geopolygon_list

def geom_from_file(filename, feature_id):
    """
    The geometry of a feature

    :param filename: name of shape file
    :param feature_id: the id of the wanted feature
    """
    import fiona
    geometry_list = []
    geopolygon_list = []
    feature_list = []
    find_feature = False
    with fiona.open(filename) as input_region:
        crs_wkt = input_region.crs_wkt
        crs = CRS(crs_wkt)
        for feature in input_region:
            find_feature = False
            properties = feature['properties']
            if (feature_id is None
                    or properties.get('ID') in feature_id
                    or properties.get('id') in feature_id):
                feature_list.append(properties)
                find_feature = True
            # Fall back to the top-level fiona record id. Using elif avoids
            # double-appending a matched feature and avoids a TypeError when
            # feature_id is None (the original used a second bare `if`).
            elif int(feature.get('id')) in feature_id:
                feature_list.append(feature)
                find_feature = True
            if find_feature:
                geometry = feature['geometry']
                geopolygon = Geometry(geometry, crs)
                geometry_list.append(geometry)
                geopolygon_list.append(geopolygon)
        if not geometry_list:
            _LOG.info("No geometry found")
    return feature_list, geometry_list, crs_wkt, geopolygon_list

def tiles_from_geopolygon(
        self,
        geopolygon: geometry.Geometry,
        tile_buffer: Optional[Tuple[float, float]] = None,
        geobox_cache: Optional[dict] = None
) -> Iterator[Tuple[Tuple[int, int], geometry.GeoBox]]:
    """
    Returns an iterator of tile_index, :py:class:`GeoBox` tuples across
    the grid and overlapping with the specified `geopolygon`.

    .. note::

       Grid cells are referenced by coordinates `(x, y)`, which is the
       opposite to the usual CRS dimension order.

    :param geometry.Geometry geopolygon: Polygon to tile
    :param tile_buffer: Optional <float, float> tuple, extra padding for
                        the query in native units of this GridSpec
    :param dict geobox_cache: Optional cache to re-use geoboxes instead of
                              creating a new one each time
    :return: iterator of grid cells with :py:class:`GeoBox` tiles
    """
    geopolygon = geopolygon.to_crs(self.crs)
    bbox = geopolygon.boundingbox
    bbox = bbox.buffered(*tile_buffer) if tile_buffer else bbox

    for tile_index, tile_geobox in self.tiles(bbox, geobox_cache):
        tile_geobox = (tile_geobox.buffered(*tile_buffer)
                       if tile_buffer else tile_geobox)
        if geometry.intersects(tile_geobox.extent, geopolygon):
            yield (tile_index, tile_geobox)

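# --- Illustrative usage of tiles_from_geopolygon (not from the original
# source). A minimal sketch assuming datacube's GridSpec from datacube.model,
# on the Australian Albers 100 km grid; the polygon and grid parameters are
# made up for the example.
from datacube.model import GridSpec
from datacube.utils import geometry
from datacube.utils.geometry import CRS

def example_tiles_from_geopolygon():
    grid = GridSpec(crs=CRS('EPSG:3577'),
                    tile_size=(100000.0, 100000.0),
                    resolution=(-25, 25))
    poly = geometry.polygon([(148.0, -35.0), (149.0, -35.0),
                             (149.0, -36.0), (148.0, -36.0),
                             (148.0, -35.0)],
                            crs=CRS('EPSG:4326'))
    # Iterate the grid cells that overlap the polygon
    for tile_index, geobox in grid.tiles_from_geopolygon(poly):
        print(tile_index, geobox.shape)
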
def boundary_geo_polygon(geometry, crs):
    import shapely.ops
    from shapely.geometry import shape, mapping

    joined = shapely.ops.unary_union(list(shape(geom) for geom in geometry))
    final = joined.convex_hull
    boundary_polygon = Geometry(mapping(final), crs)
    return boundary_polygon

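# --- Illustrative call of boundary_geo_polygon above (not from the original
# source): `geometry` is an iterable of GeoJSON-style mappings, and the result
# is the convex hull of their union as a datacube Geometry in the given CRS.
def example_boundary_geo_polygon():
    geoms = [
        {"type": "Polygon",
         "coordinates": [[(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0)]]},
        {"type": "Polygon",
         "coordinates": [[(2.0, 2.0), (3.0, 2.0), (3.0, 3.0), (2.0, 2.0)]]},
    ]
    hull = boundary_geo_polygon(geoms, CRS("EPSG:4326"))
    print(hull.crs, hull.geom.wkt)
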
def __call__(self, product, time, group_by) -> Tile:
    # Do for a specific poly whose boundary is known
    output_crs = CRS(self.storage['crs'])
    filtered_item = [
        'geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y'
    ]
    filtered_dict = {
        k: v
        for k, v in filter(lambda t: t[0] in filtered_item,
                           self.input_region.items())
    }
    if 'feature_id' in self.input_region:
        filtered_dict['geopolygon'] = Geometry(
            self.input_region['geom_feat'],
            CRS(self.input_region['crs_txt']))
        geopoly = filtered_dict['geopolygon']
    else:
        geopoly = query_geopolygon(**self.input_region)

    datasets = self.dc.find_datasets(product=product,
                                     time=time,
                                     group_by=group_by,
                                     **filtered_dict)
    group_by = query_group_by(group_by=group_by)
    sources = self.dc.group_datasets(datasets, group_by)

    output_resolution = [
        self.storage['resolution'][dim] for dim in output_crs.dimensions
    ]
    geopoly = geopoly.to_crs(output_crs)
    geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

    return Tile(sources, geobox)

def footprint_wgs84(self) -> Optional[MultiPolygon]:
    if not self.footprint_geometry:
        return None
    if not self.footprint_crs:
        warnings.warn(f"Geometry without a crs for {self}")
        return None
    return (Geometry(self.footprint_geometry, crs=self.footprint_crs)
            .to_crs("EPSG:4326", wrapdateline=True)
            .geom)

def eo3_lonlat_bbox(doc, tol=None):
    epsg4326 = CRS('epsg:4326')
    crs = CRS(doc['crs'])
    grids = doc['grids']
    geometry = doc.get('geometry')

    if geometry is None:
        return bbox_union(
            grid2polygon(grid, crs).to_crs(epsg4326, tol).boundingbox
            for grid in grids.values())
    else:
        return Geometry(geometry, crs).to_crs(epsg4326, tol).boundingbox

def transform_geojson_wgs_to_epsg(geojson, EPSG):
    """
    Takes a geojson dictionary and converts it from WGS84 (EPSG:4326) to
    the desired EPSG

    Parameters
    ----------
    geojson: dict
        a geojson dictionary containing a 'geometry' key, in WGS84 coordinates
    EPSG: int
        numeric code for the EPSG coordinate reference system to transform into

    Returns
    -------
    transformed_geojson: dict
        a geojson dictionary containing a 'coordinates' key, in the desired CRS
    """
    gg = Geometry(geojson['geometry'], CRS('epsg:4326'))
    gg = gg.to_crs(CRS(f'epsg:{EPSG}'))
    return gg.__geo_interface__

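# --- Illustrative call of transform_geojson_wgs_to_epsg above (values made
# up): reproject a WGS84 point feature into Australian Albers (EPSG:3577).
def example_transform_geojson():
    feature = {
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": (149.1, -35.3)},
    }
    albers = transform_geojson_wgs_to_epsg(feature, 3577)
    print(albers["type"], albers["coordinates"])
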
def boundary_polygon_from_file(filename: str) -> Geometry:
    # TODO: This should be refactored and moved into datacube.utils.geometry
    import shapely.ops
    from shapely.geometry import shape, mapping

    with fiona.open(filename) as input_region:
        joined = shapely.ops.unary_union(
            list(shape(geom['geometry']).buffer(0) for geom in input_region))
        final = joined.convex_hull
        crs = CRS(input_region.crs_wkt)
        boundary_polygon = Geometry(mapping(final), crs)

    return boundary_polygon

def _get_shape(geometry: WKBElement, crs) -> Optional[Geometry]:
    """
    Our shapes are valid in the db, but can become invalid on
    reprojection. We buffer if needed.

    Eg invalid: 32baf68c-7d91-4e13-8860-206ac69147b0

    (the tests reproduce this error... but it may be machine/environment
    dependent?)
    """
    if geometry is None:
        return None

    shape = Geometry(to_shape(geometry), crs).to_crs("EPSG:4326",
                                                     wrapdateline=True)
    if not shape.is_valid:
        newshape = shape.buffer(0)
        assert math.isclose(
            shape.area, newshape.area, abs_tol=0.0001
        ), f"{shape.area} != {newshape.area}"
        shape = newshape
    return shape

def geom_from_bbox(bbox, crs="EPSG:4326"):
    geojson = {
        "type": "Polygon",
        "coordinates": [[
            bbox.points[0],
            bbox.points[1],
            bbox.points[3],
            bbox.points[2],
            bbox.points[0],
        ]],
    }
    return Geometry(geojson, crs=crs)

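# --- Illustrative call of geom_from_bbox above, assuming datacube's
# BoundingBox namedtuple: its .points property yields the four corners, which
# the ring above re-orders into a closed polygon.
from datacube.utils.geometry import BoundingBox

def example_geom_from_bbox():
    bbox = BoundingBox(left=146.0, bottom=-36.0, right=147.0, top=-35.0)
    poly = geom_from_bbox(bbox)
    print(poly.boundingbox)
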
def test_wofs():
    catalog = read_process_catalog("datacube-wps-config.yaml")
    wofs = [entry for entry in catalog if isinstance(entry, WOfSDrill)][0]
    point = Geometry(
        {
            "type": "Point",
            "coordinates": [137.01475095074406, -28.752777955850917, 0]
        },
        crs=CRS("EPSG:4326"),
    )
    results = wofs.query_handler(time="2000", feature=point)
    assert "data" in results
    assert "chart" in results

def select_task_generator(input_region, storage, filter_product):
    if input_region is None or input_region == {}:
        _LOG.info('No input_region specified. Generating full available '
                  'spatial region, gridded files.')
        return GriddedTaskGenerator(storage)

    elif 'geometry' in input_region:  # Larger spatial region
        # A large, multi-tile input region, specified as geojson.
        # Output will be individual tiles.
        geometry = Geometry(input_region['geometry'],
                            CRS('EPSG:4326'))  # GeoJSON is always 4326
        return GriddedTaskGenerator(storage,
                                    geopolygon=geometry,
                                    tile_indexes=input_region.get('tiles'))

    elif 'tile' in input_region:  # For one tile
        return GriddedTaskGenerator(storage,
                                    tile_indexes=[input_region['tile']])

    elif 'tiles' in input_region:  # List of tiles
        return GriddedTaskGenerator(storage,
                                    tile_indexes=input_region['tiles'])

    elif 'from_file' in input_region:
        _LOG.info('Input spatial region specified by file: %s',
                  input_region['from_file'])

        if ('feature_id' in input_region
                or input_region.get('gridded') is False):
            _LOG.info('Generating tasks based on feature polygons.')
            features = features_from_file(input_region['from_file'],
                                          input_region.get('feature_id'))
            return NonGriddedTaskGenerator(input_region=input_region,
                                           filter_product=filter_product,
                                           features=features,
                                           storage=storage)
        else:
            _LOG.info('Generating tasks based on grid.')
            geometry = boundary_polygon_from_file(input_region['from_file'])
            return GriddedTaskGenerator(storage, geopolygon=geometry)

    else:
        _LOG.info('Generating statistics for an ungridded `input region`. '
                  'Output as a single file.')
        return NonGriddedTaskGenerator(input_region=input_region,
                                       storage=storage,
                                       filter_product=filter_product)

def eo3_lonlat_bbox(doc, tol=None):
    epsg4326 = CRS('epsg:4326')
    crs = doc.get('crs')
    grids = doc.get('grids')

    if crs is None or grids is None:
        raise ValueError("Input must have crs and grids")

    crs = CRS(crs)
    geometry = doc.get('geometry')

    if geometry is None:
        return bbox_union(
            grid2polygon(grid, crs).to_crs(epsg4326, tol).boundingbox
            for grid in grids.values())
    else:
        return Geometry(geometry, crs).to_crs(epsg4326, tol).boundingbox

def simplify_geom(geom_in, crs="EPSG:4326"):
    geom = geom_in

    # Pick biggest polygon from multipolygon
    if geom.type == "MultiPolygon":
        geom = max(geom, key=lambda x: x.area)

    # Triangulate, then keep only the large triangles that sit inside
    # the original geometry
    rawtriangles = list(triangulate(geom.geom))
    triangles = list(
        filter(
            lambda x: geom_in.geom.contains(x.representative_point())
            and x.area / geom.area > 0.1,
            rawtriangles,
        ))
    geom = unary_union(triangles)
    if geom.type == "MultiPolygon":
        geom = max(geom, key=lambda x: x.area)

    return Geometry(geom, crs=crs)

def footprint_wgs84(self) -> Optional[MultiPolygon]:
    if not self.footprint_geometry:
        return None
    if not self.footprint_crs:
        warnings.warn(f"Geometry without a crs for {self}")
        return None

    _crs = self.footprint_crs
    try:
        # If defined, EPSG is customized
        _crs = CustomCRSConfigHandlerSingleton(
        ).get_crs_definition_from_custom_epsg(self.footprint_crs.lower())
    except RuntimeError as re:
        # logging.warn is deprecated; use warning
        _LOG.warning(str(re))

    return (Geometry(self.footprint_geometry, _crs)  # crs=self.footprint_crs
            .to_crs("EPSG:4326", wrapdateline=True)
            .geom)

def test_fc():
    catalog = read_process_catalog("datacube-wps-config.yaml")
    fc = [entry for entry in catalog if isinstance(entry, FCDrill)][0]
    poly = Geometry(
        {
            "type": "Polygon",
            "coordinates": [[
                (147.28271484375003, -35.89238773935897),
                (147.03277587890628, -35.663990911348115),
                (146.65237426757815, -35.90684930677119),
                (147.09182739257815, -36.15894422111004),
                (147.28271484375003, -35.89238773935897),
            ]],
        },
        crs=CRS("EPSG:4326"),
    )
    results = fc.query_handler(time=("2019-03-05", "2019-07-10"), feature=poly)
    assert "data" in results
    assert "chart" in results

def test_mangrove():
    catalog = read_process_catalog("datacube-wps-config.yaml")
    # renamed from `fc`: the entry selected here is a MangroveDrill
    mangrove = [entry for entry in catalog
                if isinstance(entry, MangroveDrill)][0]
    poly = Geometry(
        {
            "type": "Polygon",
            "coordinates": [[
                (143.98956298828125, -14.689881366618762),
                (144.26422119140625, -14.689881366618762),
                (144.26422119140625, -14.394778454856146),
                (143.98956298828125, -14.394778454856146),
                (143.98956298828125, -14.689881366618762),
            ]],
        },
        crs=CRS("EPSG:4326"),
    )
    results = mangrove.query_handler(time=("2000", "2005"), feature=poly)
    assert "data" in results
    assert "chart" in results

def _parse_geom(request_json):
    features = request_json["features"]
    if len(features) < 1:
        # can't drill if there is no geometry
        raise ProcessError("no features specified")
    if len(features) > 1:
        # do we need multipolygon support here?
        raise ProcessError("multiple features specified")
    feature = features[0]
    # These are plain dicts, so use membership tests; the original used
    # hasattr(), which is always False on a dict key.
    if "crs" in request_json:
        crs = CRS(request_json["crs"]["properties"]["name"])
    elif "crs" in feature:
        crs = CRS(feature["crs"]["properties"]["name"])
    else:
        # http://geojson.org/geojson-spec.html#coordinate-reference-system-objects
        crs = CRS("urn:ogc:def:crs:OGC:1.3:CRS84")
    return Geometry(feature["geometry"], crs)

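# --- Illustrative payload for _parse_geom above (values made up): a
# single-feature GeoJSON FeatureCollection with an explicit named CRS.
def example_parse_geom():
    request_json = {
        "type": "FeatureCollection",
        "crs": {"type": "name", "properties": {"name": "EPSG:4326"}},
        "features": [{
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [137.0, -28.7]},
            "properties": {},
        }],
    }
    geom = _parse_geom(request_json)
    print(geom.crs, geom.json)
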
def cal_mean_std(query_poly):
    landsat_yaml = 'nbart_ld.yaml'
    with open(landsat_yaml, 'r') as f:
        recipe = yaml.safe_load(f)
    landsat_product = construct(**recipe)

    query = {'time': ('1987-01-01', '2000-01-01')}
    location = {'geopolygon': Geometry(query_poly, CRS("EPSG:3577"))}
    query.update(location)

    dc = Datacube()
    datasets = landsat_product.query(dc, **query)
    grouped = landsat_product.group(datasets, **query)
    _LOG.debug("datasets %s", grouped)

    mask = generate_raster([(query_poly, 1)], grouped.geobox)
    coastline_mask = clip_coastline(grouped.geobox)
    mask[coastline_mask == 0] = 0
    _LOG.debug("mask size %s none zero %s", mask.size, np.count_nonzero(mask))

    if np.count_nonzero(mask) == 0:
        return [], []

    darkest_mean = []
    time_mark = []
    future_list = []
    with MPIPoolExecutor() as executor:
        for i in range(grouped.box.time.shape[0]):
            time_slice = VirtualDatasetBox(
                grouped.box.sel(time=grouped.box.time.data[i:i + 1]),
                grouped.geobox, grouped.load_natively,
                grouped.product_definitions, grouped.geopolygon)
            future = executor.submit(load_cal, landsat_product, time_slice,
                                     mask)
            future_list.append(future)

        for future in future_list:
            r = future.result()
            if r[1] is not None:
                _LOG.debug("darkest time %s", r[0])
                _LOG.debug("darkest mean %s", r[1])
                time_mark.append(r[0])
                darkest_mean.append(r[1])

    return time_mark, darkest_mean

def eo3_lonlat_bbox(doc: Dict[str, Any],
                    resolution: Optional[float] = None) -> BoundingBox:
    """
    Compute bounding box in Lon/Lat for a given EO3 document.
    """
    crs = doc.get('crs')
    grids = doc.get('grids')

    if crs is None or grids is None:
        raise ValueError("Input must have crs and grids")

    crs = CRS(crs)
    geom = doc.get('geometry', None)
    if geom is not None:
        geom = Geometry(geom, crs)
        return lonlat_bounds(geom, resolution=resolution)

    bounds = [
        lonlat_bounds(grid2polygon(grid, crs), resolution=resolution)
        for grid in grids.values()
    ]
    return bbox_union(bounds)

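# --- Minimal EO3-style document for eo3_lonlat_bbox above (illustrative
# values): 'crs' and at least one grid are required; 'geometry', when present,
# takes precedence over the grids.
def example_eo3_lonlat_bbox():
    doc = {
        "crs": "EPSG:32755",
        "grids": {
            "default": {
                "shape": [100, 100],
                "transform": [30, 0, 306285, 0, -30, 6100000, 0, 0, 1],
            },
        },
    }
    print(eo3_lonlat_bbox(doc))
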
def post_processing(
    data: xr.Dataset,
    predicted: xr.Dataset,
    config: FeaturePathConfig,
    geobox_used: GeoBox,
) -> xr.DataArray:
    """
    Filter prediction results with post-processing filters.

    :param data: raw data with all features to run prediction
    :param predicted: the prediction results
    :param config: FeaturePathConfig configuration
    :param geobox_used: GeoBox used to generate the prediction feature
    :return: only predicted binary class label
    """
    # post prediction filtering
    predict = predicted.Predictions

    query = config.query.copy()

    # Update dc query with geometry
    query["geopolygon"] = Geometry(geobox_used.extent.geom,
                                   crs=geobox_used.crs)
    dc = Datacube(app=__name__)

    # mask with WOFS
    wofs = dc.load(product="ga_ls8c_wofs_2_summary", **query)
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    slope = data.slope > 35
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    query.pop("time")
    elevation = dc.load(product="srtm", **query)
    elevation = elevation.elevation > 3600
    predict = predict.where(~elevation.squeeze(), 0)

    return predict

def load_process_save_chunk(output_files: OutputDriver,
                            chunk: Tuple[slice, slice, slice],
                            task: StatsTask, timer: MultiTimer):
    try:
        with timer.time('loading_data'):
            data = load_data(chunk, task.sources)
            # mask as per geometry now
            if task.geom_feat:
                geom = Geometry(task.geom_feat, CRS(task.crs_txt))
                data = data.where(
                    geometry_mask([geom], data.geobox, invert=True))

        # pylint: disable=protected-access
        if output_files._driver_name == 'None':
            output_files.get_source(chunk, data)

        last_idx = len(task.output_products) - 1
        for idx, (prod_name, stat) in enumerate(task.output_products.items()):
            _LOG.debug(
                "Computing %s in tile %s %s; %s", prod_name, task.tile_index,
                "({})".format(", ".join(prettier_slice(c) for c in chunk)),
                timer)
            measurements = stat.data_measurements
            with timer.time(prod_name):
                result = stat.compute(data)
            if idx == last_idx:
                # make sure input data is released early
                del data

            # restore nodata values back
            result = cast_back(result, measurements)

            # For each of the data variables, shove this chunk into the
            # output results
            with timer.time('writing_data'):
                output_files.write_chunk(prod_name, chunk, result)
    except EmptyChunkException:
        _LOG.debug('Error: No data returned while loading %s for %s. '
                   'May have all been masked', chunk, task)

def load_process_save_chunk(output_files: OutputDriver,
                            chunk: Tuple[slice, slice, slice],
                            task: StatsTask, timer: MultiTimer):
    try:
        with timer.time('loading_data'):
            data = load_data(chunk, task.sources)
            # mask as per geometry now
            if task.geom_feat:
                geom = Geometry(task.geom_feat, CRS(task.crs_txt))
                data = data.where(
                    geometry_mask([geom], data.geobox, invert=True))

        last_idx = len(task.output_products) - 1
        for idx, (prod_name, stat) in enumerate(task.output_products.items()):
            _LOG.info("Computing %s in tile %s %s. Current timing: %s",
                      prod_name, task.tile_index, chunk, timer)
            measurements = stat.data_measurements
            with timer.time(prod_name):
                result = stat.compute(data)
            if idx == last_idx:
                # make sure input data is released early
                del data

            # restore nodata values back
            result = cast_back(result, measurements)

            # For each of the data variables, shove this chunk into the
            # output results
            with timer.time('writing_data'):
                # TODO: Move this loop into output_files
                for var_name, var in result.data_vars.items():
                    output_files.write_data(prod_name, var_name, chunk,
                                            var.values)
    except EmptyChunkException:
        _LOG.debug('Error: No data returned while loading %s for %s. '
                   'May have all been masked', chunk, task)

def gwf_query(product, lat=None, long=None, region=None, begin=None,
              end=None, view=True):
    """Run a spatial query on a datacube product using either coordinates or
    a region name

    Wrapper function to call at the beginning of nearly all spatial
    processing command lines

    Args:
        product (str): Name of an ingested datacube product. The product
            to query
        lat (tuple): Optional. For coordinate based spatial query. Tuple of
            min and max latitudes in decimal degrees.
        long (tuple): Optional. For coordinate based spatial query. Tuple of
            min and max longitudes in decimal degrees.
        region (str): Optional name of a region or country whose geometry is
            present in the database region or country table. Overrides lat
            and long when present (not None). Countries must be queried
            using ISO code (e.g.: 'MEX' for Mexico)
        begin (str): Date string in the form '%Y-%m-%d'. For temporally
            bounded queries
        end (str): Date string in the form '%Y-%m-%d'. For temporally
            bounded queries
        view (bool): Returns a view instead of the dictionary returned by
            ``GridWorkflow.list_cells``. Useful when the output is to be
            used directly as an iterable (e.g. in ``distributed.map``).
            Defaults to True

    Returns:
        dict or view: Dictionary (view) of Tile index, Tile key value pair

    Example:
        >>> from madmex.wrappers import gwf_query

        >>> # Using region name, time unbounded
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', region='Jalisco')

        >>> # Using region name, time windowed
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', region='Jalisco',
        ...                        begin='2017-01-01', end='2017-03-31')

        >>> # Using lat long box, time windowed
        >>> tiles_list = gwf_query(product='ls8_espa_mexico', lat=[19, 22],
        ...                        long=[-104, -102],
        ...                        begin='2017-01-01', end='2017-03-31')
    """
    query_params = {'product': product}
    if region is not None:
        # Query database and build a datacube.utils.Geometry(geopolygon)
        try:
            query_set = Country.objects.get(name=region)
        except Country.DoesNotExist:
            query_set = Region.objects.get(name=region)
        region_json = json.loads(query_set.the_geom.geojson)
        crs = CRS('EPSG:%d' % query_set.the_geom.srid)
        geom = Geometry(region_json, crs)
        query_params.update(geopolygon=geom)
    elif lat is not None and long is not None:
        query_params.update(x=long, y=lat)
    else:
        raise ValueError('Either a region name or a lat and long must be '
                         'provided')

    if begin is not None and end is not None:
        begin = datetime.strptime(begin, "%Y-%m-%d")
        end = datetime.strptime(end, "%Y-%m-%d")
        query_params.update(time=(begin, end))

    # GridWorkflow object
    dc = datacube.Datacube()
    gwf = GridWorkflow(dc.index, product=product)
    tile_dict = gwf.list_cells(**query_params)

    # Iterable (dictionary view, analogous to a list of tuples)
    if view:
        tile_dict = tile_dict.items()
    return tile_dict

class FakeDataset:
    extent = Geometry(BIG_POLYGON, crs=CRS('EPSG:4326'))
    center_time = object()
    crs = CRS('EPSG:4326')

def geopolygon(self):
    return Geometry(self.geometry, self.crs)

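# --- Context sketch for the accessor above (illustrative, not the original
# class): it typically lives on an object holding a GeoJSON-style mapping
# plus its CRS, often exposed as a property.
class RegionSketch:
    def __init__(self, geometry, crs):
        self.geometry = geometry  # GeoJSON-style mapping
        self.crs = crs            # datacube CRS or EPSG string

    @property
    def geopolygon(self):
        return Geometry(self.geometry, self.crs)
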
def mv_search_datasets(index, sel=MVSelectOpts.IDS, times=None, layer=None,
                       geom=None, mask=False):
    """
    Perform a dataset query via the space_time_view

    :param layer: A ows_configuration.OWSNamedLayer object (single or
                  multiproduct)
    :param index: A datacube index (required)
    :param sel: Selection mode - a MVSelectOpts enum. Defaults to IDS.
    :param times: A list of pairs of datetimes (with time zone)
    :param geom: A datacube.utils.geometry.Geometry object
    :param mask: Bool, if true use the flags product of layer
    :return: See MVSelectOpts doc
    """
    engine = get_sqlalc_engine(index)
    stv = st_view
    if layer is None:
        raise Exception("Must filter by product/layer")
    if mask:
        prod_ids = [p.id for p in layer.pq_products]
    else:
        prod_ids = [p.id for p in layer.products]

    s = select(sel.sel(stv)).where(stv.c.dataset_type_ref.in_(prod_ids))
    if times is not None:
        s = s.where(
            or_(*[
                stv.c.temporal_extent.op("&&")(DateTimeTZRange(*t))
                for t in times
            ]))
    orig_crs = None
    if geom is not None:
        orig_crs = geom.crs
        if str(geom.crs) != "EPSG:4326":
            geom = geom.to_crs("EPSG:4326")
        geom_js = json.dumps(geom.json)
        s = s.where(stv.c.spatial_extent.intersects(geom_js))
    # print(s)  # Print SQL Statement

    conn = engine.connect()
    if sel == MVSelectOpts.ALL:
        return conn.execute(s)
    if sel == MVSelectOpts.IDS:
        return [r[0] for r in conn.execute(s)]
    if sel in (MVSelectOpts.COUNT, MVSelectOpts.EXTENT):
        for r in conn.execute(s):
            if sel == MVSelectOpts.COUNT:
                return r[0]
            if sel == MVSelectOpts.EXTENT:
                geojson = r[0]
                if geojson is None:
                    return None
                uniongeom = ODCGeom(json.loads(geojson), crs="EPSG:4326")
                if geom:
                    intersect = uniongeom.intersection(geom)
                    if orig_crs and orig_crs != "EPSG:4326":
                        intersect = intersect.to_crs(orig_crs)
                else:
                    intersect = uniongeom
                return intersect
    if sel == MVSelectOpts.DATASETS:
        return index.datasets.bulk_get([r[0] for r in conn.execute(s)])
    assert False  # unreachable: all MVSelectOpts handled above

def extract_geometry(state):
    from datacube.utils.geometry import Geometry
    from datacube.testutils.geom import epsg4326

    return Geometry(state.selection, epsg4326)

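# --- Illustrative call of extract_geometry above (not from the original
# source): any object exposing a GeoJSON-style `selection` attribute works;
# SimpleNamespace stands in for the real state object.
from types import SimpleNamespace

def example_extract_geometry():
    state = SimpleNamespace(selection={
        "type": "Polygon",
        "coordinates": [[(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 0.0)]],
    })
    print(extract_geometry(state).crs)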