def test_path_unicode():
    """read_features accepts a unicode path (py2) / str path (py3)."""
    upolygons = polygons
    try:
        upolygons = unicode(polygons)  # Python 2 only: force unicode path
    except NameError:
        pass  # Python 3: str is already unicode
    assert list(read_features(upolygons)) == target_features
def test_geointerface():
    """An object exposing __geo_interface__ is read through that interface,
    even when the object is also iterable."""
    point_feature = {
        "type": "Feature",
        "properties": {},
        "geometry": {"type": "Point", "coordinates": [0, 0]},
    }
    poly_feature = {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "type": "Polygon",
            "coordinates": [[[-50, -10], [-40, 10], [-30, -10], [-50, -10]]],
        },
    }
    features = [point_feature, poly_feature]

    class MockGeo(object):
        # Deliberately iterable: the geo interface must take precedence
        # over iterability.
        def __init__(self, feats):
            self.__geo_interface__ = {
                'type': "FeatureCollection",
                'features': feats,
            }

        def __iter__(self):
            pass

        def __next__(self):
            pass

        def next(self):
            pass

    assert list(read_features(MockGeo(features))) == features
def test_geodataframe():
    """read_features yields features from a geopandas GeoDataFrame.

    Skips when geopandas is unavailable or too old to expose
    ``__geo_interface__`` on a DataFrame.
    """
    try:
        import geopandas as gpd
        df = gpd.read_file(polygons)
        if not hasattr(df, '__geo_interface__'):
            pytest.skip("This version of geopandas doesn't support df.__geo_interface__")
    except ImportError:
        # BUG FIX: skip message had a typo ("geopands").
        pytest.skip("Can't import geopandas")
    assert list(read_features(df))
def test_geodataframe():
    """read_features yields features from a GeoDataFrame built via from_file.

    Skips when geopandas is unavailable or too old to expose
    ``__geo_interface__`` on a DataFrame.
    """
    try:
        import geopandas as gpd
        df = gpd.GeoDataFrame.from_file(polygons)
        if not hasattr(df, '__geo_interface__'):
            pytest.skip("This version of geopandas doesn't support df.__geo_interface__")
    except ImportError:
        # BUG FIX: skip message had a typo ("geopands").
        pytest.skip("Can't import geopandas")
    assert read_features(df)
def test_direct_features_collections():
    """zonal_stats returns identical results for a path, a feature
    iterable, and a feature collection."""
    shp_path = os.path.join(DATA, 'polygons.shp')
    feats = read_features(shp_path)
    coll = read_featurecollection(shp_path)

    from_path = zonal_stats(shp_path, raster)
    from_features = zonal_stats(feats, raster)
    from_collection = zonal_stats(coll, raster)

    assert from_path == from_features == from_collection
def zonalstats(input_geojson, raster, output_geojson, all_touched, band,
               categorical, global_src_extent, indent, info, nodata, prefix,
               stats):
    '''zonalstats generates summary statistics of geospatial raster datasets
    based on vector features.

    The input and output arguments of zonalstats should be valid GeoJSON
    FeatureCollections. The output GeoJSON will be mostly unchanged but have
    additional properties per feature describing the summary statistics (min,
    max, mean, etc.) of the underlying raster dataset.

    The input and output arguments default to stdin and stdout but can also be
    file paths.

    The raster is specified by the required -r/--raster argument.

    Example, calculate rainfall stats for each state and output to file:

    \b
    zonalstats states.geojson -r rainfall.tif > mean_rainfall_by_state.geojson
    '''
    # Optionally enable INFO-level logging for this run.
    if info:
        logging.basicConfig(level=logging.INFO)

    # Read the whole GeoJSON payload up front and release the input handle.
    mapping = json.loads(input_geojson.read())
    input_geojson.close()

    try:
        if mapping['type'] == "FeatureCollection":
            # Reuse the incoming collection so its top-level members survive.
            feature_collection = mapping
        else:
            # Wrap non-collection GeoJSON input in a fresh collection shell.
            feature_collection = {'type': 'FeatureCollection'}
        features = read_features(mapping)
    except (AssertionError, KeyError):
        raise ValueError("input_geojson must be a GeoJSON Feature Collection")

    # --stats is a space-delimited string; a case-insensitive "all"
    # selects every available statistic.
    if stats is not None:
        stats = stats.split(" ")
        if 'all' in [x.lower() for x in stats]:
            stats = "ALL"

    zonal_results = zonal_stats(
        features,
        raster,
        all_touched=all_touched,
        band_num=band,
        categorical=categorical,
        global_src_extent=global_src_extent,
        nodata_value=nodata,
        stats=stats,
        copy_properties=False)

    # Merge computed stats back onto the original features.
    feature_collection['features'] = list(
        combine_features_results(features, zonal_results, prefix))

    output_geojson.write(json.dumps(feature_collection, indent=indent))
    output_geojson.write("\n")
def zonalstats(input_geojson, raster, output_geojson, all_touched, band,
               categorical, global_src_extent, indent, info, nodata, prefix,
               stats):
    '''zonalstats generates summary statistics of geospatial raster datasets
    based on vector features.

    The input and output arguments of zonalstats should be valid GeoJSON
    FeatureCollections. The output GeoJSON will be mostly unchanged but have
    additional properties per feature describing the summary statistics (min,
    max, mean, etc.) of the underlying raster dataset.

    The input and output arguments default to stdin and stdout but can also be
    file paths.

    The raster is specified by the required -r/--raster argument.

    Example, calculate rainfall stats for each state and output to file:

    \b
    zonalstats states.geojson -r rainfall.tif > mean_rainfall_by_state.geojson
    '''
    if info:
        logging.basicConfig(level=logging.INFO)

    payload = json.loads(input_geojson.read())
    input_geojson.close()

    try:
        # Keep the original collection when one was supplied; otherwise
        # build an empty shell to carry the output features.
        if payload['type'] == "FeatureCollection":
            out_collection = payload
        else:
            out_collection = {'type': 'FeatureCollection'}
        features = read_features(payload)
    except (AssertionError, KeyError):
        raise ValueError("input_geojson must be a GeoJSON Feature Collection")

    # Parse the space-delimited stats string; "all" selects everything.
    if stats is not None:
        requested = stats.split(" ")
        stats = "ALL" if any(t.lower() == 'all' for t in requested) else requested

    results = zonal_stats(
        features, raster,
        all_touched=all_touched,
        band_num=band,
        categorical=categorical,
        global_src_extent=global_src_extent,
        nodata_value=nodata,
        stats=stats,
        copy_properties=False)

    out_collection['features'] = list(
        combine_features_results(features, results, prefix))

    output_geojson.write(json.dumps(out_collection, indent=indent))
    output_geojson.write("\n")
def rasterize_shapefile(shapes, raster, band=1, layer=0, all_touched=False):
    """Rasterize vector features into a boolean mask on the raster's grid.

    Parameters
    ----------
    shapes: path to a vector source or geo-like python objects
        Passed through to ``read_features``.
    raster: object with ``raster``, ``geot`` and ``nodata_value`` attributes
        The data array and GDAL-style geotransform defining the target grid.
    band: int, optional
        Band number forwarded to the ``Raster`` wrapper. defaults to 1.
    layer: int or string, optional
        Vector layer selector forwarded to ``read_features``. defaults to 0.
    all_touched: bool, optional
        Whether to burn every cell touched by a geometry, or only cells
        whose center falls inside it. defaults to False.

    Returns
    -------
    numpy.ndarray of bool
        True where any feature covers the corresponding raster cell.
    """
    data = raster.raster
    geot = raster.geot
    nodata = raster.nodata_value
    affine = Affine.from_gdal(*geot)
    rast = Raster(data, affine, nodata, band)

    # BUG FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin bool is the correct dtype here.
    out = np.zeros(data.shape, dtype=bool)

    for feat in read_features(shapes, layer):
        geom = shape(feat['geometry'])
        if 'Point' in geom.type:
            # Points cover no area; expand them to cell-sized boxes.
            geom = boxify_points(geom, rast)

        # rasterized geometry, OR-ed into the accumulated mask
        rv_array = rasterize_geom(geom, like=rast, all_touched=all_touched)
        out |= rv_array

    return out
def pointquery(input_geojson, raster, output_geojson, band, indent, nodata,
               interpolate, property_name):
    """ Queries the raster values at the points of the input GeoJSON Features.
    The raster values are added to the features properties and output as
    GeoJSON Feature Collection.

    If the Features are Points, the point geometry is used. For other Feature
    types, all of the vertices of the geometry will be queried. For example,
    you can provide a linestring and get the profile along the line if the
    vertices are spaced properly.

    You can use either bilinear (default) or nearest neighbor interpolation.
    """
    # Read the full GeoJSON payload and release the input handle.
    mapping = json.loads(input_geojson.read())
    input_geojson.close()

    try:
        if mapping['type'] == "FeatureCollection":
            # Reuse the incoming collection so its top-level members survive.
            feature_collection = mapping
        else:
            # Wrap other GeoJSON input in a fresh collection shell.
            feature_collection = {'type': 'FeatureCollection'}
        features = read_features(mapping)
    except (AssertionError, KeyError):
        raise ValueError("input_geojson must be valid GeoJSON")

    # geojson_out=True makes point_query return full features with the
    # queried raster value attached as a property.
    results = point_query(
        features,
        raster,
        band=band,
        nodata=nodata,
        interpolate=interpolate,
        property_name=property_name,
        geojson_out=True)

    feature_collection['features'] = results
    output_geojson.write(json.dumps(feature_collection, indent=indent))
    output_geojson.write("\n")
def test_geointerface():
    """__geo_interface__ wins over iterability when reading features."""
    features = [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {"type": "Point", "coordinates": [0, 0]},
        },
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Polygon",
                "coordinates": [
                    [[-50, -10], [-40, 10], [-30, -10], [-50, -10]]
                ],
            },
        },
    ]

    class MockGeo(object):
        """Iterable mock whose geo interface must take precedence."""

        def __init__(self, feats):
            self.__geo_interface__ = {
                'type': "FeatureCollection",
                'features': feats,
            }

        def __iter__(self):
            pass

        def __next__(self):
            pass

        def next(self):
            pass

    geothing = MockGeo(features)
    assert list(read_features(geothing)) == features
def gen_zonal_stats(vectors, raster, layer=0, band=1, nodata=None,
                    affine=None, stats=None, all_touched=True,
                    percent_cover_selection=None, percent_cover_weighting=True,
                    percent_cover_scale=20, categorical=False,
                    category_map=None, add_stats=None, zone_func=None,
                    raster_out=False, prefix=None, geojson_out=False,
                    **kwargs):
    """Zonal statistics of raster values aggregated to vector geometries.

    Parameters
    ----------
    vectors: path to an vector source or geo-like python objects
    raster: ndarray or path to a GDAL raster source
        If ndarray is passed, the ``affine`` kwarg is required.
    layer: int or string, optional
        If `vectors` is a path to an fiona source, specify the vector layer
        to use either by name or number. defaults to 0
    band: int, optional
        If `raster` is a GDAL source, the band number to use
        (counting from 1). defaults to 1.
    nodata: float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata. If `None`, the file's metadata's
        NODATA value (if any) will be used. defaults to `None`.
    affine: Affine instance
        required only for ndarrays, otherwise it is read from src
    stats: list of str, or space-delimited str, optional
        Which statistics to calculate for each zone. All possible choices
        are listed in ``utils.VALID_STATS``. defaults to ``DEFAULT_STATS``,
        a subset of these.
    all_touched: bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon. defaults to `False`
    percent_cover_selection: float, optional
        Include only raster cells that have at least the given percent
        covered by the vector feature. Requires percent_cover_scale argument
        be used to specify scale at which to generate percent coverage
        estimates
    percent_cover_weighting: bool, optional
        whether or not to use percent coverage of cells during calculations
        to adjust stats (only applies to mean, count and sum)
    percent_cover_scale: int, optional
        Scale used when generating percent coverage estimates of each raster
        cell by vector feature. Percent coverage is generated by rasterizing
        the feature at a finer resolution than the raster (based on
        percent_cover_scale value) then using a summation to aggregate to
        the raster resolution and dividing by the square of
        percent_cover_scale to get percent coverage value for each cell.
        Increasing percent_cover_scale will increase the accuracy of percent
        coverage values; three orders magnitude finer resolution
        (percent_cover_scale=1000) is usually enough to get coverage
        estimates with <1% error in individual edge cells coverage
        estimates, though much smaller values (e.g., percent_cover_scale=10)
        are often sufficient (<10% error) and require less memory.
    categorical: bool, optional
    category_map: dict
        A dictionary mapping raster values to human-readable categorical
        names. Only applies when categorical is True
    add_stats: dict
        with names and functions of additional stats to compute, optional
    zone_func: callable
        function to apply to zone ndarray prior to computing stats
    raster_out: boolean
        Include the masked numpy array for each feature?, optional
        Each feature dictionary will have the following additional keys:
        mini_raster_array: The clipped and masked numpy array
        mini_raster_affine: Affine transformation
        mini_raster_nodata: NoData Value
    prefix: string
        add a prefix to the keys (default: None)
    geojson_out: boolean
        Return list of GeoJSON-like features (default: False)
        Original feature geometry and properties will be retained with zonal
        stats appended as additional properties. Use with `prefix` to ensure
        unique and meaningful property names.

    Returns
    -------
    generator of dicts (if geojson_out is False)
        Each item corresponds to a single vector feature and contains keys
        for each of the specified stats.
    generator of geojson features (if geojson_out is True)
        GeoJSON-like Feature as python dict

    Notes
    -----
    NOTE(review): this text says ``all_touched`` defaults to `False`, but
    the signature default is `True` — confirm which is intended.
    """
    stats, run_count = check_stats(stats, categorical)

    # check inputs related to percent coverage
    percent_cover = False
    if percent_cover_weighting or percent_cover_selection is not None:
        percent_cover = True
        if percent_cover_scale is None:
            warnings.warn('No value for `percent_cover_scale` was given. '
                          'Using default value of 10.')
            percent_cover_scale = 10

        try:
            if percent_cover_scale != int(percent_cover_scale):
                warnings.warn('Value for `percent_cover_scale` given ({0}) '
                              'was converted to int ({1}) but does not '
                              'match original value'.format(
                                  percent_cover_scale,
                                  int(percent_cover_scale)))

            percent_cover_scale = int(percent_cover_scale)

            if percent_cover_scale <= 1:
                raise Exception(
                    'Value for `percent_cover_scale` must be '
                    'greater than one ({0})'.format(percent_cover_scale))

        # NOTE(review): bare except hides the real error class here.
        except:
            raise Exception('Invalid value for `percent_cover_scale` '
                            'provided ({0}). Must be type int.'.format(
                                percent_cover_scale))

        if percent_cover_selection is not None:
            try:
                percent_cover_selection = float(percent_cover_selection)
            except:
                raise Exception('Invalid value for `percent_cover_selection` '
                                'provided ({0}). Must be able to be converted '
                                'to a float.'.format(percent_cover_selection))

        # if not all_touched:
        #     warnings.warn('`all_touched` was not enabled but an option '
        #                   'requiring percent_cover calculations was '
        #                   'selected. Automatically '
        #                   'enabling `all_touched`.')
        #     all_touched = True

    with Raster(raster, affine, nodata, band) as rast:
        features_iter = read_features(vectors, layer)
        for _, feat in enumerate(features_iter):
            geom = shape(feat['geometry'])

            if 'Point' in geom.type:
                # Points have zero area; expand to cell-sized boxes and
                # disable percent-cover handling for this feature onward.
                geom = boxify_points(geom, rast)
                percent_cover = False

            geom_bounds = tuple(geom.bounds)
            # Read only the raster window covering this geometry.
            fsrc = rast.read(bounds=geom_bounds)

            if percent_cover:
                # Fractional coverage per cell; cells above the selection
                # threshold (or any coverage, if no threshold) are in-zone.
                cover_weights = rasterize_pctcover_geom(
                    geom, shape=fsrc.shape, affine=fsrc.affine,
                    scale=percent_cover_scale,
                    all_touched=all_touched)
                rv_array = cover_weights > (percent_cover_selection or 0)
            else:
                rv_array = rasterize_geom(
                    geom, shape=fsrc.shape, affine=fsrc.affine,
                    all_touched=all_touched)

            # nodata mask
            isnodata = (fsrc.array == fsrc.nodata)

            # add nan mask (if necessary)
            if np.issubdtype(fsrc.array.dtype, float) and \
                    np.isnan(fsrc.array.min()):
                isnodata = (isnodata | np.isnan(fsrc.array))

            # Mask the source data array
            # mask everything that is not a valid value or not within our geom
            masked = np.ma.MaskedArray(fsrc.array,
                                       mask=(isnodata | ~rv_array))

            # execute zone_func on masked zone ndarray
            if zone_func is not None:
                if not callable(zone_func):
                    raise TypeError(('zone_func must be a callable '
                                     'which accepts function a '
                                     'single `zone_array` arg.'))
                zone_func(masked)

            if masked.compressed().size == 0:
                # nothing here, fill with None and move on
                feature_stats = dict([(stat, None) for stat in stats])
                if 'count' in stats:  # special case, zero makes sense here
                    feature_stats['count'] = 0
            else:
                if run_count:
                    keys, counts = np.unique(masked.compressed(),
                                             return_counts=True)
                    # NOTE(review): np.asscalar was removed in NumPy 1.23;
                    # .item() is the modern replacement — confirm before
                    # upgrading NumPy.
                    pixel_count = dict(
                        zip([np.asscalar(k) for k in keys],
                            [np.asscalar(c) for c in counts]))

                if categorical:
                    feature_stats = dict(pixel_count)
                    if category_map:
                        feature_stats = remap_categories(
                            category_map, feature_stats)
                else:
                    feature_stats = {}

                if 'min' in stats:
                    feature_stats['min'] = float(masked.min())
                if 'max' in stats:
                    feature_stats['max'] = float(masked.max())
                if 'mean' in stats:
                    if percent_cover:
                        # Coverage-weighted mean over in-zone cells.
                        feature_stats['mean'] = float(
                            np.sum(masked * cover_weights) /
                            np.sum(~masked.mask * cover_weights))
                    else:
                        feature_stats['mean'] = float(masked.mean())
                if 'count' in stats:
                    if percent_cover:
                        # Fractional cell count when coverage-weighting.
                        feature_stats['count'] = float(
                            np.sum(~masked.mask * cover_weights))
                    else:
                        feature_stats['count'] = int(masked.count())
                # optional
                if 'sum' in stats:
                    if percent_cover:
                        feature_stats['sum'] = float(
                            np.sum(masked * cover_weights))
                    else:
                        feature_stats['sum'] = float(masked.sum())
                if 'std' in stats:
                    feature_stats['std'] = float(masked.std())
                if 'median' in stats:
                    feature_stats['median'] = float(
                        np.median(masked.compressed()))
                if 'majority' in stats:
                    feature_stats['majority'] = float(
                        key_assoc_val(pixel_count, max))
                if 'minority' in stats:
                    feature_stats['minority'] = float(
                        key_assoc_val(pixel_count, min))
                if 'unique' in stats:
                    feature_stats['unique'] = len(list(pixel_count.keys()))
                if 'range' in stats:
                    # Reuse min/max if already computed for this feature.
                    try:
                        rmin = feature_stats['min']
                    except KeyError:
                        rmin = float(masked.min())
                    try:
                        rmax = feature_stats['max']
                    except KeyError:
                        rmax = float(masked.max())
                    feature_stats['range'] = rmax - rmin

                for pctile in [s for s in stats
                               if s.startswith('percentile_')]:
                    q = get_percentile(pctile)
                    pctarr = masked.compressed()
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if 'nodata' in stats:
                # Count nodata cells inside the geometry footprint.
                featmasked = np.ma.MaskedArray(
                    fsrc.array, mask=np.logical_not(rv_array))
                feature_stats['nodata'] = float(
                    (featmasked == fsrc.nodata).sum())

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)

            if raster_out:
                feature_stats['mini_raster_array'] = masked
                feature_stats['mini_raster_affine'] = fsrc.affine
                feature_stats['mini_raster_nodata'] = fsrc.nodata

            if prefix is not None:
                prefixed_feature_stats = {}
                for key, val in feature_stats.items():
                    newkey = "{}{}".format(prefix, key)
                    prefixed_feature_stats[newkey] = val
                feature_stats = prefixed_feature_stats

            if geojson_out:
                # Attach stats as properties on the original feature.
                for key, val in feature_stats.items():
                    if 'properties' not in feat:
                        feat['properties'] = {}
                    feat['properties'][key] = val
                yield feat
            else:
                yield feature_stats
def test_featurecollection():
    """read_featurecollection's features agree with read_features."""
    collection = read_featurecollection(polygons)
    feats = list(read_features(polygons))
    assert collection['features'] == feats == target_features
def _test_read_features_single(indata):
    """The first feature read from `indata` matches the first target geom."""
    feats = list(read_features(indata))
    first_geom = shape(feats[0]['geometry'])
    assert first_geom.almost_equals(target_geoms[0])
def test_layer_name():
    """Selecting the vector layer by name yields the target features."""
    feats = read_features(DATA, layer='polygons')
    assert list(feats) == target_features
def test_fiona_path():
    """Reading features from a plain fiona path yields the target features."""
    feats = read_features(polygons)
    assert list(feats) == target_features
def test_layer_index():
    """Selecting the vector layer by numeric index yields the target features."""
    idx = fiona.listlayers(DATA).index('polygons')
    feats = read_features(DATA, layer=idx)
    assert list(feats) == target_features
def _test_read_features(indata):
    """All geometries read from `indata` match the target geometries."""
    feats = list(read_features(indata))
    parsed_geoms = [shape(f['geometry']) for f in feats]
    _compare_geomlists(parsed_geoms, target_geoms)
def test_notafeature():
    """Inputs that are not features raise ValueError."""
    bad_list = ['foo', 'POINT(-122 42)']
    with pytest.raises(ValueError):
        list(read_features(bad_list))

    with pytest.raises(ValueError):
        list(read_features(Exception()))
def gen_tabulate(vectors, raster, layer=0, index=None, band_num=1,
                 nodata=None, affine=None, all_touched=False,
                 categorical=False, category_map=None, prefix=None,
                 **kwargs):
    """Tabulate counts of unique raster values within vector geometries.

    Parameters
    ----------
    vectors: path to an vector source or geo-like python objects
    raster: ndarray or path to a GDAL raster source
        If ndarray is passed, the ``affine`` kwarg is required.
    layer: int or string, optional
        If `vectors` is a path to an fiona source, specify the vector layer
        to use either by name or number. defaults to 0
    index: string, optional
        The name of the variable in the vector shapefile that will be used
        to id the polygons in the output file
    band_num: int, optional
        If `raster` is a GDAL source, the band number to use
        (counting from 1). defaults to 1.
    nodata: float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata. If `None`, the file's metadata's
        NODATA value (if any) will be used. defaults to `None`.
    affine: Affine instance
        required only for ndarrays, otherwise it is read from src
    all_touched: bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon. defaults to `False`
    categorical: bool, optional
    category_map: dict
        A dictionary mapping raster values to human-readable categorical
        names. Only applies when categorical is True
    prefix: string
        add a prefix to the keys (default: None)

    Returns
    -------
    dict
        Maps each feature id (or the value of its `index` property) to a
        dict of {raster value: pixel count} for that feature.
        (Docstring fixed: the original claimed a generator was returned.)
    """
    # Handle 1.0 deprecations.
    # BUG FIX: the original body read `kwargs` but the signature did not
    # accept **kwargs, so any call raised NameError. Accepting **kwargs is
    # backward-compatible and matches gen_zonal_stats.
    transform = kwargs.get('transform')
    if transform:
        warnings.warn("GDAL-style transforms will disappear in 1.0. "
                      "Use affine=Affine.from_gdal(*transform) instead",
                      DeprecationWarning)
        if not affine:
            affine = Affine.from_gdal(*transform)

    tabulate = {}
    with Raster(raster, affine, nodata, band_num) as rast:
        features_iter = read_features(vectors, layer)
        for i, feat in enumerate(features_iter):
            # Get the index for the geometry
            if index:
                id_in = feat['properties'][index]
            else:
                id_in = feat['id']

            geom = shape(feat['geometry'])
            if 'Point' in geom.type:
                # Points cover no area; expand to cell-sized boxes.
                geom = boxify_points(geom, rast)

            geom_bounds = tuple(geom.bounds)
            fsrc = rast.read(bounds=geom_bounds)

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, like=fsrc,
                                      all_touched=all_touched)
            assert rv_array.shape == fsrc.shape

            # Mask the source data array with our current feature:
            # logical_not flips 0<->1 for the correct mask effect, and
            # nodata values are masked out explicitly.
            masked = np.ma.MaskedArray(
                fsrc.array,
                mask=np.logical_or(fsrc.array == fsrc.nodata,
                                   np.logical_not(rv_array)))

            if masked.compressed().size == 0:
                # nothing here, move on
                continue

            keys, counts = np.unique(masked.compressed(),
                                     return_counts=True)
            if prefix:
                # BUG FIX: the prefixed keys were computed into an unused
                # local and silently discarded; apply the prefix as the
                # parameter documents.
                keys = [prefix + str(k) for k in keys]

            # Add the counts to the tabulated values as a dictionary.
            tabulate[id_in] = dict(zip(keys, counts))

    return tabulate