def raster_sum(data):
    """Zero out non-positive cells of ``data`` and return its NaN-ignoring sum.

    ``data`` is modified in place: every element that compares ``<= 0`` is
    overwritten with 0 before the sum is taken. The total is logged through
    ``log_print`` and returned to the caller.
    """
    non_positive = data <= 0
    data[non_positive] = 0

    total = np.nansum(data)
    log_print(f'total: [{total}]')
    return total
def check_dataframe(df):
    """Validate the shapefile dataframe before processing.

    Every required column (``VALUE_COLUMN``, ``ID``, ``USE_SECTOR``,
    ``CURVE``) that is absent from ``df`` is reported on stdout; if at least
    one is missing the program terminates via ``sys.exit(-1)``.

    Use-sector values found in ``df`` that are not keys/members of
    ``use_sector_table`` do not abort processing: they are only reported in
    a single logged warning.

    Args:
        df: dataframe read from the shapefile.

    Raises:
        SystemExit: when one or more required columns are missing.
    """
    missing_columns = [
        column
        for column in (VALUE_COLUMN, ID, USE_SECTOR, CURVE)
        if column not in df
    ]
    for column in missing_columns:
        print(f'ERROR: column {column} not in shapefile')
    if missing_columns:
        sys.exit(-1)

    # Materialise the result as a list: a lazy ``filter`` object can only be
    # consumed once, so testing its length first would leave nothing to join
    # into the warning message.
    missing_use = [
        use
        for use in df[USE_SECTOR].unique()
        if use not in use_sector_table
    ]
    if missing_use:
        # str() keeps the join safe when a use value is not a string
        # (e.g. a missing value coming out of ``unique()``).
        listed = ", ".join(str(use) for use in missing_use)
        log_print(f'warning missing uses: [{listed}]')
def process(shapefile, mask, output_file):
    """Intersect a reference mask with the area covered by a shapefile.

    The shapefile is read, the first geometry of every ``ID`` group is kept
    and burned with the constant value 1 into a low-resolution raster
    (``rasterize``). That raster is passed through ``reproject`` together
    with the transform and shape of the reference ``mask`` raster, multiplied
    with band 1 of the mask, and the product is written to ``output_file``
    as a single ``uint8`` band. Finally the percentage of the mask total
    that is no longer present in the product is logged.

    Args:
        shapefile: path of the vector file, read with ``gpd.read_file``.
        mask: path of the reference raster, opened with ``rio.open``.
        output_file: path of the GeoTIFF to create.
    """
    df = gpd.read_file(shapefile)

    log_print('getting geometries')
    geometries = df.groupby(ID).agg({'geometry': 'first'})

    out_crs = rio.crs.CRS({'init': 'EPSG:4326', 'no_defs': True})
    trans, shape = extract_geo_info(geometries)

    with rio.open(mask) as ref_raster:
        out_trans, out_shape = ref_raster.affine, ref_raster.shape
        mask_data = ref_raster.read(1)

    # Build the GeoDataFrame exactly once. Wrapping ``geometries`` a second
    # time later on would produce a frame that is not guaranteed to carry
    # the ``value`` column assigned here, which ``rasterize`` burns.
    gdf_geom = gpd.GeoDataFrame(geometries)
    gdf_geom['value'] = 1

    log_print(f'writing {output_file}')
    with geotiff_writer(output_file, out_trans, out_crs, out_shape, 1) as writer:
        # create the low res raster for the band
        raster = rasterize(gdf_geom, 'value', shape, trans, out_crs)

        # upscale the raster to the mask resolution
        hr_raster = reproject(raster, trans, out_trans, out_shape, out_crs)

        new_mask = mask_data * hr_raster

        # write using the context-manager writer
        writer.write(new_mask.astype('uint8'), indexes=1)

    # Share of the original mask total that the shapefile does not cover.
    perc_missing = 100.0 * np.nansum(mask_data - new_mask) / np.nansum(mask_data)
    log_print('Percentage missing: %.2f' % perc_missing)
# Accumulate per-sector rasters (housing, industrial, service, governmental)
# and write each one to its own single-band GeoTIFF.
# NOTE(review): `ysize`, `xsize`, `m`, `data_m`, `path_p`, `geotransform`,
# `geoproj` and the `sFile_*` paths are defined outside this fragment; the
# use of `m` suggests an enclosing loop that is not visible here - confirm
# the intended nesting before relying on the layout below.
data_hous = np.zeros((ysize, xsize))
data_ind = np.zeros((ysize, xsize))
data_serv = np.zeros((ysize, xsize))
data_gov = np.zeros((ysize, xsize))
# NOTE(review): `init` is not read anywhere in this fragment.
init = True
p_file = join(path_p, f'{m}.tiff')
# Only contribute to the sector rasters when the `<m>.tiff` file exists.
if os.path.isfile(p_file):
    print(f'{m} p_file found')
    # Six bands are unpacked; band 4 (`data_p4`) is not used below.
    data_p1, data_p2, data_p3, data_p4, data_p5, data_p6 \
        = readMultiBandGeotiff(p_file)
    # Each sector adds `data_m` weighted by its band(s); the division by 100
    # presumably converts percentage bands to fractions - TODO confirm.
    # housing: bands 1 and 2
    data_hous += (data_m * data_p1 + data_m * data_p2) / 100
    # industrial: band 6
    data_ind += (data_m * data_p6) / 100
    # service: band 3
    data_serv += (data_m * data_p3) / 100
    # governmental: band 5
    data_gov += (data_m * data_p5) / 100
# Log the NaN-ignoring total of every sector raster.
hous_tot = np.nansum(data_hous)
log_print(f'total housing: [{hous_tot}]')
ind_tot = np.nansum(data_ind)
log_print(f'total industrial: [{ind_tot}]')
serv_tot = np.nansum(data_serv)
log_print(f'total service: [{serv_tot}]')
gov_tot = np.nansum(data_gov)
log_print(f'total governmental: [{gov_tot}]')
# Persist one single-band GeoTIFF per sector.
writeGeotiffSingleBand(sFile_hous, geotransform, geoproj, data_hous)
writeGeotiffSingleBand(sFile_ind, geotransform, geoproj, data_ind)
writeGeotiffSingleBand(sFile_serv, geotransform, geoproj, data_serv)
writeGeotiffSingleBand(sFile_gov, geotransform, geoproj, data_gov)
def process(shapefile, output_dir, mask):
    """Write one multi-band GeoTIFF per curve found in the shapefile.

    Steps, in order:

    1. Load ``shapefile`` and validate it with ``check_dataframe``.
    2. Keep the first geometry of every ``ID`` group.
    3. Restrict the rows to use sectors present in ``use_sector_table`` and
       sum ``VALUE_COLUMN`` per (``CURVE``, use, ``ID``).
    4. Apply ``fix_percentage`` to every (``CURVE``, ``ID``) group.
    5. For each curve, create ``<output_dir>/<curve name>.tiff`` with
       ``len(band_order)`` ``uint8`` bands; each use is rasterized, passed
       through ``reproject`` with the transform and shape of ``mask``,
       rounded, and written to the band given by its position in
       ``band_order``.

    ``output_dir`` is created when it does not exist yet.
    """
    log_print('loading file')
    df = gpd.read_file(shapefile)
    check_dataframe(df)

    log_print('getting geometries')
    geometries = df.groupby(ID).agg({'geometry': 'first'})

    log_print('aggregate on use')
    use_of_row = partial(get_use_on_df, df)
    known_use_rows = df.loc[df[USE_SECTOR].isin(use_sector_table)]
    df_abs_values = (
        known_use_rows
        .groupby([CURVE, use_of_row, ID])
        .agg({VALUE_COLUMN: 'sum'})
    )

    log_print('calculating percentage')
    df_perc_values = (
        df_abs_values
        .groupby(level=(CURVE, ID))
        .apply(fix_percentage)
    )

    out_crs = rio.crs.CRS({'init': 'EPSG:4326', 'no_defs': True})
    trans, shape = extract_geo_info(geometries)
    with rio.open(mask) as ref_raster:
        out_trans = ref_raster.affine
        out_shape = ref_raster.shape

    os.makedirs(output_dir, exist_ok=True)

    log_print('writing geotiffs')
    for curve, df_curve in df_perc_values.groupby(CURVE):
        # Fall back to the raw curve value when no display name is mapped.
        curve_name = curve_name_mapping.get(curve, curve)
        filename = f'{output_dir}/{curve_name}.tiff'
        log_print(f'writing {filename}')

        tiff = geotiff_writer(
            filename, out_trans, out_crs, out_shape, len(band_order),
            dtype=np.uint8,
        )
        with tiff as writer:
            for use, df_data in df_curve.groupby(level=1):
                # Band position follows the configured ordering (1-based
                # when handed to the writer).
                band_number = band_order.index(use) + 1
                log_print(f' use "{use}" as band {band_number}')

                # Attach the geometry of every ID to its value.
                gdf_geom = gpd.GeoDataFrame(df_data.join(geometries))

                # Low-resolution raster for this band.
                raster = rasterize(
                    gdf_geom, VALUE_COLUMN, shape, trans, out_crs,
                    dtype=np.float32, nodata=0.0,
                )

                # Bring the raster onto the mask grid.
                out_raster = reproject(
                    raster, trans, out_trans, out_shape, out_crs
                )

                # Round before the integer cast so values are not truncated.
                rounded = np.round(out_raster)
                writer.write(rounded.astype(np.uint8), indexes=band_number)
# intersect geometries with data df_geom = df_data.join(geometries) gdf_geom = gpd.GeoDataFrame(df_geom) # create the low res raster for the band raster = rasterize(gdf_geom, VALUE_COLUMN, shape, trans, out_crs, dtype=np.float32, nodata=0.0) # upscale the raster to the mask resolution out_raster = reproject(raster, trans, out_trans, out_shape, out_crs) # write using the context-manager writer out_raster_int = (np.round(out_raster)).astype(np.uint8) writer.write(out_raster_int, indexes=band+1) if __name__ == '__main__': parser = init_parser() args = parser.parse_args() log_print(f'processing {args.shapefile.name}') if args.config: override_config(args.config) if args.outdir is None: args.outdir = './' args.shapefile.close() args.mask.close() process(args.shapefile.name, args.outdir, args.mask.name) log_print(f'finished processing {args.shapefile.name}')