import copy
import json
import os
import time

import geopandas as gpd
import pandas as pd
import rasterio
import richdem
import elevation
from rasterio import features
from shapely import wkt

# arcpy and Spatial Analyst are required by the desktop-GIS functions below
import arcpy
from arcpy.sa import *

# Project-local helpers; these import paths are assumptions based on how misc,
# rMisc, and tPrint are used below. Other names referenced but not defined in
# this file (tryAddField, summarizeDbf, getUniqueValues, getNTLFiles,
# set_up_logging, covid_extractor, hnp_categories, calculate_vulnerability,
# create_urban_data, run_zonal) are expected to come from elsewhere in the project.
import misc
import rMisc
from misc import tPrint


def create_fishnet(extents_file, out_folder, prefix, verbose=False):
    '''Create a 1 km fishnet over each of the five most populous urban extents'''
    urban_extents = gpd.read_file(extents_file)
    sel_cities = urban_extents.sort_values(['Pop'], ascending=False).iloc[0:5]
    try:
        sel_cities = misc.project_UTM(sel_cities)
    except Exception:
        sel_cities = sel_cities.to_crs({"init": "epsg:3857"})
    for idx, row in sel_cities.iterrows():
        out_fishnet = os.path.join(out_folder, "%s_%s.shp" % (prefix, row['ID']))
        b = row['geometry'].bounds
        crs_num = int(sel_cities.crs['init'].split(":")[-1])
        misc.createFishnet(out_fishnet, b[0], b[2], b[1], b[3], 1000, 1000, crsNum=crs_num)
        fishnet = gpd.read_file(out_fishnet)
        fishnet = fishnet[fishnet.intersects(row['geometry'])]
        fishnet = fishnet.to_crs({'init': 'epsg:4326'})
        fishnet.to_file(out_fishnet)
        if verbose:
            misc.tPrint("%s: %s" % (prefix, row['ID']))
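# Usage sketch for create_fishnet (hypothetical paths; the extents file is
# assumed to carry the 'Pop' and 'ID' columns the function reads):
#
#   create_fishnet("GHA_urban_extents.shp", "/tmp/fishnets", "GHA", verbose=True)
#
# This writes one shapefile per city (up to five), each a 1 km fishnet clipped
# to the city polygon and reprojected to WGS84.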
def getLargestPointsInPolygons(inPolys, inPoints, weightField, polyIdField, ptsIdField):
    '''For each polygon in inPolys, return the point with the largest value in weightField

    inPolys - polygons within which to search for points
    inPoints - points to search for within the inPolys
    weightField - gravity field to search for in the inPoints
    polyIdField - id field for inPolys
    ptsIdField - id field for inPoints
    RETURNS - data frame of cities with the columns ptID, polyID, pop, lat, lng
    '''
    outputDF = pd.DataFrame(columns=["ptID", "polyID", "pop", "lat", "lng"])
    pointLayer = "pyLyr"
    arcpy.MakeFeatureLayer_management(inPoints, pointLayer)
    featCnt = 0
    with arcpy.da.SearchCursor(inPolys, ["SHAPE@XY", "OID@", polyIdField]) as adminCur:
        for feature in adminCur:
            featCnt = featCnt + 1
            # Create a feature layer for the current feature from the OID and
            # select intersecting points
            adminLyr = "FUBAR_%s" % feature[2]
            arcpy.MakeFeatureLayer_management(inPolys, adminLyr, '"FID" = %s' % feature[1])
            arcpy.SelectLayerByLocation_management(pointLayer, "INTERSECT", adminLyr)
            count = int(arcpy.GetCount_management(pointLayer)[0])
            if featCnt % 10 == 0:
                tPrint("Polygon %s intersects %s points" % (feature[2], count))
            maxWeight = -1
            # Search through all point features that intersect the current admin shape
            with arcpy.da.SearchCursor(pointLayer,
                                       ["SHAPE@XY", "OID@", weightField, ptsIdField]) as ptCursor:
                for ptFeat in ptCursor:
                    if ptFeat[2] > maxWeight:
                        maxWeight = ptFeat[2]
                        outputFeature = ptFeat
            # Append the selected feature to the output; SHAPE@XY is (x, y),
            # so y is the latitude and x the longitude
            if maxWeight > 0:
                curDF = pd.DataFrame(
                    [[outputFeature[3], feature[2], outputFeature[2],
                      outputFeature[0][1], outputFeature[0][0]]],
                    columns=["ptID", "polyID", "pop", "lat", "lng"])
            else:
                # If no points intersect the area, use the admin's centroid
                curDF = pd.DataFrame(
                    [["CENTROID", feature[2], 0, feature[0][1], feature[0][0]]],
                    columns=["ptID", "polyID", "pop", "lat", "lng"])
            outputDF = outputDF.append(curDF)
    return outputDF
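# Usage sketch for getLargestPointsInPolygons (hypothetical inputs; the weight
# and id fields must exist in the respective layers):
#
#   cityDF = getLargestPointsInPolygons("adm2.shp", "cities.shp",
#                                       "Pop", "ADM2_CODE", "CITY_ID")
#   cityDF.to_csv("largest_city_per_admin.csv")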
def main():
    # define the input datasets
    global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
    global_adm1 = "/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp"
    global_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"
    pop_folder = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/GLOBAL_1km_Demographics"
    output_folder = "/home/wb411133/data/Projects/CoVID"
    population_raster = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"

    # Read in raster definitions
    json_file = 'DataDictionary_v2.json'
    with open(json_file, 'r') as j_file:
        file_defs = json.load(j_file, strict=False)

    # Read in the global datasets
    pop_files = os.listdir(pop_folder)
    inG = gpd.read_file(global_bounds)
    inG1 = gpd.read_file(global_adm1)
    inG2 = gpd.read_file(global_adm2)
    inR = rasterio.open(population_raster)

    # set up logging
    set_up_logging("covid_extractor.log")

    # Read in data processing file
    data_def_file = "../Notebooks/HNP_Data_Readiness_0no_1yes_Ben.csv"
    inD = pd.read_csv(data_def_file, index_col=0)
    countries = inD['NOTHING'].iloc[1:].values
    # Override with a fixed subset of countries
    countries = ['PER', 'MEX', 'PHL']
    nCountries = len(countries)
    idx = 0
    for iso3 in countries:
        # extract national bounds
        misc.tPrint("Processing %s of %s: %s" % (idx, nCountries, iso3))
        idx = idx + 1
        country_folder = os.path.join(output_folder, iso3)
        if not os.path.exists(country_folder):
            os.makedirs(country_folder)
        country_bounds = inG.loc[inG['ISO3'] == iso3]
        country_bounds = country_bounds.to_crs({'init': 'epsg:4326'})
        covid_extract = covid_extractor(iso3, country_bounds, inG1, inG2, country_folder)
        if not covid_extract.data_status['vul_data']:
            covid_extract.calculate_vulnerability(pop_folder, pop_files)
        if not covid_extract.data_status['urb_extents']:
            covid_extract.create_urban_data(inR)
        covid_extract.run_zonal(file_defs)
def processUrbanExtents(extents, popRaster, tempFolder, fieldPref="c"):
    allCompactness = []
    filesToDelete = []
    # add two output fields
    tryAddField(extents, "%sCom" % fieldPref, "FLOAT")
    tryAddField(extents, "%sNCom" % fieldPref, "FLOAT")
    with arcpy.da.UpdateCursor(extents,
                               ["OID@", "%sCom" % fieldPref, "%sNCom" % fieldPref]) as cursor:
        for featRow in cursor:
            tRaster = os.path.join(tempFolder, "pop_%s" % featRow[0])
            tRasterPoints = os.path.join(tempFolder, "pop_%s_pts.shp" % featRow[0])
            tLayer = "pop_%s" % featRow[0]
            filesToDelete.append(tRaster)
            filesToDelete.append(tRasterPoints)
            arcpy.MakeFeatureLayer_management(extents, tLayer, '"FID" = %s' % featRow[0])
            # Use the current feature to extract the current raster
            arcpy.Clip_management(popRaster, '#', tRaster, tLayer, '0',
                                  'ClippingGeometry', 'MAINTAIN_EXTENT')
            try:
                arcpy.RasterToPoint_conversion(tRaster, tRasterPoints)
                # main() here is assumed to be this module's compactness calculator
                compactness = main(tRasterPoints, "GRID_CODE", tempFolder)
                featRow[1] = compactness[0]
                featRow[2] = compactness[1]
                allCompactness.append(compactness)
            except Exception:
                tPrint("Something went wrong with feature %s" % featRow[0])
                featRow[1] = 0
                featRow[2] = 0
            cursor.updateRow(featRow)
    for f in filesToDelete:
        arcpy.Delete_management(f)
    return allCompactness
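# Usage sketch for processUrbanExtents (hypothetical inputs; extents must be a
# polygon feature class with an FID, popRaster a population grid):
#
#   processUrbanExtents(r"C:/Data/urban_extents.shp",
#                       r"C:/Data/lspop2012", r"C:/Temp", fieldPref="c")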
def process_dem(self, global_dem=''):
    '''Download DEM from AWS, calculate slope'''
    # Download DEM
    if not os.path.exists(self.dem_file) and global_dem == '':
        tPrint("Downloading DEM")
        elevation.clip(bounds=self.inD.total_bounds, max_download_tiles=90000,
                       output=self.dem_file, product='SRTM3')
    if not os.path.exists(self.dem_file) and not global_dem == '':
        tPrint("Clipping DEM from global file")
        rMisc.clipRaster(rasterio.open(global_dem), self.inD, self.dem_file)

    # Calculate slope
    if not os.path.exists(self.slope_file) and os.path.exists(self.dem_file):
        tPrint("Calculating slope")
        in_dem = rasterio.open(self.dem_file)
        in_dem_data = in_dem.read()
        beau = richdem.rdarray(in_dem_data[0, :, :], no_data=in_dem.meta['nodata'])
        slope = richdem.TerrainAttribute(beau, attrib='slope_riserun')
        meta = in_dem.meta.copy()
        meta.update(dtype=slope.dtype)
        with rasterio.open(self.slope_file, 'w', **meta) as outR:
            outR.write_band(1, slope)
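# Standalone sketch of the slope step above, outside the class, for clarity
# (assumes an existing single-band DEM at dem_path; richdem and rasterio as
# imported at the top of this module):
def slope_from_dem(dem_path, slope_path):
    '''Write a slope_riserun raster derived from the DEM at dem_path'''
    with rasterio.open(dem_path) as in_dem:
        arr = richdem.rdarray(in_dem.read(1), no_data=in_dem.meta['nodata'])
        slope = richdem.TerrainAttribute(arr, attrib='slope_riserun')
        meta = in_dem.meta.copy()
        meta.update(dtype=slope.dtype)
    with rasterio.open(slope_path, 'w', **meta) as outR:
        outR.write_band(1, slope)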
def calculate_country(iso3, curD, curB, curN, out_file, selCol, selIXP,
                      inCables, inCell, epsg='epsg:6933', debug=False):
    '''Calculate ICT distances per country

    Args:
        curD: geopandas data frame of WBES survey locations
        curB: geopandas data frame of country bounds
        curN: geopandas data frame of neighbouring countries' boundaries
        out_file: string path for the output file
        selCol: geopandas data frame of colocation centers
        selIXP: geopandas data frame of IXPs
        inCables: geopandas data frame of cable landing spots
        inCell: geopandas data frame of cell infrastructure (used for cell_dist)
    '''
    start_time = time.time()
    tPrint("Starting %s" % iso3)
    cell_coverage_folder = '/home/public/Data/GLOBAL/INFRA/GSMA/2019/MCE/Data_MCE/Global'
    cell_files = ['MCE_Global2G_2020.tif', 'MCE_Global3G_2020.tif', 'MCE_Global4G_2020.tif']
    gsma2g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[0]))
    gsma3g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[1]))
    gsma4g_R = rasterio.open(os.path.join(cell_coverage_folder, cell_files[2]))
    if False:  # os.path.exists(out_file): re-use of cached results is currently disabled
        cachedD = pd.read_csv(out_file, index_col=0)
        cachedD = pd.merge(cachedD, curD.loc[:, ['idstd', 'd2_l1_year_perf_indicators']], on='idstd')
        cachedD_geom = cachedD['geometry'].apply(wkt.loads)
        distD = gpd.GeoDataFrame(cachedD, geometry=cachedD_geom, crs=epsg)
        # Remove columns that need to be re-calculated
        distD = distD.loc[:, [not "ngh" in x for x in distD.columns]]
        distD = distD.loc[:, [not "gsma" in x for x in distD.columns]]
        distD = distD.loc[:, [not "cables_dist" in x for x in distD.columns]]
    else:
        distD = curD.to_crs(epsg)
    total_bound = curB.unary_union
    if curB.shape[0] > 0:
        # Distance to colocation centers
        if not 'col_dist' in distD.columns:
            if selCol.shape[0] > 0:
                selCol = selCol.to_crs(epsg)
                distD['col_dist'] = distD.distance(selCol.unary_union)
            else:
                distD['col_dist'] = -1
        # Distance to IXPs
        if not "ixp_dist" in distD.columns:
            if selIXP.shape[0] > 0:
                selIXP = selIXP.to_crs(epsg)
                distD['ixp_dist'] = distD.distance(selIXP.unary_union)
            else:
                distD['ixp_dist'] = -1
        # Distance and date of submarine cable landings
        if not 'firstCable' in distD.columns:
            selCables = inCables.loc[inCables['ISO3'] == iso3]
            if selCables.shape[0] > 0:
                selCables = selCables.to_crs(epsg)
                # Calculate distance and date of first cable landing point
                first_date = selCables['RFS'].sort_values().iloc[0]
                first_points = selCables.loc[selCables['RFS'] == first_date]
                distD['firstCable'] = first_date
                distD['firstCable_dist'] = distD.distance(first_points.unary_union)
                # Calculate distance and date of closest cable landing point
                distD['closestCable'] = distD.apply(lambda x: get_nearest_date(x, selCables), axis=1)
                distD['closestCable_dist'] = distD.apply(lambda x: get_nearest(x, selCables), axis=1)
            else:
                distD['firstCable'] = ''
                distD['firstCable_dist'] = -1
                distD['closestCable'] = ''
                distD['closestCable_dist'] = -1
        # Calculate distance to neighbouring countries
        if not "ngh1_dist" in distD.columns:
            cnt = 1
            for idx, row in curN.iterrows():
                distD['ngh%s' % cnt] = row['ISO3']
                distD['ngh%s_dist' % cnt] = distD.distance(row['geometry'])
                # Calculate distance to neighbouring submarine cables
                selCables = inCables.loc[inCables['ISO3'] == row['ISO3']]
                if selCables.shape[0] > 0:
                    if debug and row['ISO3'] == 'GUY':
                        return [distD, selCables, curN]
                    distD['ngh%s_cbl_dist' % cnt] = distD.distance(selCables.unary_union)
                    distD['ngh%s_cbl' % cnt] = distD.apply(lambda x: get_nearest_date(x, selCables), axis=1)
                else:
                    distD['ngh%s_cbl_dist' % cnt] = -1
                    distD['ngh%s_cbl' % cnt] = -1
                cnt = cnt + 1
        # Distance to cell infrastructure (always re-calculated)
        cell_sindex = inCell.sindex
        potential_matches = inCell.loc[list(cell_sindex.intersection(total_bound.bounds))]
        selCell = potential_matches.loc[potential_matches.intersects(total_bound)]
        selCell = selCell.to_crs(epsg)
        distD['cell_dist'] = distD.distance(selCell.unary_union)
        # Sample GSMA coverage rasters at the survey locations
        if not "gsma2g" in distD.columns:
            coordsD = distD.to_crs(gsma2g_R.crs)
            coords = [[x.x, x.y] for x in coordsD['geometry']]
            distD['gsma2g'] = [x[0] for x in list(gsma2g_R.sample(coords))]
            distD['gsma3g'] = [x[0] for x in list(gsma3g_R.sample(coords))]
            distD['gsma4g'] = [x[0] for x in list(gsma4g_R.sample(coords))]
    pd.DataFrame(distD).to_csv(out_file)
    end_time = time.time()
    tPrint(f"Completed {iso3}: {round((end_time - start_time) / 60)}")
    return distD
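# calculate_country relies on get_nearest and get_nearest_date, which are not
# defined in this file. A plausible sketch based on how their results are used
# above (distance to the closest cable landing, and that cable's RFS date);
# treat these as assumptions, not the project's actual helpers:
def get_nearest(pt_row, cables):
    '''Distance from this row's geometry to the closest cable landing'''
    return cables.distance(pt_row['geometry']).min()

def get_nearest_date(pt_row, cables):
    '''RFS date of the cable landing closest to this row's geometry'''
    dists = cables.distance(pt_row['geometry'])
    return cables.loc[dists.idxmin(), 'RFS']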
def evaluateOutput(self, admin_stats, commune_stats):
    '''Check the outputs to determine if processing worked correctly

    1. compare population totals between raw, 250m and 1km data
    2. Calculate urbanization rate
    3. Water mask
        a. calculate overlap between water classes
        b. calculate overlap between water and population
        c. calculate overlap between water and urban

    https://ghsl.jrc.ec.europa.eu/documents/cfs01/V3/CFS_Ghana.pdf
    '''
    stats_file = os.path.join(self.out_folder,
                              "DATA_EVALUATION_%s_%s.txt" % (self.iso3, self.suffix))
    with open(stats_file, 'w') as out_stats:
        # Compare pop rasters
        pop_comparison = self.compare_pop_rasters(verbose=False)
        out_stats.write("***** Evaluate Total Population *****\n")
        for x in pop_comparison:
            out_stats.write(f"{x[0]}: {x[1]}\n")
        # define population and urbanization layers
        pop_file_defs = []
        for pop_file in self.pop_files:
            name = "GHS"
            if "upo" in pop_file:
                name = "WP_U_%s" % pop_file[-6:-4]
            if "cpo" in pop_file:
                name = "WP_C_%s" % pop_file[-6:-4]
            pop_file_base = os.path.basename(pop_file)
            if self.suffix == "1k":
                pop_file_base = pop_file_base.replace(self.iso3.lower(),
                                                      "%s%s" % (self.iso3.lower(), self.suffix))
                if "1k1k" in pop_file_base:
                    pop_file_base = pop_file_base.replace("1k1k", "1k")
            out_pop_file = os.path.join(self.final_folder, pop_file_base)
            urban_pop_file = out_pop_file.replace(".tif", "_urban.tif")
            hd_pop_file = out_pop_file.replace(".tif", "_urban_hd.tif")
            pop_file_defs.append([out_pop_file, urban_pop_file, hd_pop_file, name])
        out_stats.write("***** Evaluate Urbanization *****\n")
        for fileDef in pop_file_defs:
            pFile, urb_file, hd_file, name = fileDef
            try:
                inPop = rasterio.open(pFile).read()
                inPop = inPop * (inPop > 0)
                inUrb = rasterio.open(urb_file).read()
                inHd = rasterio.open(hd_file).read()
                tPop = inPop.sum()
                urbPop = (inPop * inUrb).sum()
                hdPop = (inPop * inHd).sum()
                out_stats.write(f"{name}: TotalPop: {tPop.round(0)}, UrbanPop: {urbPop.round(0)}, HD Pop: {hdPop.round(0)}\n")
                out_stats.write(f"{name}: {((urbPop / tPop) * 100).round(2)}% Urban; {((hdPop / tPop) * 100).round(2)}% HD Urban\n")
            except Exception:
                print(f"Error processing {name}")
                print(fileDef)
        # Summarize population in SMOD classes
        out_stats.write('***** Evaluate SMOD ******\n')
        smod_vals = [10, 11, 12, 13, 21, 22, 23, 30]
        smod_file = os.path.join(
            self.final_folder,
            os.path.basename(self.ghssmod_file).replace(
                "%s" % self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix)))
        inSMOD = rasterio.open(smod_file)
        smod = inSMOD.read()
        for pFile in self.pop_files:
            if 'gpo' in pFile:
                inPop = rasterio.open(pFile)
                pop = inPop.read()
                pop[pop < 0] = 0
                total_pop = pop.sum()
                total_per = 0
                for val in smod_vals:
                    cur_smod = (smod == val).astype(int)
                    cur_pop = pop * cur_smod
                    perUrban = cur_pop.sum() / total_pop * 100
                    if val > 20:
                        total_per = total_per + perUrban
                    out_stats.write(f'{val}: {perUrban}\n')
                out_stats.write(f'Total Urban: {total_per}\n')
        # 3. Water mask
        out_stats.write("***** Evaluate Water Intersection *****\n")
        # a. calculate overlap between water classes
        water_ghsl = os.path.join(self.final_folder, "%s%s_wat.tif" % (self.iso3.lower(), self.suffix))
        water_lc = os.path.join(self.final_folder, "%s%s_wat_lc.tif" % (self.iso3.lower(), self.suffix))
        inWG = rasterio.open(water_ghsl)
        wgData = inWG.read()
        wgData[wgData == inWG.meta['nodata']] = 0
        inWLC = rasterio.open(water_lc)
        wlcData = inWLC.read()
        wlcData[wlcData == inWLC.meta['nodata']] = 0
        combo = wgData + wlcData
        out_stats.write(f"WATER: GHSL count: {wgData.sum()}; LC count: {wlcData.sum()}; overlap: {(combo == 2).sum()}\n")
        # b. calculate overlap between water and population
        out_stats.write("***** Evaluate Water Population Overlap *****\n")
        for fileDef in pop_file_defs:
            pop_file, urb_file, hd_file, name = fileDef
            cur_pop = rasterio.open(pop_file)
            curP = cur_pop.read()
            curP[curP == cur_pop.meta['nodata']] = 0
            urb = rasterio.open(urb_file).read()
            hd = rasterio.open(hd_file).read()
            # c. calculate overlap between water and urban
            out_stats.write(f"WATER {name} Population: TotalPop: {curP.sum().round()}, WaterPop GHSL: {(curP * wgData).sum().round()}, WaterPop LC: {(curP * wlcData).sum().round()}\n")
            out_stats.write(f"WATER {name} Urban Cells: TotalUrban Cells: {urb.sum().round()}, WaterUrban GHSL: {(urb * wgData).sum()}, WaterUrb LC: {(urb * wlcData).sum()}\n")
            out_stats.write(f"WATER {name} HD Cells: TotalPop: {hd.sum().round()}, WaterHD GHSL: {(hd * wgData).sum()}, WaterHD LC: {(hd * wlcData).sum()}\n")
        # Summarize zonal stats files
        for sFile in [admin_stats, commune_stats]:
            if os.path.exists(sFile):
                tPrint(sFile)
                file_name = os.path.basename(sFile)
                inD = pd.read_csv(sFile, index_col=0)
                out_stats.write(f"***** Summarizing {file_name}\n")
                bad_cols = ['index', 'OBJECTID', 'WB_ADM1_CO', 'WB_ADM0_CO',
                            'WB_ADM2_CO', 'Shape_Leng', 'Shape_Area']
                for col in inD.columns:
                    if not col in bad_cols:
                        curD = inD[col]
                        try:
                            curD_sum = curD.loc[curD > 0].sum()
                            out_stats.write(f"{col}: {round(curD_sum)}\n")
                        except Exception:
                            pass
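# The water/population overlaps above repeat the pattern "sum of population
# where a binary mask is 1". A small helper capturing that pattern (a sketch;
# assumes the arrays are co-registered, as the rasters above are):
def masked_pop_sum(pop_arr, mask_arr):
    '''Total population within cells where the binary mask equals 1'''
    return float((pop_arr * mask_arr).sum())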
def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k,
                   global_ghbuilt, global_ghsl, global_smod):
    '''Extract global layers for the current country'''
    # Extract water from Globcover
    if not os.path.exists(self.lc_file_h20):
        tPrint("Extracting water")
        if not os.path.exists(self.lc_file):
            rMisc.clipRaster(rasterio.open(global_landcover), self.inD, self.lc_file)
        in_lc = rasterio.open(self.lc_file)
        inL = in_lc.read()
        lcmeta = in_lc.meta.copy()
        tempL = (inL == 210).astype(lcmeta['dtype'])
        lcmeta.update(nodata=255)
        with rasterio.open(self.lc_file_h20, 'w', **lcmeta) as out:
            out.write(tempL)
        os.remove(self.lc_file)
    # Extract water from GHSL
    if not os.path.exists(self.ghsl_h20):
        tPrint("Extracting water from GHSL")
        inR = rasterio.open(global_ghsl)
        ul = inR.index(*self.inD.total_bounds[0:2])
        lr = inR.index(*self.inD.total_bounds[2:4])
        # read the subset of the data into a numpy array
        window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]), float(lr[1] + 1)))
        data = inR.read(1, window=window, masked=False)
        data = data == 1
        b = self.inD.total_bounds
        new_transform = rasterio.transform.from_bounds(b[0], b[1], b[2], b[3],
                                                       data.shape[1], data.shape[0])
        meta = inR.meta.copy()
        meta.update(driver='GTiff', width=data.shape[1], height=data.shape[0],
                    transform=new_transform)
        data = data.astype(meta['dtype'])
        with rasterio.open(self.ghsl_h20, 'w', **meta) as outR:
            outR.write_band(1, data)
    # Extract GHS-Pop
    if not os.path.exists(self.ghspop_file):
        tPrint("Extracting GHS-POP")
        rMisc.clipRaster(rasterio.open(global_ghspop), self.inD, self.ghspop_file)
    # Extract GHS-Pop 1k
    if not os.path.exists(self.ghspop1k_file):
        tPrint("Extracting GHS-POP 1k")
        rMisc.clipRaster(rasterio.open(global_ghspop1k), self.inD, self.ghspop1k_file)
    # Extract GHS-Built
    if not os.path.exists(self.ghsbuilt_file):
        tPrint("Clipping GHS-Built")
        rMisc.clipRaster(rasterio.open(global_ghbuilt), self.inD, self.ghsbuilt_file)
    # Extract GHS-SMOD
    if not os.path.exists(self.ghssmod_file):
        tPrint("Clipping GHS-SMOD")
        rMisc.clipRaster(rasterio.open(global_smod), self.inD, self.ghssmod_file)
    # Rasterize admin boundaries
    if not os.path.exists(self.admin_file):
        tPrint("Rasterizing admin boundaries")
        xx = rasterio.open(self.ghspop_file)
        res = xx.meta['transform'][0]
        tempD = self.inD.to_crs(xx.crs)
        shapes = ((row['geometry'], 1) for idx, row in tempD.iterrows())
        burned = features.rasterize(shapes=shapes, out_shape=xx.shape, fill=0,
                                    transform=xx.meta['transform'], dtype='int16')
        meta = xx.meta.copy()
        meta.update(dtype=burned.dtype)
        with rasterio.open(self.admin_file, 'w', **meta) as outR:
            outR.write_band(1, burned)
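# The GHSL water extraction above builds its read window by hand from
# inR.index(); rasterio.windows.from_bounds does the same bookkeeping. A sketch
# of an equivalent clip (an alternative to the manual window, not the project's
# rMisc.clipRaster; assumes bounds are in the raster's CRS):
from rasterio.windows import from_bounds

def clip_to_bounds(src_path, bounds, out_path):
    '''Clip a single-band raster to (minx, miny, maxx, maxy)'''
    with rasterio.open(src_path) as src:
        window = from_bounds(*bounds, transform=src.transform)
        data = src.read(1, window=window)
        meta = src.meta.copy()
        meta.update(driver='GTiff', height=data.shape[0], width=data.shape[1],
                    transform=src.window_transform(window))
    with rasterio.open(out_path, 'w', **meta) as dst:
        dst.write_band(1, data)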
def summarizeGlobalData(inShp, idField, otherFiles=-1,
                        type=['N', 'N', 'N', 'N', 'N', 'N', 'N', 'C'],
                        clip=True, tempFolder="C:/Temp"):
    inFiles = [
        r"S:\GLOBAL\NightLights\1992\F101992.v4b_web.stable_lights.avg_vis_ElvidgeCorrected.tif",
        r"S:\GLOBAL\NightLights\2012\F182012.v4c_web.stable_lights.avg_vis_ElvidgeCorrected.tif",
        r"S:\GLOBAL\NightLights\rad_cal\F1996_0316-19970212_rad_v4.avg_vis_Corrected.tif",
        r"S:\GLOBAL\NightLights\rad_cal\F2010_0111-20101209_rad_v4.avg_vis_Corrected.tif",
        r"S:\GLOBAL\GDP\UNEP\GDP.tif",
        r"S:\GLOBAL\Elevation\1km\elevation1",
        r"S:\GLOBAL\POPULATION\Landscan2012\ArcGIS\Population\lspop2012",
        r"S:\GLOBAL\Landcover\Globcover\GLOBCOVER_L4_200901_200912_V2.3.tif"
    ]
    inTitles = [
        idField, "NTL2012", "RC1996", "RC2010", "GDP_UNEP", "Elev",
        "L11", "L14", "L20", "L30", "L40", "L50", "L60", "L70", "L90",
        "L100", "L110", "L120", "L130", "L140", "L150", "L160", "L170",
        "L180", "L190", "L200", "L210", "L220", "L230", "NTL1992"
    ]
    if not otherFiles == -1 and not otherFiles == "NTL":
        inFiles = otherFiles
        inTitles = []
    if otherFiles == "NTL":
        inFiles = getNTLFiles()
        type = ["N"] * len(inFiles)
        inTitles = [idField]
    # Define the values to extract from each DBF
    summarizeVals = []
    filesToDelete = []
    tempVals = []
    fCount = 0
    for inFidx in range(0, len(inFiles)):
        tempRast = os.path.join(tempFolder, "aaaTemp%s.tif" % inFidx)
        tempVals.append(tempRast)
        inFile = inFiles[inFidx]
        fName = os.path.basename(inFile)
        curType = type[inFidx]
        if not clip:
            tempRast = inFile
        else:
            arcpy.Clip_management(inFile, "#", tempRast, inShp, '', "ClippingGeometry")
        inR = Raster(tempRast)
        if curType == 'C':
            # Processing categorical data is slightly different
            unqVals = getUniqueValues(inFile)
            print(unqVals)
            for unq in unqVals:
                # All the values from categorical datasets need SUM
                summarizeVals.append(["SUM"])
                # For the current unique value, create a binary raster
                curR = inR == unq
                outTable = os.path.join(
                    tempFolder,
                    "%s_%s.dbf" % (fName.replace(".tif", "").replace(".", ""), unq))
                inTitles.append("%s_%s" % (fName, unq))
                ZonalStatisticsAsTable(inShp, idField, curR, outTable, 'DATA')
                filesToDelete.append(outTable)
        else:
            summarizeVals.append(["SUM", "MEAN", "STD"])
            inTitles.append("%s_SUM" % fName)
            inTitles.append("%s_MEAN" % fName)
            inTitles.append("%s_STD" % fName)
            outTable = os.path.join(
                tempFolder,
                "%s_%s.dbf" % (fName.replace(".tif", "").replace(".", "").replace("-", "_"), fCount))
            fCount += 1
            if not os.path.exists(outTable):
                ZonalStatisticsAsTable(inShp, idField, Raster(tempRast), outTable)
            filesToDelete.append(outTable)
        tPrint("Finished Processing " + os.path.basename(inFile).replace(".tif", ""))
    if idField in ['FID', 'OID']:
        idField = "%s_" % idField
    res = summarizeDbf(filesToDelete, idField, summarizeVals)
    for f in filesToDelete:
        arcpy.Delete_management(f)
    for f in tempVals:
        arcpy.Delete_management(f)
    return {"Results": res, "Titles": inTitles}
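# Usage sketch for summarizeGlobalData with the default global stack
# (hypothetical shapefile; requires the S:\ drive layout above):
#
#   res = summarizeGlobalData(r"C:/Data/districts.shp", "FID")
#   # res["Results"] holds the summarized values, res["Titles"] the column names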
def calculatePuga(popFile, cityExtent, kernelFiles, distance,
                  filePref="Pop", temp="C:/Temp", verbose=False):
    for f in [temp]:
        if not os.path.exists(f):
            os.makedirs(f)
    # Temporary files
    cityLS = os.path.join(temp, "%s_Rast.tif" % filePref)
    totalPop = os.path.join(temp, "%s_totalPop.tif" % filePref)
    focalPopulation = os.path.join(temp, "%s_focalPop_%s.tif" % (filePref, distance))
    peopleKernel1 = os.path.join(temp, "%s_People%skernel_1.tif" % (filePref, distance))
    peopleKernel2 = os.path.join(temp, "%s_People%skernel_2.tif" % (filePref, distance))
    pugaDistance = os.path.join(temp, "%s_Puga_%s.tif" % (filePref, distance))
    pugaKernel1 = os.path.join(temp, "%s_PugaKernel1_%s.tif" % (filePref, distance))
    pugaKernel2 = os.path.join(temp, "%s_PugaKernel2_%s.tif" % (filePref, distance))
    pugaDistanceDbf = os.path.join(temp, "%s_Puga_%s.dbf" % (filePref, distance))
    pugaKernel1Dbf = os.path.join(temp, "%s_PugaKernel1_%s.dbf" % (filePref, distance))
    pugaKernel2Dbf = os.path.join(temp, "%s_PugaKernel2_%s.dbf" % (filePref, distance))
    toDelete = [focalPopulation, peopleKernel1, peopleKernel2,
                pugaDistance, pugaKernel1, pugaKernel2]
    if not arcpy.Exists(pugaDistanceDbf):
        # Clip the Landscan data to the city extent
        arcpy.Clip_management(popFile, "", cityLS, cityExtent, "",
                              "ClippingGeometry", "NO_MAINTAIN_EXTENT")
        # calculate total population within the city
        ZonalStatistics(cityExtent, "FID", cityLS, "SUM").save(totalPop)
        if verbose:
            tPrint("Ran Zonal Stats to get city population")
        # Calculate people living within `distance` of cell i without a discount factor
        FocalStatistics(cityLS, NbrAnnulus(0, distance, "CELL"), "SUM", "").save(focalPopulation)
        # Calculate people living within `distance` of cell i with distance
        # decay functions - e^-0.5d and e^-1d
        FocalStatistics(cityLS, NbrWeight(kernelFiles % (distance, 1)), "SUM", "").save(peopleKernel1)
        FocalStatistics(cityLS, NbrWeight(kernelFiles % (distance, 2)), "SUM", "").save(peopleKernel2)
        if verbose:
            tPrint("Calculated Focal Statistics")
        (Raster(cityLS) / Raster(totalPop) * Raster(focalPopulation)).save(pugaDistance)
        (Raster(cityLS) / Raster(totalPop) * Raster(peopleKernel1)).save(pugaKernel1)
        (Raster(cityLS) / Raster(totalPop) * Raster(peopleKernel2)).save(pugaKernel2)
        if verbose:
            tPrint("Converted population to Puga numbers")
        # Create tables of Puga outcomes
        ZonalStatisticsAsTable(cityExtent, "FID", pugaDistance, pugaDistanceDbf, "DATA", "ALL")
        ZonalStatisticsAsTable(cityExtent, "FID", pugaKernel1, pugaKernel1Dbf, "DATA", "ALL")
        ZonalStatisticsAsTable(cityExtent, "FID", pugaKernel2, pugaKernel2Dbf, "DATA", "ALL")
        if verbose:
            tPrint("Finished calculating Puga Index")
        for f in toDelete:
            arcpy.Delete_management(f)
    tPrint("Calculated Puga for %s and distance %s" % (filePref, distance))
    return [pugaDistanceDbf, pugaKernel1Dbf, pugaKernel2Dbf]
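# Usage sketch for calculatePuga. kernelFiles must be a %-style template taking
# (distance, kernel_number), inferred from the NbrWeight calls above; all paths
# here are hypothetical:
#
#   dbfs = calculatePuga(r"C:/Data/lspop2012", r"C:/Data/city_extent.shp",
#                        r"C:/Data/kernels/kernel_%s_%s.txt", 10,
#                        filePref="Accra", verbose=True)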
def main():
    # define the input datasets
    global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
    global_adm1 = "/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp"
    global_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"
    pop_folder = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/GLOBAL_1km_Demographics"
    output_folder = "/home/wb411133/data/Projects/CoVID"
    population_raster = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"

    # Read in the global datasets
    pop_files = os.listdir(pop_folder)
    inG = gpd.read_file(global_bounds)
    inG1 = gpd.read_file(global_adm1)
    inG2 = gpd.read_file(global_adm2)
    inR = rasterio.open(population_raster)

    countries = os.listdir(output_folder)
    nCountries = len(countries)
    idx = 0
    for iso3 in countries:
        # extract national bounds
        misc.tPrint("Processing %s of %s: %s" % (idx, nCountries, iso3))
        idx = idx + 1
        country_folder = os.path.join(output_folder, iso3)
        adm0_file = os.path.join(country_folder, "adm0.shp")
        adm1_file = os.path.join(country_folder, "adm1.shp")
        adm2_file = os.path.join(country_folder, "adm2.shp")
        if not os.path.exists(country_folder):
            os.makedirs(country_folder)
        country_bounds = inG.loc[inG['ISO3'] == iso3].to_crs({'init': 'epsg:4326'})
        country_adm1 = inG1.loc[inG1['ISO3'] == iso3].to_crs({'init': 'epsg:4326'})
        country_adm2 = inG2.loc[inG2['ISO3'] == iso3].to_crs({'init': 'epsg:4326'})
        if not os.path.exists(adm0_file):
            country_bounds.to_file(adm0_file)
        if not os.path.exists(adm1_file):
            country_adm1.to_file(adm1_file)
        if not os.path.exists(adm2_file):
            country_adm2.to_file(adm2_file)
        calculate_vulnerability(iso3, country_folder, country_bounds, pop_folder, pop_files)
        misc.tPrint("***Calculated Vulnerability")
        try:
            create_urban_data(iso3, country_folder, country_bounds, inR, calc_urban=False)
            misc.tPrint("***Calculated Urban Extents")
        except Exception:
            misc.tPrint("%s errored on HD clusters" % iso3)
        try:
            create_urban_data(iso3, country_folder, country_bounds, inR,
                              calc_urban=True, calc_hd_urban=False)
        except Exception:
            misc.tPrint("%s errored on all clusters" % iso3)
        # extract_osm(country_bounds, country_folder)
        # misc.tPrint("***Extracted OSM")
        # Run zonal stats
        cur_rasters = copy.deepcopy(hnp_categories)
        for key, values in cur_rasters.items():
            values['raster_file'] = os.path.join(country_folder, values['raster_file'])
            cur_rasters[key] = values
        all_shps = []
        for root, dirs, files in os.walk(country_folder):
            for f in files:
                if f[-4:] == ".shp" and not "zonal" in f:
                    all_shps.append(os.path.join(root, f))
        run_zonal(all_shps, cur_rasters)
        misc.tPrint("***Calculated Zonal")