Exemplo n.º 1
0
def _epsg_code(crs):
    ''' Return the integer EPSG code of a GeoDataFrame CRS

    INPUT
        crs - either an object exposing to_epsg() (e.g. a pyproj CRS) or a
            legacy mapping of the form {'init': 'epsg:XXXX'}
    RETURNS
        [int] - the EPSG code
    '''
    if hasattr(crs, 'to_epsg'):
        code = crs.to_epsg()
        if code is None:
            raise ValueError("Could not determine an EPSG code for CRS %s" % crs)
        return int(code)
    # Legacy dictionary-style CRS: {'init': 'epsg:3857'}
    return int(crs['init'].split(":")[-1])


def create_fishnet(extents_file, out_folder, prefix, verbose=True):
    ''' Create a fishnet of 1000 x 1000 map-unit cells for each feature in the input extents_file

    Features are processed in descending order of their 'Pop' column. For each
    feature a shapefile named <prefix>_<ID>.shp is written to out_folder;
    features whose shapefile already exists are skipped. Only the cells that
    intersect the feature are kept, and they are stored in EPSG:4326 together
    with a 'geohash' column computed from each cell centroid.

    INPUT
        extents_file [string] - path to urban extents; must have 'Pop' and 'ID' columns
        out_folder [string path] - where output shapefiles should be written
        prefix [string] - will be prepended to each fishnet shapefile name
        verbose [boolean] - whether to print a message for every fishnet created
    '''
    urban_extents = gpd.read_file(extents_file)
    sel_cities = urban_extents.sort_values(['Pop'], ascending=False)
    try:
        sel_cities = misc.project_UTM(sel_cities)
    except Exception:
        # Fall back to Web Mercator when the UTM projection fails
        sel_cities = sel_cities.to_crs({"init":"epsg:3857"})

    # The CRS is shared by every feature, so resolve its EPSG code only once
    # (and only if at least one fishnet actually has to be created)
    crs_num = None
    for idx, row in sel_cities.iterrows():
        out_fishnet = os.path.join(out_folder, "%s_%s.shp" % (prefix, row['ID']))
        if os.path.exists(out_fishnet):
            continue
        if crs_num is None:
            crs_num = _epsg_code(sel_cities.crs)
        # bounds is (minx, miny, maxx, maxy); passed on as minx, maxx, miny, maxy
        b = row['geometry'].bounds
        misc.createFishnet(out_fishnet, b[0], b[2], b[1], b[3], 1000, 1000, crsNum=crs_num)
        # Re-open the full rectangular fishnet and keep only cells touching the feature
        fishnet = gpd.read_file(out_fishnet)
        fishnet = fishnet[fishnet.intersects(row['geometry'])]
        fishnet = fishnet.to_crs({'init':'epsg:4326'})
        fishnet['geohash'] = fishnet['geometry'].apply(lambda x: geohash.encode(x.centroid.y, x.centroid.x))
        # Overwrite the rectangular fishnet with the clipped, geohashed version
        fishnet.to_file(out_fishnet)
        if verbose:
            misc.tPrint("%s: %s" % (prefix, row['ID']))
Exemplo n.º 2
0
def summarize_DHS(template, dhs_files, country_folder, iso3):
    ''' combine DHS data with WorldPop population and run zonal stats

    For every DHS table that has at least one row for iso3, each column whose
    name occurs exactly once across all DHS tables is rasterized onto the
    template grid and combined with the template (population) raster into
    <key>_<field>_pop.tif. The intermediate <key>_<field>.tif is deleted.

    INPUT
        template [string] - template raster upon which to base rasterization of DHS
        dhs_files [dictionary] - defines DHS files to process {filename:geopandas}
        country_folder [string to path] - folder to create output
        iso3 [string] - value of the 'ISO3' column identifying the country to process
    RETURNS
        [dictionary] - one entry per successfully processed field, keyed by
            '<key>_<field>', holding 'raster_file' (file name relative to
            country_folder), 'vars' and 'description'
    '''
    # Count how often each column name occurs across all DHS tables; only
    # names that occur exactly once are processed
    all_columns = []
    for inD in dhs_files.values():
        all_columns.extend(inD.columns.values)
    col_count = Counter(all_columns)
    unq_columns = {key for key, value in col_count.items() if value == 1}

    dhs_rasters = {}
    # Keep the template open for the whole run and guarantee it is closed
    with rasterio.open(template) as inP:
        for key, inD in dhs_files.items():
            sel_dhs = inD.loc[inD['ISO3'] == iso3]
            if sel_dhs.shape[0] == 0:
                continue
            for field in inD.columns:
                if field not in unq_columns:
                    continue
                out_file = os.path.join(country_folder, f'{key}_{field}.tif')
                out_file_pop = os.path.join(country_folder, f'{key}_{field}_pop.tif')
                try:
                    # rasterize the desired field in the input DHS data
                    # NOTE(review): the whole table (inD) is rasterized, not
                    #   only the rows selected for iso3 - confirm this is intended
                    if not os.path.exists(out_file) and not os.path.exists(out_file_pop):
                        rMisc.rasterizeDataFrame(inD, out_file, idField=field, templateRaster = template)

                    # Multiply the rasterized data frame by the population layer
                    if not os.path.exists(out_file_pop):
                        # Close the intermediate raster before it is deleted below
                        with rasterio.open(out_file) as in_dhs:
                            combine_dhs_pop(inP, in_dhs, out_file_pop, factor=100)
                    if os.path.exists(out_file):
                        os.remove(out_file)
                    misc.tPrint(f'{iso3}_{key}: {field}')
                    dhs_rasters[f'{key}_{field}'] = {
                        'raster_file': f'{key}_{field}_pop.tif',
                        'vars': ['SUM', 'MEAN'],
                        'description': f'{key}_{field}'
                    }
                except Exception:
                    # Best effort: report the failing field and carry on
                    misc.tPrint(f"Error processing {key} - {field}")
    return dhs_rasters
Exemplo n.º 3
0
def run_zonal(admin_shapes, rasters, out_suffix='', iso3=''):
    ''' Calculate zonal results for submitted admin and raster

        For each shapefile a CSV named <shapefile>_zonal<out_suffix>.csv is
        written next to it, holding the shapefile attributes (geometry
        dropped) plus one column per requested statistic. Shapefiles whose
        CSV already exists are skipped, as are rasters that do not exist on
        disk; no CSV is written when none of the rasters exist.

        INPUTS
            admin_shapes [list of strings] - paths to the shapefiles within which to calculate statistics
            rasters [dictionary] - data dictionary containing the raster and the required information
                { 'HNP_Var1':{
                        'raster_file': 'path_to_raster',
                        'vars':['SUM','MEAN'],
                        'description':'Lorem Ipsum'
                    }
                }
                If the first entry of 'vars' is 'C' the raster is treated as
                categorical and the definition must also hold 'unqVals'.
            out_suffix [string] - text to append to output zonal file
            iso3 [string] - country code, only used in the progress message
    '''
    for shp in admin_shapes:
        out_zonal = shp.replace(".shp", "_zonal%s.csv" % out_suffix)
        misc.tPrint(f"Processed: {iso3} {os.path.basename(shp)}")
        if os.path.exists(out_zonal):
            # Nothing to do, so avoid reading the shapefile at all
            continue

        inD = gpd.read_file(shp)
        write_out = False
        for var_name, definition in rasters.items():
            if not os.path.exists(definition['raster_file']):
                continue
            write_out = True
            if definition['vars'][0] == 'C':
                # Categorical raster: one count column per unique value
                uVals = definition['unqVals']
                res = rMisc.zonalStats(inD,
                                       definition['raster_file'],
                                       rastType='C',
                                       unqVals=uVals,
                                       reProj=True)
                res = pd.DataFrame(
                    res, columns=['LC_%s' % x for x in uVals])
                for column in res.columns:
                    inD[column] = res[column]
            else:
                # Numeric raster: zonalStats returns SUM, MIN, MAX, MEAN per
                # feature; keep only the statistics listed in 'vars'
                res = rMisc.zonalStats(inD,
                                       definition['raster_file'],
                                       minVal=0,
                                       reProj=True)
                res = pd.DataFrame(
                    res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
                res.columns = [f"{var_name}_{x}" for x in res.columns]
                for var in definition['vars']:
                    inD[f"{var_name}_{var}"] = res[f"{var_name}_{var}"]
        if write_out:
            inD.drop(['geometry'], axis=1, inplace=True)
            pd.DataFrame(inD).to_csv(out_zonal)
Exemplo n.º 4
0
def run_all(iso3, output_folder, dhs_files):
    ''' Summarize DHS data and run base and DHS zonal statistics for one country

    Works inside <output_folder>/<iso3>: builds the DHS rasters on top of
    WP_2020_1km.tif, then runs run_zonal for every shapefile found under the
    country folder (ignoring files with "zonal" in their name), once for the
    module-level hnp_categories rasters (suffix _BASE) and once for the DHS
    rasters (suffix _DHS).

    INPUT
        iso3 [string] - country code; also the name of the country sub-folder
        output_folder [string path] - folder holding one sub-folder per country
        dhs_files [dictionary] - DHS data, passed through to summarize_DHS
    '''
    country_folder = os.path.join(output_folder, iso3)
    misc.tPrint("Processing %s" % iso3)

    # summarize DHS
    country_pop = os.path.join(country_folder, "WP_2020_1km.tif")
    dhs_rasters = summarize_DHS(country_pop, dhs_files, country_folder, iso3)

    # Run zonal stats
    # Work on deep copies so that neither the shared hnp_categories definition
    # nor the DHS definitions are modified when paths are made absolute
    cur_rasters = copy.deepcopy(hnp_categories)
    for values in cur_rasters.values():
        values['raster_file'] = os.path.join(country_folder,
                                             values['raster_file'])

    cur_dhs = copy.deepcopy(dhs_rasters)
    for values in cur_dhs.values():
        values['raster_file'] = os.path.join(country_folder,
                                             values['raster_file'])

    all_shps = []
    for root, _dirs, files in os.walk(country_folder):
        for f in files:
            if f.endswith(".shp") and "zonal" not in f:
                all_shps.append(os.path.join(root, f))

    run_zonal(all_shps, cur_rasters, out_suffix="_BASE", iso3=iso3)
    misc.tPrint("***%s Calculated Base Zonal" % iso3)
    run_zonal(all_shps, cur_dhs, out_suffix="_DHS", iso3=iso3)
    misc.tPrint("***%s Calculated DHS Zonal" % iso3)
Exemplo n.º 5
0
def zonalStats(inShp,
               inRaster,
               bandNum=1,
               mask_A=None,
               reProj=False,
               minVal='',
               maxVal='',
               verbose=False,
               rastType='N',
               unqVals=None,
               weighted=False,
               allTouched=False):
    ''' Run zonal statistics against an input shapefile. Returns array of SUM, MIN, MAX, and MEAN

    INPUT VARIABLES
    inShp [string or geopandas object] - path to input shapefile
    inRaster [string or rasterio object] - path to input raster

    OPTIONAL
    bandNum [integer] - band in raster to analyze
    reProj [boolean] -  whether to reproject data to match, if not, raise an error
    minVal/maxVal [number] - if defined, will only calculate statistics on values above or below this number
    verbose [boolean] - whether to be loud with technical updates
    rastType [string N or C] - N is numeric and C is categorical. Categorical returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics, tabulates all these numbers, will report 0 counts
    mask_A [numpy boolean mask] - mask the desired band using an identical shape boolean mask. Useful for doing conditional zonal stats
    weighted [boolean] - apply weighted zonal calculations. This will determine the % overlap for each
        cell in the defined AOI. Will apply weights in calculations of numerical statistics
    allTouched [boolean] - passed to rasterize as all_touched; forced to True when weighted is True

    RETURNS
    array of arrays, one for each feature in inShp, in the same order.
        Numeric: [SUM, MIN, MAX, MEAN], or [-1, -1, -1, -1] when no values
            remain after applying minVal/maxVal or when the feature fails.
        Categorical: counts in the order of unqVals (-1 for each on failure);
            without unqVals, the (values, counts) tuple from np.unique.

    RAISES
    ValueError - if the CRS of the inputs differ and reProj is False
    '''
    if unqVals is None:
        unqVals = []

    def error_row():
        # Placeholder with the same width as a successful result, so that
        #   outputs stay aligned with the input features
        if rastType == 'N':
            return [-1, -1, -1, -1]
        return [-1 for _ in unqVals]

    if isinstance(inShp, str):
        inVector = gpd.read_file(inShp)
    else:
        inVector = inShp
    # A raster opened here is closed here; a raster passed in by the caller
    #   is left open for the caller to manage
    opened_here = isinstance(inRaster, str)
    if opened_here:
        curRaster = rasterio.open(inRaster, 'r')
    else:
        curRaster = inRaster

    outputData = []
    try:
        # If mask is not none, apply mask
        if mask_A is not None:
            curRaster.write_mask(mask_A)

        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")
        fCount = 0
        tCount = len(inVector['geometry'])
        #generate bounding box geometry for raster bbox
        b = curRaster.bounds
        rBox = box(b[0], b[1], b[2], b[3])
        for idx, row in inVector.iterrows():
            geometry = row['geometry']
            fCount = fCount + 1
            try:
                #This test is used in case the geometry extends beyond the edge of the raster
                #   I think it is computationally heavy, but I don't know of an easier way to do it
                if not rBox.contains(geometry):
                    geometry = geometry.intersection(rBox)
            except Exception:
                print("Error processing %s" % fCount)
                # Still emit a row so later features keep their position
                outputData.append(error_row())
                continue
            try:
                if fCount % 1000 == 0 and verbose:
                    tPrint("Processing %s of %s" % (fCount, tCount))
                # get pixel coordinates of the geometry's bounding box
                #   despite the names, ul is the pixel of (minx, miny) and
                #   lr is the pixel of (maxx, maxy)
                ul = curRaster.index(*geometry.bounds[0:2])
                lr = curRaster.index(*geometry.bounds[2:4])
                # read the subset of the data into a numpy array
                window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]),
                                                             float(lr[1] + 1)))

                if mask_A is not None:
                    data = curRaster.read(bandNum, window=window, masked=True)
                else:
                    data = curRaster.read(bandNum, window=window, masked=False)

                if weighted:
                    allTouched = True
                    #Create a grid of the input raster (data)
                    rGrid = polygonizeArray(data, geometry.bounds, curRaster)
                    #Clip the grid by the input geometry
                    rGrid['gArea'] = rGrid.area
                    rGrid['newArea'] = rGrid.intersection(geometry).area
                    #Store the percent overlap
                    rGrid['w'] = rGrid['newArea'] / rGrid['gArea']
                    # Scale every cell in place by its share of overlap
                    # NOTE(review): if data has an integer dtype the weighted
                    #   values are truncated on assignment - confirm whether
                    #   a float conversion is wanted here
                    for _, cell in rGrid.iterrows():
                        data[cell['row'],
                             cell['col']] = data[cell['row'],
                                                 cell['col']] * cell['w']

                # create an affine transform for the subset data
                t = curRaster.transform
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e,
                                        t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the geometry, 1 outside,
                #   so it can be used directly as a numpy mask
                mask = rasterize([(geometry, 0)],
                                 out_shape=data.shape,
                                 transform=shifted_affine,
                                 fill=1,
                                 all_touched=allTouched,
                                 dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))
                if rastType == 'N':
                    if minVal != '' or maxVal != '':
                        if minVal != '':
                            masked_data = np.ma.masked_where(
                                masked_data < minVal, masked_data)
                        if maxVal != '':
                            masked_data = np.ma.masked_where(
                                masked_data > maxVal, masked_data)
                        if masked_data.count() > 0:
                            results = [
                                np.nansum(masked_data),
                                np.nanmin(masked_data),
                                np.nanmax(masked_data),
                                np.nanmean(masked_data)
                            ]
                        else:
                            results = [-1, -1, -1, -1]
                    else:
                        results = [
                            np.nansum(masked_data),
                            np.nanmin(masked_data),
                            np.nanmax(masked_data),
                            np.nanmean(masked_data)
                        ]
                if rastType == 'C':
                    if len(unqVals) > 0:
                        # NOTE(review): counts are taken over the whole read
                        #   window (data), not over masked_data - confirm
                        #   whether cells outside the geometry should count
                        xx = dict(Counter(data.flatten()))
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                outputData.append(results)
            except Exception as e:
                if verbose:
                    print(e)
                outputData.append(error_row())
    finally:
        if opened_here:
            curRaster.close()
    return outputData
Exemplo n.º 6
0
def extract_data(inG, inG1, inG2, inL, inR):
    ''' Extract admin boundaries and land cover for one country, then run the
    vulnerability and urban calculations

    Relies on the module-level names iso3, output_folder, pop_folder and
    pop_files. Outputs are written to <output_folder>/<iso3>; adm0.shp,
    adm1.shp, adm2.shp and LC.tif are only created when missing.

    INPUT
        inG [geopandas] - admin 0 boundaries with an 'ISO3' column
        inG1 [geopandas] - admin 1 boundaries with an 'ISO3' column
        inG2 [geopandas] - admin 2 boundaries with an 'ISO3' column
        inL - land cover input handed to rMisc.clipRaster
        inR - input handed to create_urban_data
    '''
    country_folder = os.path.join(output_folder, iso3)
    adm0_file = os.path.join(country_folder, "adm0.shp")
    adm1_file = os.path.join(country_folder, "adm1.shp")
    adm2_file = os.path.join(country_folder, "adm2.shp")
    lc_file = os.path.join(country_folder, "LC.tif")

    os.makedirs(country_folder, exist_ok=True)
    country_bounds = inG.loc[inG['ISO3'] == iso3].to_crs({'init': 'epsg:4326'})
    if not os.path.exists(adm0_file):
        country_bounds.to_file(adm0_file)

    # Lower admin levels are optional: report a failed extraction and continue
    for level, source, out_file in ((1, inG1, adm1_file),
                                    (2, inG2, adm2_file)):
        if os.path.exists(out_file):
            continue
        try:
            country_adm = source.loc[source['ISO3'] == iso3].to_crs(
                {'init': 'epsg:4326'})
            country_adm.to_file(out_file)
        except Exception:
            misc.tPrint("%s Could not extract ADMIN %s" % (iso3, level))

    if not os.path.exists(lc_file):
        rMisc.clipRaster(inL, gpd.read_file(adm0_file), lc_file)

    calculate_vulnerability(iso3, country_folder, country_bounds, pop_folder,
                            pop_files)
    misc.tPrint("***%s Calculated Vulnerability" % iso3)
    try:
        create_urban_data(iso3,
                          country_folder,
                          country_bounds,
                          inR,
                          calc_urban=False)
        misc.tPrint("***%s Calculated Urban Extents" % iso3)
    except Exception:
        misc.tPrint("%s errored on HD clusters" % iso3)
        # Retry with the alternative flag combination before giving up
        try:
            create_urban_data(iso3,
                              country_folder,
                              country_bounds,
                              inR,
                              calc_urban=True,
                              calc_hd_urban=False)
        except Exception:
            misc.tPrint("%s errored on all clusters" % iso3)
Exemplo n.º 7
0
def zonalStats(inShp,
               inRaster,
               bandNum=1,
               reProj=False,
               minVal='',
               verbose=False,
               rastType='N',
               unqVals=None):
    ''' Run zonal statistics against an input shapefile

    INPUT VARIABLES
    inShp [string] - path to input shapefile
    inRaster [string] - path to input raster

    OPTIONAL
    bandNum [integer] - band in raster to analyze
    reProj [boolean] -  whether to reproject data to match, if not, raise an error
    minVal [number] - if defined, will only calculate statistics on values above this number
    verbose [boolean] - whether to print progress every 1000 features
    rastType [string N or C] - N is numeric and C is categorical. Categorical returns counts of numbers
    unqVals [array of numbers] - used in categorical zonal statistics, tabulates all these numbers, will report 0 counts

    RETURNS
    array of arrays, one for each feature in inShp
        Numeric: [SUM, MIN, MAX, MEAN], or [-1, -1, -1, -1] when no values
            remain after applying minVal or when reading/rasterizing fails.
        Categorical: counts in the order of unqVals (-1 for each on failure);
            without unqVals, the (values, counts) tuple from np.unique.

    RAISES
    ValueError - if the CRS of the inputs differ and reProj is False
    '''
    if unqVals is None:
        unqVals = []

    outputData = []
    with rasterio.open(inRaster, 'r') as curRaster:
        inVector = gpd.read_file(inShp)
        if inVector.crs != curRaster.crs:
            if reProj:
                inVector = inVector.to_crs(curRaster.crs)
            else:
                raise ValueError("Input CRS do not match")
        fCount = 0
        tCount = len(inVector['geometry'])
        for geometry in inVector['geometry']:
            fCount = fCount + 1
            if fCount % 1000 == 0 and verbose:
                tPrint("Processing %s of %s" % (fCount, tCount))
            # get pixel coordinates of the geometry's bounding box
            #   despite the names, ul is the pixel of (minx, miny) and
            #   lr is the pixel of (maxx, maxy)
            ul = curRaster.index(*geometry.bounds[0:2])
            lr = curRaster.index(*geometry.bounds[2:4])

            # read the subset of the data into a numpy array
            window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]),
                                                         float(lr[1] + 1)))
            try:
                data = curRaster.read(bandNum, window=window)
                # create an affine transform for the subset data
                t = curRaster.transform
                shifted_affine = Affine(t.a, t.b, t.c + ul[1] * t.a, t.d, t.e,
                                        t.f + lr[0] * t.e)

                # rasterize the geometry: 0 inside the geometry, 1 outside,
                #   so it can be used directly as a numpy mask
                mask = rasterize([(geometry, 0)],
                                 out_shape=data.shape,
                                 transform=shifted_affine,
                                 fill=1,
                                 all_touched=True,
                                 dtype=np.uint8)

                # create a masked numpy array
                masked_data = np.ma.array(data=data, mask=mask.astype(bool))
                if rastType == 'N':
                    has_values = True
                    if minVal != '':
                        masked_data = np.ma.masked_where(
                            masked_data < minVal, masked_data)
                        has_values = masked_data.count() > 0
                    if has_values:
                        results = [
                            masked_data.sum(),
                            masked_data.min(),
                            masked_data.max(),
                            masked_data.mean()
                        ]
                    else:
                        results = [-1, -1, -1, -1]
                if rastType == 'C':
                    if len(unqVals) > 0:
                        # NOTE(review): counts are taken over the whole read
                        #   window (data), not over masked_data - confirm
                        #   whether cells outside the geometry should count
                        xx = dict(Counter(data.flatten()))
                        results = [xx.get(i, 0) for i in unqVals]
                    else:
                        results = np.unique(masked_data, return_counts=True)
                outputData.append(results)
            except Exception as e:
                print(e)
                # Placeholder with the same width as a successful result, so
                #   callers building one column per value keep working
                if rastType == 'N':
                    outputData.append([-1, -1, -1, -1])
                else:
                    outputData.append([-1 for _ in unqVals])
    return outputData