Example #1
def get_ts_from_complete_data(complete_data, mask, keys, dates=None):
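    # Compute, for each variable in complete_data, a time series of spatial means over the non-NaN area of the mask raster.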

    if keys is None:
        keys = complete_data.keys()

    common_dates = becgis.CommonDates([complete_data[key][1] for key in keys])
    becgis.AssertProjResNDV([complete_data[key][0] for key in keys])

    MASK = becgis.OpenAsArray(mask, nan_values=True)

    tss = dict()

    for key in keys:

        var_mm = np.array([])

        for date in common_dates:

            tif = complete_data[key][0][complete_data[key][1] == date][0]

            DATA = becgis.OpenAsArray(tif, nan_values=True)
            DATA[np.isnan(DATA)] = 0.0

            DATA[np.isnan(MASK)] = np.nan

            var_mm = np.append(var_mm, np.nanmean(DATA))

        tss[key] = (common_dates, var_mm)

    return tss
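The zero-fill / re-mask pattern above is deliberate: data gaps inside the basin count as 0, while pixels outside the basin are excluded from the mean entirely. A minimal numpy sketch of the same steps, using a hypothetical 3x3 mask:

import numpy as np

# Hypothetical 3x3 basin mask: NaN marks pixels outside the basin.
MASK = np.array([[1.0, 1.0, np.nan],
                 [1.0, 1.0, np.nan],
                 [np.nan, np.nan, np.nan]])

DATA = np.array([[2.0, 4.0, 99.0],
                 [np.nan, 6.0, 99.0],
                 [99.0, 99.0, 99.0]])

DATA[np.isnan(DATA)] = 0.0      # gaps inside the basin count as 0 mm
DATA[np.isnan(MASK)] = np.nan   # pixels outside the basin are excluded

print(np.nanmean(DATA))         # (2 + 4 + 0 + 6) / 4 = 3.0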
Example #2
    def createTRENDMAP(self, varname):
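        # Download the trend image from GEE, then mask out zero-value and statistically insignificant (p > 0.1) pixels.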
        trend = self.calcTREND(self.VARS[varname])
        output_fh = os.path.join(self.OutFldr, '{0}.zip'.format(varname))
        
        self.factor = np.interp(self.Area, [50000, 625000000], [0.1, 1.0])
        scale = self.scale.multiply(self.factor).getInfo()

        geegis.downloadImage(trend, output_fh, self.CountryShape, scale)
        
        TREND = becgis.OpenAsArray(os.path.join(output_fh[:-4], 'test.long-trend.tif'))
        MASK = becgis.OpenAsArray(os.path.join(output_fh[:-4], 'test.max.tif'))
        PVALUE = becgis.OpenAsArray(os.path.join(output_fh[:-4], 'test.p-value.tif'))
        
        assert np.shape(TREND) == np.shape(MASK), "resolutions don't match"
        
        TREND[MASK == 0] = np.nan
        TREND[TREND == 0.0] = np.nan # need to fix this properly.
        TREND[PVALUE > 0.1] = np.nan

        PVALUE[PVALUE <= 0.1] = np.nan
        PVALUE[MASK == 0] = np.nan
        PVALUE[~np.isnan(PVALUE)] = 1.
        
        AREA = becgis.MapPixelAreakm(os.path.join(output_fh[:-4], 'test.max.tif'))
        self.PixelArea = np.mean(AREA[MASK == 1]) * 100 # ha
        
        self.Columns[varname]['TREND'] = TREND
        self.Columns[varname]['PVALUE'] = PVALUE
        
        ds = gdal.Open(os.path.join(output_fh[:-4], 'test.long-trend.tif'))
        gt = ds.GetGeoTransform()
        n_cols = ds.RasterXSize
        n_rows = ds.RasterYSize
        ds = None
        self.extent_ll = (gt[0], gt[0] + (gt[1] * n_cols), gt[3] + (gt[5] * n_rows), gt[3])
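The extent arithmetic on the last line follows the GDAL geotransform convention: gt[0] is the x-coordinate of the upper-left corner, gt[1] the pixel width, gt[3] the upper-left y-coordinate and gt[5] the (negative) pixel height. A standalone sketch with made-up numbers:

# Hypothetical geotransform of a 10x8 raster with 0.25 degree pixels.
gt = (30.0, 0.25, 0.0, 10.0, 0.0, -0.25)
n_cols, n_rows = 10, 8

xmin = gt[0]                     # 30.0
xmax = gt[0] + gt[1] * n_cols    # 32.5
ymin = gt[3] + gt[5] * n_rows    # 8.0
ymax = gt[3]                     # 10.0

print((xmin, xmax, ymin, ymax))  # (30.0, 32.5, 8.0, 10.0)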
Example #3
def root_zone_storage_Wpx(output_folder, rz_sm_fhs, rz_depth_fh):
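    # Multiply each root zone soil moisture map by the root depth map and save the resulting storage (mm) rasters.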
    Data_Path_RZ = "RZstor"
    out_folder = os.path.join(output_folder, Data_Path_RZ)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)
    root_depth = becgis.OpenAsArray(rz_depth_fh, nan_values=True)
    geo = becgis.GetGeoInfo(rz_depth_fh)
    root_storage_fhs = []
    for rz_sm_fh in rz_sm_fhs:
        root_depth_sm = becgis.OpenAsArray(rz_sm_fh, nan_values=True)
        root_storage = root_depth * root_depth_sm
        out_fh = os.path.join(out_folder, 'RZ_storage_mm_%s' %(rz_sm_fh[-10:]))
        becgis.CreateGeoTiff(out_fh, root_storage, *geo)
        root_storage_fhs.append(out_fh)
    return root_storage_fhs
Example #4
def calc_ETs(ET, lu_fh, sheet1_lucs):
    """
    Calculates the sums of the values within a specified landuse category.
    
    Parameters
    ----------
    ET : ndarray
        Array of the data for which the sum needs to be calculated.
    lu_fh : str
        Filehandle pointing to landusemap.
    sheet1_lucs : dict
        Dictionary with landuseclasses per category.
    
    Returns
    -------
    et : dict
        Dictionary with the totals per landuse category.
    """
    LULC = becgis.OpenAsArray(lu_fh, nan_values = True)
    et = dict()
    for key in sheet1_lucs:
        classes = sheet1_lucs[key]
        mask = np.logical_or.reduce([LULC == value for value in classes])
        et[key] = np.nansum(ET[mask])
    return et
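The np.logical_or.reduce call collapses a list of per-class comparisons into one boolean mask. A toy run with a hypothetical landuse grid and category:

import numpy as np

LULC = np.array([[1, 2, 3],
                 [4, 2, 1]])
ET = np.array([[10., 20., 30.],
               [40., 50., 60.]])

classes = [1, 3]  # hypothetical category, e.g. two 'Protected' classes
mask = np.logical_or.reduce([LULC == value for value in classes])

print(mask)                 # True where LULC is 1 or 3
print(np.nansum(ET[mask]))  # 10.0 + 30.0 + 60.0 = 100.0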
Example #5
def livestock_feed(output_folder, lu_fh, ndm_fhs, feed_dict, live_feed, cattle_fh, fraction_fhs, ndmdates):
    """
    Calculate natural livestock feed production

    INPUTS
    ----------
    lu_fh : str
        filehandle for land use map
    ndm_fhs: nd array
        array of filehandles of NDM maps
    ndm_dates: nd array
        array of dates for NDM maps
    feed_dict: dict
        dictionnary 'pasture class':[list of LULC]
    feed_pct: dict
        dictionnary 'pasture class':[percent available as feed]
    cattle_fh : str
        filehandle for cattle map
    """
    Data_Path_Feed = "Feed"
    out_folder = os.path.join(output_folder, Data_Path_Feed)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = becgis.MapPixelAreakm(lu_fh) * 100
    LULC = RC.Open_tiff_array(lu_fh)
  #  cattle = RC.Open_tiff_array(cattle_fh)
    geo_out, proj, size_X, size_Y = RC.Open_array_info(lu_fh)

    f_pct = np.zeros(LULC.shape)
    for lu_type in feed_dict.keys():
        classes = feed_dict[lu_type]
        mask = np.logical_or.reduce([LULC == value for value in classes])
        f_pct[mask] = live_feed[lu_type]
    feed_fhs_landscape = []
    feed_fhs_incremental = []
    for d in range(len(ndm_fhs)):
        ndm_fh = ndm_fhs[d]
        fraction_fh = fraction_fhs[d]
        date1 = ndmdates[d]
        year = '%d' %date1.year
        month = '%02d' %date1.month

        yield_fract = RC.Open_tiff_array(fraction_fh)

        out_fh_l = os.path.join(out_folder, 'feed_prod_landscape_%s_%s.tif' % (year, month))
        out_fh_i = os.path.join(out_folder, 'feed_prod_incremental_%s_%s.tif' % (year, month))
#        out_fh2 = out_folder+'\\Feed_prod_pH_%s_%s.tif' %(year, month)
        NDM = becgis.OpenAsArray(ndm_fh, nan_values=True)
        NDM_feed = NDM * f_pct
        NDM_feed_incremental = NDM_feed * yield_fract * area_ha/1e6
        NDM_feed_landscape = (NDM_feed *(1-yield_fract)) * area_ha/1e6
        DC.Save_as_tiff(out_fh_l, NDM_feed_landscape, geo_out)
        DC.Save_as_tiff(out_fh_i, NDM_feed_incremental, geo_out)
#        NDM_feed_perHead = NDM_feed / cattle
#        DC.Save_as_tiff(out_fh2, NDM_feed, geo_out)
        feed_fhs_landscape.append(out_fh_l)
        feed_fhs_incremental.append(out_fh_i)
    return feed_fhs_landscape, feed_fhs_incremental
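Inside the loop, yield_fract partitions the total feed production between the incremental and landscape outputs, so the two maps always sum to the total. A toy check with hypothetical per-pixel values:

import numpy as np

NDM_feed = np.array([120.0, 80.0])   # hypothetical feed-available dry matter
yield_fract = np.array([0.3, 0.5])   # hypothetical yield fractions
area_ha = np.array([110.0, 110.0])   # hypothetical pixel areas in ha

incremental = NDM_feed * yield_fract * area_ha / 1e6
landscape = NDM_feed * (1 - yield_fract) * area_ha / 1e6

print(incremental + landscape)   # [0.0132 0.0088]
print(NDM_feed * area_ha / 1e6)  # identical: the split conserves the total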
Example #6
def get_ts_from_complete_data_spec(complete_data, lu_fh, keys, a, dates=None):
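    # As get_ts_from_complete_data, but weight each pixel by a, with weight 0.0 for the classes collected in gw_classes.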

    if keys is None:
        keys = complete_data.keys()

    common_dates = becgis.CommonDates([complete_data[key][1] for key in keys])
    becgis.AssertProjResNDV([complete_data[key][0] for key in keys])

    MASK = becgis.OpenAsArray(lu_fh, nan_values=True)

    lucs = gd.get_sheet4_6_classes()
    gw_classes = list()
    for subclass in [
            'Forests', 'Rainfed Crops', 'Shrubland', 'Forest Plantations'
    ]:
        gw_classes += lucs[subclass]
    mask_gw = np.logical_or.reduce([MASK == value for value in gw_classes])

    tss = dict()

    for key in keys:

        var_mm = np.array([])

        for date in common_dates:

            tif = complete_data[key][0][complete_data[key][1] == date][0]

            DATA = becgis.OpenAsArray(tif, nan_values=True)
            DATA[np.isnan(DATA)] = 0.0

            DATA[np.isnan(MASK)] = np.nan

            alpha = np.ones(np.shape(DATA)) * a

            alpha[mask_gw] = 0.0

            var_mm = np.append(var_mm, np.nanmean(DATA * alpha))

        tss[key] = (common_dates, var_mm)

    return tss
Example #7
def get_timeseries_raster(ds1_fhs,
                          ds1_dates,
                          coordinates,
                          output_fh,
                          unit='m3/s'):
    """
    Extract a timeseries from a set of raster files at the given coordinates. Store results in a csv-file.
    
    Parameters
    ----------
    ds1_fhs : 1d ndarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1d ndarray
        List containing datetime.date or datetime.datetime objects corresponding
        to the filehandles in ds1_fhs. Length should be equal to ds1_fhs.
    coordinates : tuple
        Tuple with the latitude and longitude, (lat, lon).
    output_fh : str
        Filehandle pointing to a csv-file.
    unit : str, optional
        String indicating the unit of the data, default is 'm3/s'.
    """
    ds1_values = list()
    xpixel, ypixel = pixelcoordinates(coordinates[0], coordinates[1],
                                      ds1_fhs[0])

    if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
        print "Coordinates ({0}) not on the map".format(coordinates)
    else:
        for date in ds1_dates:
            ds1_values.append(
                becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                   nan_values=True)[ypixel, xpixel])

        ds1_values = np.array(ds1_values)

        csv_file = open(output_fh, 'w', newline='')
        writer = csv.writer(csv_file, delimiter=';')

        writer.writerow(['lat:', coordinates[0], 'lon:', coordinates[1], unit])
        writer.writerow(['datetime', 'year', 'month', 'day', 'data'])

        for date in ds1_dates:

            year = date.year
            month = date.month
            day = date.day

            dt = datetime.datetime(year, month, day, 0, 0, 0)
            data = ds1_values[ds1_dates == date][0]
            writer.writerow([dt, year, month, day, data])

        csv_file.close()
Example #8
def lu_type_sum(data_fh, lu_fh, lu_dict, convert=None):
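    # Sum a data raster per landuse category, optionally converting mm depths to km3 volumes first.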
    LULC = RC.Open_tiff_array(lu_fh)
    in_data = becgis.OpenAsArray(data_fh, nan_values=True)
#    in_data = RC.Open_tiff_array(data_fh)
    if convert == 'mm_to_km3':
        AREA = becgis.MapPixelAreakm(data_fh)
        in_data *= AREA / 1e6
    out_data = {}
    for lu_class in lu_dict.keys():
        mask = [LULC == value for value in lu_dict[lu_class]]
        mask = (np.sum(mask, axis=0)).astype(bool)
        out_data[lu_class] = np.nansum(in_data[mask])
    return out_data
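The convert='mm_to_km3' branch works because 1 mm of water depth over 1 km2 is 1e3 m3, i.e. 1e-6 km3, so a mm raster times the pixel area in km2, divided by 1e6, gives km3 per pixel. A one-pixel check:

import numpy as np

in_data = np.array([[100.0]])   # 100 mm of depth on one pixel
AREA = np.array([[1.0]])        # pixel area of 1 km^2

# 100 mm over 1 km^2 = 0.1 m * 1e6 m^2 = 1e5 m^3 = 1e-4 km^3
print(in_data * AREA / 1e6)     # [[0.0001]]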
Example #9
def fuel_wood(output_folder, lu_fh, ndm_fhs, fraction_fhs, ndmdates):
    """
    Calculate natural livestock feed production

    INPUTS
    ----------
    lu_fh : str
        filehandle for land use map
    ndm_fhs: nd array
        array of filehandles of NDM maps
    abv_grnd_biomass_ratio: dict
        dictionnary 'LULC':[above ground biomass]
    """
    Data_Path_Fuel = "Fuel"
    out_folder = os.path.join(output_folder, Data_Path_Fuel)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = becgis.MapPixelAreakm(lu_fh) * 100
    LULC = RC.Open_tiff_array(lu_fh)
    geo_out, proj, size_X, size_Y = RC.Open_array_info(lu_fh)

    fuel_classes = [1, 8, 9, 10, 11, 12, 13]
    fuel_mask = np.zeros(LULC.shape)
    for fc in fuel_classes:
        fuel_mask[np.where(LULC == fc)] = 1

    fuel_fhs_landscape = []
    fuel_fhs_incremental = []

    for d in range(len(ndm_fhs)):
        ndm_fh = ndm_fhs[d]
        fraction_fh = fraction_fhs[d]
        yield_fract = RC.Open_tiff_array(fraction_fh)
        date1 = ndmdates[d]
        year = '%d' %date1.year
        month = '%02d' %date1.month
#        year = ndm_fh[-14:-10]
#        month = ndm_fh[-9:-7]
        out_fh_l = os.path.join(out_folder, 'fuel_prod_landscape_%s_%s.tif' % (year, month))
        out_fh_i = os.path.join(out_folder, 'fuel_prod_incremental_%s_%s.tif' % (year, month))
        NDM = becgis.OpenAsArray(ndm_fh, nan_values=True)

        NDM_fuel_incremental = NDM * .05 * fuel_mask * yield_fract * area_ha/1e6
        NDM_fuel_landscape = NDM  * .05 * fuel_mask *(1-yield_fract) * area_ha/1e6
        DC.Save_as_tiff(out_fh_i, NDM_fuel_incremental, geo_out)
        DC.Save_as_tiff(out_fh_l, NDM_fuel_landscape, geo_out)
        fuel_fhs_landscape.append(out_fh_l)
        fuel_fhs_incremental.append(out_fh_i)

    return fuel_fhs_landscape, fuel_fhs_incremental
Example #10
def recycle(output_folder, et_bg_fhs, recy_ratio, lu_fh, et_type):
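    # Multiply each ET map by the recycle ratio and save the results as new geotiffs.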
    Data_Path_rec = "temp_et_recycle"
    out_folder = os.path.join(output_folder, Data_Path_rec)
    geo_out, proj, size_X, size_Y = RC.Open_array_info(lu_fh)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)
    recycle_fhs = []
    for et_fh in et_bg_fhs:
        out_fh = os.path.join(out_folder, "recycled_et_" + et_type + et_fh[-11:-4] + ".tif")
        et = becgis.OpenAsArray(et_fh, nan_values=True)
        et_recy = et*recy_ratio
        DC.Save_as_tiff(out_fh, et_recy, geo_out)
        recycle_fhs.append(out_fh)
    return recycle_fhs
Example #11
def correct_var(metadata,
                complete_data,
                output_dir,
                formula,
                new_var,
                slope=False,
                bounds=(0, [1.0, 1., 12.])):
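    # Apply a fitted monthly correction factor to each raster of the variable and store the corrected maps under new_var.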

    var = split_form(formula)[0][-1]

    a, x0 = calc_var_correction(metadata,
                                complete_data,
                                output_dir,
                                formula=formula,
                                slope=slope,
                                plot=True,
                                bounds=bounds)

    for date, fn in zip(complete_data[var][1], complete_data[var][0]):

        geo_info = becgis.GetGeoInfo(fn)

        data = becgis.OpenAsArray(fn, nan_values=True)

        x = calc_delta_months(x0, date)

        fraction = a[0] * (np.cos(
            (x - a[2]) * (np.pi / 6)) * 0.5 + 0.5) + (a[1] * (1 - a[0]))

        data *= fraction

        folder = os.path.join(output_dir, metadata['name'], 'data', new_var)

        if not os.path.exists(folder):
            os.makedirs(folder)

        bla = os.path.split(fn)[1].split('_')[-1]
        filen = 'supply_sw_' + bla[0:4] + '_' + bla[4:6] + '.tif'
        fn = os.path.join(folder, filen)

        becgis.CreateGeoTiff(fn, data, *geo_info)

    meta = becgis.SortFiles(folder, [-11, -7], month_position=[-6, -4])[0:2]
    return a, meta
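The fraction applied to each raster is a 12-month cosine: x counts months since x0, the factor peaks where x equals a[2] (mod 12), and it oscillates between a[1] * (1 - a[0]) and a[0] + a[1] * (1 - a[0]). A standalone sketch with hypothetical fitted parameters:

import numpy as np

a = [0.4, 0.9, 3.0]  # hypothetical fit: [amplitude, offset, phase in months]
months = np.arange(12)

fraction = a[0] * (np.cos((months - a[2]) * (np.pi / 6)) * 0.5 + 0.5) + a[1] * (1 - a[0])
print(fraction.round(3))  # peaks at month 3 (0.94), bottoms out at month 9 (0.54)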
Example #12
def calc_non_utilizable(P, ET, fractions_fh):
    """
    Calculate non utilizable outflow.
    
    Parameters
    ----------
    P : ndarray
        Array with the volumes of precipitation per pixel.
    ET : ndarray
        Array with the volumes of evapotranspiration per pixel.
    fractions_fh : str
        Filehandle pointing to a map with fractions indicating how much of the
        (P-ET) difference is non-utilizable.
    
    Returns
    -------
    non_utilizable_runoff : float
        The total volume of non_utilizable runoff.
    """
    fractions = becgis.OpenAsArray(fractions_fh, nan_values = True)
    non_utilizable_runoff = np.nansum((P - ET) * fractions)
    return non_utilizable_runoff
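A toy evaluation with two pixels; note that pixels where ET exceeds P contribute negative volumes to the sum:

import numpy as np

P = np.array([100.0, 80.0])          # hypothetical precipitation volumes
ET = np.array([60.0, 90.0])          # hypothetical evapotranspiration volumes
fractions = np.array([0.25, 0.50])   # hypothetical non-utilizable fractions

print(np.nansum((P - ET) * fractions))  # 40*0.25 + (-10)*0.5 = 5.0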
Example #13
def calc_sheet1(entries, lu_fh, sheet1_lucs, recycling_ratio, q_outflow, q_out_avg, 
                output_folder, q_in_sw, q_in_gw = 0., q_in_desal = 0., q_out_sw = 0., q_out_gw = 0.):
    """
    Calculate the required values to plot Water Accounting Plus Sheet 1.
    
    Parameters
    ----------
    entries : dict
        Dictionary with several filehandles, also see examples below.
    lu_fh : str
        Filehandle pointing to the landuse map.
    sheet1_lucs : dict
        Dictionary sorting different landuse classes into categories.
    recycling_ratio : float
        Value indicating the recycling ratio.
    q_outflow : float
        The outflow of the basin.
    q_out_avg : float
        The longterm average outflow.
    output_folder : str
        Folder to store results.
    q_in_sw : float, optional
        Surfacewater inflow into the basin. Default is 0.0.
    q_in_gw : float, optional
        Groundwater inflow into the basin. Default is 0.0.
    q_in_desal : float, optional
        Desalinised water inflow into the basin. Default is 0.0.
    q_out_sw : float, optional
        Additional surfacewater outflow from basin. Default is 0.0.
    q_out_gw : float, optional
        Groundwater outflow from the basin. Default is 0.0.
        
    Returns
    -------
    results : dict
        Dictionary containing necessary variables for Sheet 1.
    """
    results = dict()
    
    LULC = becgis.OpenAsArray(lu_fh, nan_values = True)
    P = becgis.OpenAsArray(entries['P'], nan_values = True)
    ETgreen = becgis.OpenAsArray(entries['ETgreen'], nan_values = True)
    ETblue = becgis.OpenAsArray(entries['ETblue'], nan_values = True)
    
    pixel_area = becgis.MapPixelAreakm(lu_fh)

    gray_water_fraction = becgis.calc_basinmean(entries['WPL'], lu_fh)
    ewr_percentage = becgis.calc_basinmean(entries['EWR'], lu_fh)
    
    P[np.isnan(LULC)] = ETgreen[np.isnan(LULC)] = ETblue[np.isnan(LULC)] = np.nan
    P, ETgreen, ETblue = np.array([P, ETgreen, ETblue]) * 0.000001 * pixel_area
    
    ET = np.nansum([ETblue, ETgreen], axis = 0)
    
    results['et_advection'], results['p_advection'], results['p_recycled'], results['dS'] = calc_wb(P, ET, q_outflow, recycling_ratio, 
           q_in_sw = q_in_sw, q_in_gw = q_in_gw, q_in_desal = q_in_desal, q_out_sw = q_out_sw, q_out_gw = q_out_gw)

    results['non_recoverable'] = gray_water_fraction * (q_outflow + q_out_sw) # Mekonnen and Hoekstra (2015), Global Gray Water Footprint and Water Pollution Levels Related to Anthropogenic Nitrogen Loads to Fresh Water
    results['reserved_outflow_demand'] = q_out_avg * ewr_percentage
    
    results['other'] = 0.0
    
    landscape_et = calc_ETs(ETgreen, lu_fh, sheet1_lucs)
    incremental_et = calc_ETs(ETblue, lu_fh, sheet1_lucs)
    
    results['manmade'] = incremental_et['Managed']
    results['natural'] = incremental_et['Modified'] + incremental_et['Protected'] + incremental_et['Utilized']    
    
    other_fractions = {'Modified': 0.00,
                       'Managed':  1.00,
                       'Protected':0.00,
                       'Utilized': 0.00}    
                       
    non_recoverable_fractions = {'Modified': 0.00,
                                 'Managed':  1.00,
                                 'Protected':0.00,
                                 'Utilized': 0.00}  
                                 
    results['uf_plu'], results['uf_ulu'], results['uf_mlu'], results['uf_mwu'] = calc_utilizedflow(incremental_et, results['other'], results['non_recoverable'], other_fractions, non_recoverable_fractions)
   
    net_inflow = results['p_recycled'] + results['p_advection'] + q_in_sw + q_in_gw + q_in_desal + results['dS'] 
    consumed_water = np.nansum(list(landscape_et.values())) + np.nansum(list(incremental_et.values())) + results['other'] + results['non_recoverable']
    non_consumed_water = net_inflow - consumed_water
    
    results['non_utilizable_outflow'] = min(non_consumed_water, max(0.0, calc_non_utilizable(P, ET, entries['Fractions'])))
    results['reserved_outflow_actual'] = min(non_consumed_water - results['non_utilizable_outflow'], results['reserved_outflow_demand'])
    results['utilizable_outflow'] = max(0.0, non_consumed_water - results['non_utilizable_outflow'] - results['reserved_outflow_actual'])
    
    results['landscape_et_mwu'] = landscape_et['Managed']
    results['landscape_et_mlu'] = landscape_et['Modified']
    results['landscape_et_ulu'] = landscape_et['Utilized']
    results['landscape_et_plu'] = landscape_et['Protected']
    results['q_outflow'] = q_outflow
    results['q_in_sw'] = q_in_sw
    results['q_in_gw'] = q_in_gw
    results['q_in_desal'] = q_in_desal
    results['q_out_sw'] = q_out_sw
    results['q_out_gw'] = q_out_gw
    
    return results
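The outflow bookkeeping near the end of calc_sheet1 is a chain of min/max clamps that keeps each outflow component within the water actually available. The clamping logic in isolation, with hypothetical basin totals:

# Hypothetical totals (same units as the sheet, e.g. km^3/year).
non_consumed_water = 10.0
non_utilizable_raw = 3.0        # calc_non_utilizable(P, ET, ...) result
reserved_outflow_demand = 9.0   # q_out_avg * ewr_percentage

non_utilizable = min(non_consumed_water, max(0.0, non_utilizable_raw))
reserved_actual = min(non_consumed_water - non_utilizable,
                      reserved_outflow_demand)
utilizable = max(0.0, non_consumed_water - non_utilizable - reserved_actual)

print(non_utilizable, reserved_actual, utilizable)  # 3.0 7.0 0.0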
Example #14
def compare_rasters2rasters_per_lu(ds1_fhs,
                                   ds1_dates,
                                   ds2_fhs,
                                   ds2_dates,
                                   lu_fh,
                                   output_dir,
                                   dataset_names=["DS1", "DS2"],
                                   class_dictionary=None,
                                   no_of_classes=6):
    """
    Compare two raster datasets with each other for different landuse categories.
    
    Parameters
    ----------
    ds1_fhs : ndarray
        Array with strings pointing to maps of dataset 1.
    ds1_dates : ndarray
        Array with same shape as ds1_fhs, containing datetime.date objects.
    ds2_fhs : ndarray
        Array with strings pointing to maps of dataset 2.
    ds2_dates : ndarray
        Array with same shape as ds2_fhs, containing datetime.date objects.
    lu_fh : str
        Pointer to a landusemap.
    output_dir : str
        Directory to save results.
    dataset_names : list, optional
        List with two strings describing the names of the two datasets.
    class_dictionary : dict
        Dictionary specifying all the landuse categories.
    no_of_classes : int
        The 'no_of_classes' most dominant classes in the lu_fh are compared, the rest is ignored.
    
    """
    LUCS = becgis.OpenAsArray(lu_fh, nan_values=True)
    DS1 = becgis.OpenAsArray(ds1_fhs[0], nan_values=True)
    DS2 = becgis.OpenAsArray(ds2_fhs[0], nan_values=True)

    DS1[np.isnan(DS2)] = np.nan
    LUCS[np.isnan(DS1)] = np.nan

    classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts=True)
    counts_sorted = np.sort(counts)[-no_of_classes:]
    selected_lucs = [
        classes[counts == counter][0] for counter in counts_sorted
    ]

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(lu_fh)
    becgis.CreateGeoTiff(lu_fh.replace('.tif', '_.tif'), LUCS, driver, NDV,
                         xsize, ysize, GeoT, Projection)

    common_dates = becgis.CommonDates([ds1_dates, ds2_dates])

    ds1_totals = np.array([])
    ds2_totals = np.array([])

    DS1_per_class = dict()
    DS2_per_class = dict()

    for date in common_dates:

        DS1 = becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                 nan_values=True)
        DS2 = becgis.OpenAsArray(ds2_fhs[ds2_dates == date][0],
                                 nan_values=True)

        for clss in selected_lucs:

            if clss in DS1_per_class.keys():
                DS1_per_class[clss] = np.append(DS1_per_class[clss],
                                                np.nanmean(DS1[LUCS == clss]))
            else:
                DS1_per_class[clss] = np.array([np.nanmean(DS1[LUCS == clss])])

            if clss in DS2_per_class.keys():
                DS2_per_class[clss] = np.append(DS2_per_class[clss],
                                                np.nanmean(DS2[LUCS == clss]))
            else:
                DS2_per_class[clss] = np.array([np.nanmean(DS2[LUCS == clss])])

        ds1_totals = np.append(ds1_totals, np.nanmean(DS1))
        ds2_totals = np.append(ds2_totals, np.nanmean(DS2))

        print("Finished {0}, going to {1}".format(date, common_dates[-1]))

    for clss in selected_lucs:

        if class_dictionary is None:
            plot_scatter_series(DS1_per_class[clss], DS2_per_class[clss],
                                dataset_names[0], dataset_names[1], clss,
                                output_dir)
        else:
            cats = {v[0]: k for k, v in class_dictionary.items()}
            plot_scatter_series(DS1_per_class[clss], DS2_per_class[clss],
                                dataset_names[0], dataset_names[1], cats[clss],
                                output_dir)

    plot_scatter_series(ds1_totals, ds2_totals, dataset_names[0],
                        dataset_names[1], "Total Area", output_dir)

    if class_dictionary is not None:
        output_fh = os.path.join(output_dir, 'landuse_percentages.png')
        driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(lu_fh)
        becgis.CreateGeoTiff(lu_fh.replace('.tif', '_.tif'), LUCS, driver, NDV,
                             xsize, ysize, GeoT, Projection)
        becgis.plot_category_areas(lu_fh.replace('.tif', '_.tif'),
                                   class_dictionary,
                                   output_fh,
                                   area_treshold=0.01)
        os.remove(lu_fh.replace('.tif', '_.tif'))
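Selecting the no_of_classes most dominant landuse classes hinges on np.unique with return_counts=True. A toy version:

import numpy as np

LUCS = np.array([1., 1., 1., 2., 2., 3., 4., 4., 4., 4., np.nan])
no_of_classes = 2

classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts=True)
counts_sorted = np.sort(counts)[-no_of_classes:]
selected = [classes[counts == c][0] for c in counts_sorted]
print(selected)  # the two most frequent classes: 1.0 and 4.0

Note that when two classes tie in pixel count, classes[counts == counter][0] returns the same class twice; the function above inherits that behaviour.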
Example #15
def compare_rasters2rasters(ds1_fhs,
                            ds1_dates,
                            ds2_fhs,
                            ds2_dates,
                            output_dir=None,
                            dataset_names=None,
                            data_treshold=0.75):
    """ 
    Compare two series of raster maps by computing
    the relative bias, RMAE, Pearson-correlation coefficient and
    the Nash-Sutcliffe coefficient per pixel.
    
    Parameters
    ----------
    ds1_fhs : list
        list pointing to georeferenced raster files of dataset 1.
    ds1_dates : list
        list corresponding to ds1_fhs specifying the dates.
    ds2_fhs : list
        list pointing to georeferenced raster files of dataset 2.
    ds2_dates : list
        list corresponding to ds2_fhs specifying the dates.
    dataset_names : list, optional
        list of strings describing the names of the datasets. e.g. ['CHIRPS', 'ERA-I'].
    output_dir : str, optional
        directory to store the resulting error maps as geotiffs (currently the RMSE per pixel).
    data_treshold : float, optional
        pixels with less than data_treshold * total_number_of_samples actual values are set to no-data, i.e. pixels with
        too few data points are ignored.
        
    Returns
    -------
    results : dict
        dictionary with the key 'rmse' holding a 2d ndarray with the
        root-mean-square error per pixel.
        
    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                                          output_dir = r"C:/Desktop/",
                                          dataset_names = ["CHIRPS", "TRMM"])
    """
    becgis.AssertProjResNDV([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1', 'DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(ds1_fhs[0])

    common_dates = becgis.CommonDates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize, xsize))
    non_nans = np.zeros((ysize, xsize))

    progress = 0
    samples = len(common_dates)

    for date in common_dates:

        DS1 = becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                 nan_values=True)
        DS2 = becgis.OpenAsArray(ds2_fhs[ds2_dates == date][0],
                                 nan_values=True)

        DS1[np.isnan(DS2)] = np.nan
        DS2[np.isnan(DS1)] = np.nan

        non_nans[~np.isnan(DS1)] += np.ones((ysize, xsize))[~np.isnan(DS1)]

        diff = (DS1 - DS2)**2
        diff_sum[~np.isnan(DS1)] += diff[~np.isnan(DS1)]

        progress += 1
        print "progress: {0} of {1} finished".format(progress, samples)

    diff_sum[non_nans <= data_treshold * samples] = np.nan
    results = dict()
    results['rmse'] = np.where(non_nans == 0., np.nan,
                               np.sqrt(diff_sum / non_nans))

    startdate = common_dates[0].strftime('%Y%m%d')
    enddate = common_dates[-1].strftime('%Y%m%d')

    if output_dir is not None:
        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)

        for varname in results.keys():
            fh = os.path.join(
                path,
                '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(varname, dataset_names[0],
                                                    dataset_names[1],
                                                    startdate, enddate))
            becgis.CreateGeoTiff(fh, results[varname], driver, NDV, xsize,
                                 ysize, GeoT, Projection)

    return results
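The per-pixel RMSE is accumulated as a running sum of squared differences plus a counter of valid samples. The same accumulation on a stack of 2x2 toy arrays:

import numpy as np

stack1 = [np.array([[1., 2.], [np.nan, 4.]]),
          np.array([[2., 2.], [3., np.nan]])]
stack2 = [np.array([[2., 2.], [1., 1.]]),
          np.array([[4., 2.], [3., 2.]])]

diff_sum = np.zeros((2, 2))
non_nans = np.zeros((2, 2))

for DS1, DS2 in zip(stack1, stack2):
    DS1, DS2 = DS1.copy(), DS2.copy()
    DS1[np.isnan(DS2)] = np.nan   # only compare where both have data
    valid = ~np.isnan(DS1)
    non_nans[valid] += 1
    diff_sum[valid] += ((DS1 - DS2) ** 2)[valid]

rmse = np.where(non_nans == 0., np.nan, np.sqrt(diff_sum / non_nans))
print(rmse)  # [[1.581 0.], [0. 3.]]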
Example #16
def compare_rasters2stations(ds1_fhs,
                             ds1_dates,
                             station_dict,
                             output_dir,
                             station_names=None,
                             quantity_unit=None,
                             dataset_names=None,
                             method='cubic',
                             min_records=1):
    """
    Compare a series of raster maps with station time series by computing
    the relative bias, RMSE, Pearson-correlation coefficient and
    the Nash-Sutcliffe coefficient for each station.
    
    Parameters
    ----------
    ds1_fhs : 1d ndarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1d ndarray
        List containing datetime.date or datetime.datetime objects corresponding
        to the filehandles in ds1_fhs. Length should be equal to ds1_fhs.
    station_dict : dictionary
        Dictionary containing coordinates of stations and timeseries. See examples
        below for an example
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load in a GIS program, 
        (2) interpolated maps showing the various error indicators spatially and (3)
        scatter plots for all the stations.
    station_names : dictionary, optional
        Dictionary containing names of the respective stations which can be added to the csv-file, see
        Examples for more information.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data.
    dataset_names : list, optional
        List of strings describing the names of the datasets.
    method : str, optional
        Method used for interpolation of the error-indicators, i.e.: 'linear', 'nearest' or 'cubic' (default).
    min_records : int, optional
        Minimum number of overlapping records required before a station is evaluated. Default is 1.
    
    Returns
    -------
    results : dictionary
        Dictionary containing several error indicators per station.

    Examples
    --------
    
    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value), 
                                       (datetime.date(year, month, day), data_value), 
                                        etc.],
                        (lat2, lon2): [(datetime.date(year, month, day), data_value), 
                                       (datetime.date(year, month, day), data_value), 
                                        etc.],
                         etc.}
                    
    >>> station_names = {(lat1,lon1): 'stationname1', (lat2,lon2): 'stationname2', etc.}
    
    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir = r"C:/Desktop",
                                station_names = None, quantity_unit = ["P", "mm/month"], 
                                dataset_names = ["CHIRPS", "Meteo Stations"], 
                                method = 'cubic')
    """
    results = dict()
    pixel_coordinates = list()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']
    if quantity_unit is not None:
        quantity_unit[1] = r'[' + quantity_unit[1] + r']'
    else:
        quantity_unit = ['data', '']

    becgis.AssertProjResNDV([ds1_fhs])
    no_of_stations = len(station_dict.keys())
    ds1_dates = becgis.ConvertDatetimeDate(ds1_dates, out='datetime')

    for i, station in enumerate(station_dict.keys()):

        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.CommonDates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size >= min_records:
            ds1_values = list()
            xpixel, ypixel = pixelcoordinates(station[0], station[1],
                                              ds1_fhs[0])

            if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
                print "Skipping station ({0}), cause its not on the map".format(
                    station)
                continue
            else:
                for date in common_dates:
                    ds1_values.append(
                        becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                           nan_values=True)[ypixel, xpixel])

                common_station_values = [
                    station_values[station_dates == date][0]
                    for date in common_dates
                ]

                results[station] = pairwise_validation(ds1_values,
                                                       common_station_values)
                results[station] += (sample_size, )

                pixel_coordinates.append((xpixel, ypixel))
                #m, b = np.polyfit(ds1_values, common_station_values, 1)

                path_scatter = os.path.join(output_dir, 'scatter_plots')
                if not os.path.exists(path_scatter):
                    os.makedirs(path_scatter)

                path_ts = os.path.join(output_dir, 'time_series')
                if not os.path.exists(path_ts):
                    os.makedirs(path_ts)

                path_int = os.path.join(output_dir, 'interp_errors')
                if not os.path.exists(path_int):
                    os.makedirs(path_int)

                xlabel = '{0} {1} {2}'.format(dataset_names[0],
                                              quantity_unit[0],
                                              quantity_unit[1])
                ylabel = '{0} {1} {2}'.format(dataset_names[1],
                                              quantity_unit[0],
                                              quantity_unit[1])
                if station_names is not None:
                    title = station_names[station]
                    fn = os.path.join(
                        path_scatter,
                        '{0}_vs_{1}.png'.format(station_names[station],
                                                dataset_names[0]))
                    fnts = os.path.join(
                        path_ts,
                        '{0}_vs_{1}.png'.format(station_names[station],
                                                dataset_names[0]))
                else:
                    title = station
                    fn = os.path.join(
                        path_scatter,
                        '{0}_vs_station_{1}.png'.format(dataset_names[0], i))
                    fnts = os.path.join(
                        path_ts,
                        '{0}_vs_station_{1}.png'.format(dataset_names[0], i))
                suptitle = 'pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, bias: {3:.5f}, n: {4:.0f}'.format(
                    results[station][0], results[station][1],
                    results[station][2], results[station][3],
                    results[station][4])
                plot_scatter_series(ds1_values,
                                    common_station_values,
                                    xlabel,
                                    ylabel,
                                    title,
                                    fn,
                                    suptitle=suptitle,
                                    dates=common_dates)

                xaxis_label = '{0} {1}'.format(quantity_unit[0],
                                               quantity_unit[1])
                xlabel = '{0}'.format(dataset_names[0])
                ylabel = '{0}'.format(dataset_names[1])
                plot_time_series(ds1_values,
                                 common_station_values,
                                 common_dates,
                                 xlabel,
                                 ylabel,
                                 xaxis_label,
                                 title,
                                 fnts,
                                 suptitle=suptitle)

                print "station {0} ({3}) of {1} finished ({2} matching records)".format(
                    i + 1, no_of_stations, sample_size, title)
        else:
            print "____station {0} of {1} skipped____ (less than {2} matching records)".format(
                i + 1, no_of_stations, min_records)
            continue

    n = len(results)
    csv_filename = os.path.join(
        output_dir,
        '{0}stations_vs_{1}_indicators.csv'.format(n, dataset_names[0]))
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow([
            'longitude', 'latitude', 'station_id', 'pearson', 'rmse',
            'nash_sutcliffe', 'bias', 'no_of_samples'
        ])
        for station in results.keys():
            writer.writerow([
                station[1], station[0],
                station_names[station] if station_names is not None else '',
                results[station][0], results[station][1], results[station][2],
                results[station][3], results[station][4]
            ])

    rslt = {
        'Relative Bias': list(),
        'RMSE': list(),
        'Pearson Coefficient': list(),
        'Nash-Sutcliffe Coefficient': list(),
        'Number Of Samples': list()
    }

    for value in results.values():
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in rslt.items():
        title = '{0}'.format(key)
        print(title)
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        value = value[(~np.isnan(value)) & (~np.isinf(value))]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(
            np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value[(~np.isnan(value)) & (~np.isinf(value))],
                       title,
                       xlabel,
                       output_dir,
                       suptitle=suptitle)

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(ds1_fhs[0])
    dummy_map = becgis.OpenAsArray(ds1_fhs[0])
    grid = np.mgrid[0:ysize, 0:xsize]
    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    for i, var in enumerate(unzip(results.values())):
        xy = np.array(pixel_coordinates)[~np.isnan(var)]
        z = var[~np.isnan(var)]
        interpolation_field = interpolate.griddata(xy,
                                                   z, (grid[1], grid[0]),
                                                   method=method,
                                                   fill_value=np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(
            path_int,
            '{0}_{1}stations_vs_{2}.tif'.format(var_names[i], len(xy),
                                                dataset_names[0]))
        becgis.CreateGeoTiff(fh, interpolation_field, driver, NDV, xsize,
                             ysize, GeoT, Projection)

    return results
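The interpolation step turns the scattered per-station scores into a full raster field via scipy.interpolate.griddata. A self-contained sketch with hypothetical station pixel coordinates and scores:

import numpy as np
from scipy import interpolate

ysize, xsize = 20, 30
grid = np.mgrid[0:ysize, 0:xsize]

# Hypothetical station pixel coordinates (x, y) and one score per station.
xy = np.array([(5, 3), (25, 4), (10, 15), (20, 18)])
z = np.array([0.9, 0.6, 0.75, 0.8])

# Pixels outside the stations' convex hull fall back to the mean score.
field = interpolate.griddata(xy, z, (grid[1], grid[0]),
                             method='cubic', fill_value=np.nanmean(z))
print(field.shape)  # (20, 30)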