def get_ts_from_complete_data(complete_data, mask, keys, dates=None):
    """
    Compute the spatial mean per date of several variables inside a mask.

    Parameters
    ----------
    complete_data : dict
        Maps a variable name to a sequence whose first item holds the
        raster filehandles and whose second item holds the matching dates.
        Both are indexed with a boolean comparison, so they are presumably
        numpy arrays -- TODO confirm against caller.
    mask : str
        Filehandle of a raster; pixels that are NaN in this raster are
        excluded from the mean.
    keys : iterable or None
        Variables to process. When None, all keys of complete_data are used.
    dates : optional
        Unused by this function.

    Returns
    -------
    tss : dict
        Maps each key to a tuple (common_dates, means), where means is a
        float array with one spatial mean per common date.
    """
    # Identity check: `keys == None` is evaluated elementwise (and is then
    # ambiguous in a boolean context) when keys is a numpy array.
    if keys is None:
        keys = complete_data.keys()

    common_dates = becgis.CommonDates([complete_data[key][1] for key in keys])
    becgis.AssertProjResNDV([complete_data[key][0] for key in keys])

    MASK = becgis.OpenAsArray(mask, nan_values=True)
    # The mask does not change between files, so derive it only once.
    outside_mask = np.isnan(MASK)

    tss = dict()
    for key in keys:
        fhs = complete_data[key][0]
        fh_dates = complete_data[key][1]
        means = []
        for date in common_dates:
            tif = fhs[fh_dates == date][0]
            DATA = becgis.OpenAsArray(tif, nan_values=True)
            # No-data inside the mask counts as zero; pixels outside the
            # mask are dropped from the mean.
            DATA[np.isnan(DATA)] = 0.0
            DATA[outside_mask] = np.nan
            means.append(np.nanmean(DATA))
        tss[key] = (common_dates, np.array(means, dtype=np.float64))
    return tss
def createTRENDMAP(self, varname):
    """
    Download the trend rasters for a variable and store masked results.

    Sets self.factor, self.PixelArea, self.extent_ll and fills
    self.Columns[varname] with the keys 'TREND' and 'PVALUE'.

    Parameters
    ----------
    varname : str
        Key into self.VARS and self.Columns; also used as the name of the
        downloaded zip-file inside self.OutFldr.
    """
    trend_image = self.calcTREND(self.VARS[varname])
    zip_fh = os.path.join(self.OutFldr, '{0}.zip'.format(varname))

    self.factor = np.interp(self.Area, [50000, 625000000], [0.1, 1.0])
    scale = self.scale.multiply(self.factor).getInfo()
    geegis.downloadImage(trend_image, zip_fh, self.CountryShape, scale)

    # The rasters are read from a folder named like the zip-file without
    # its extension -- presumably downloadImage extracts there; verify.
    data_dir = zip_fh[:-4]
    trend_fh = os.path.join(data_dir, 'test.long-trend.tif')
    max_fh = os.path.join(data_dir, 'test.max.tif')
    pvalue_fh = os.path.join(data_dir, 'test.p-value.tif')

    TREND = becgis.OpenAsArray(trend_fh)
    MASK = becgis.OpenAsArray(max_fh)
    PVALUE = becgis.OpenAsArray(pvalue_fh)

    assert np.shape(TREND) == np.shape(MASK), "resolution dont match"

    masked_out = MASK == 0

    # Keep only trends inside the mask, non-zero and with p-value <= 0.1.
    TREND[masked_out] = np.nan
    TREND[TREND == 0.0000000000] = np.nan  # need to fix this properly.
    TREND[PVALUE > 0.1] = np.nan

    # Turn the p-values into a flag: 1 where p > 0.1 inside the mask,
    # NaN everywhere else.
    PVALUE[PVALUE <= 0.1] = np.nan
    PVALUE[masked_out] = np.nan
    PVALUE[~np.isnan(PVALUE)] = 1.

    AREA = becgis.MapPixelAreakm(max_fh)
    self.PixelArea = np.mean(AREA[MASK == 1]) * 100  # ha

    column = self.Columns[varname]
    column['TREND'] = TREND
    column['PVALUE'] = PVALUE

    ds = gdal.Open(trend_fh)
    gt = ds.GetGeoTransform()
    n_cols = ds.RasterXSize
    n_rows = ds.RasterYSize
    ds = None  # release the GDAL dataset

    x_min = gt[0]
    x_max = gt[0] + (gt[1] * n_cols)
    y_min = gt[3] + (gt[5] * n_rows)
    y_max = gt[3]
    self.extent_ll = (x_min, x_max, y_min, y_max)
def root_zone_storage_Wpx(output_folder, rz_sm_fhs, rz_depth_fh):
    """
    Multiply each root zone soil moisture map with the root depth map.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'RZstor' is created for the results.
    rz_sm_fhs : iterable of str
        Filehandles of the root zone soil moisture maps.
    rz_depth_fh : str
        Filehandle of the root depth map; its geo-information is reused
        for all output maps.

    Returns
    -------
    root_storage_fhs : list
        Filehandles of the created storage maps, in input order.
    """
    out_folder = os.path.join(output_folder, "RZstor")
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    depth = becgis.OpenAsArray(rz_depth_fh, nan_values=True)
    geo_info = becgis.GetGeoInfo(rz_depth_fh)

    root_storage_fhs = []
    for sm_fh in rz_sm_fhs:
        soil_moisture = becgis.OpenAsArray(sm_fh, nan_values=True)
        # The output name reuses the last ten characters of the input name.
        target_fh = os.path.join(out_folder,
                                 'RZ_storage_mm_%s' % (sm_fh[-10:]))
        becgis.CreateGeoTiff(target_fh, depth * soil_moisture, *geo_info)
        root_storage_fhs.append(target_fh)

    return root_storage_fhs
def calc_ETs(ET, lu_fh, sheet1_lucs):
    """
    Sum the values of an array per landuse category.

    Parameters
    ----------
    ET : ndarray
        Array with the data to be summed.
    lu_fh : str
        Filehandle pointing to the landuse map.
    sheet1_lucs : dict
        Maps a category name to the landuse classes belonging to it.

    Returns
    -------
    et : dict
        Maps each category name to the NaN-ignoring sum of ET over all
        pixels whose landuse class belongs to that category.
    """
    landuse = becgis.OpenAsArray(lu_fh, nan_values=True)

    totals = dict()
    for category, lu_classes in sheet1_lucs.items():
        # True wherever the pixel belongs to any class of this category.
        per_class = [landuse == lu_class for lu_class in lu_classes]
        in_category = np.logical_or.reduce(per_class)
        totals[category] = np.nansum(ET[in_category])

    return totals
def livestock_feed(output_folder, lu_fh, ndm_fhs, feed_dict, live_feed, cattle_fh, fraction_fhs, ndmdates):
    """
    Calculate natural livestock feed production maps.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'Feed' is created for the results.
    lu_fh : str
        Filehandle of the land use map.
    ndm_fhs : sequence of str
        Filehandles of the NDM maps.
    feed_dict : dict
        Maps a pasture class to the list of land use classes it contains.
    live_feed : dict
        Maps a pasture class to the fraction of NDM available as feed.
    cattle_fh : str
        Filehandle of a cattle map. Currently unused.
    fraction_fhs : sequence of str
        Filehandles of the yield fraction maps, one per NDM map.
    ndmdates : sequence
        Dates of the NDM maps (objects with `year` and `month`).

    Returns
    -------
    feed_fhs_landscape : list
        Filehandles of the created landscape feed maps.
    feed_fhs_incremental : list
        Filehandles of the created incremental feed maps.
    """
    out_folder = os.path.join(output_folder, "Feed")
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = becgis.MapPixelAreakm(lu_fh) * 100
    LULC = RC.Open_tiff_array(lu_fh)
    geo_out, _, _, _ = RC.Open_array_info(lu_fh)

    # Feed fraction per pixel, zero where no pasture class applies.
    f_pct = np.zeros(LULC.shape)
    for lu_type, classes in feed_dict.items():
        mask = np.logical_or.reduce([LULC == value for value in classes])
        f_pct[mask] = live_feed[lu_type]

    feed_fhs_landscape = []
    feed_fhs_incremental = []
    for d, ndm_fh in enumerate(ndm_fhs):
        # Indexing (rather than zip) keeps the IndexError when the
        # fraction or date sequences are shorter than ndm_fhs.
        fraction_fh = fraction_fhs[d]
        date1 = ndmdates[d]
        stamp = '%d_%02d' % (date1.year, date1.month)

        yield_fract = RC.Open_tiff_array(fraction_fh)

        # os.path.join instead of a hard-coded '\\' so the files end up
        # inside out_folder on every platform.
        out_fh_l = os.path.join(out_folder,
                                'feed_prod_landscape_%s.tif' % stamp)
        out_fh_i = os.path.join(out_folder,
                                'feed_prod_incremental_%s.tif' % stamp)

        NDM = becgis.OpenAsArray(ndm_fh, nan_values=True)
        NDM_feed = NDM * f_pct
        NDM_feed_incremental = NDM_feed * yield_fract * area_ha/1e6
        NDM_feed_landscape = (NDM_feed * (1-yield_fract)) * area_ha/1e6

        DC.Save_as_tiff(out_fh_l, NDM_feed_landscape, geo_out)
        DC.Save_as_tiff(out_fh_i, NDM_feed_incremental, geo_out)

        feed_fhs_landscape.append(out_fh_l)
        feed_fhs_incremental.append(out_fh_i)

    return feed_fhs_landscape, feed_fhs_incremental
def get_ts_from_complete_data_spec(complete_data, lu_fh, keys, a, dates=None):
    """
    Compute weighted spatial means per date of several variables.

    Every pixel is multiplied by `a`, except pixels of the landuse
    categories 'Forests', 'Rainfed Crops', 'Shrubland' and
    'Forest Plantations', which are multiplied by zero.

    Parameters
    ----------
    complete_data : dict
        Maps a variable name to a sequence whose first item holds the
        raster filehandles and whose second item holds the matching dates.
        Both are indexed with a boolean comparison, so they are presumably
        numpy arrays -- TODO confirm against caller.
    lu_fh : str
        Filehandle of the landuse map; pixels that are NaN in this map are
        excluded from the mean.
    keys : iterable or None
        Variables to process. When None, all keys of complete_data are used.
    a : float
        Weight applied to pixels outside the four categories listed above.
    dates : optional
        Unused by this function.

    Returns
    -------
    tss : dict
        Maps each key to a tuple (common_dates, means), where means is a
        float array with one weighted spatial mean per common date.
    """
    # Identity check: `keys == None` is evaluated elementwise (and is then
    # ambiguous in a boolean context) when keys is a numpy array.
    if keys is None:
        keys = complete_data.keys()

    common_dates = becgis.CommonDates([complete_data[key][1] for key in keys])
    becgis.AssertProjResNDV([complete_data[key][0] for key in keys])

    MASK = becgis.OpenAsArray(lu_fh, nan_values=True)

    lucs = gd.get_sheet4_6_classes()
    gw_classes = list()
    for subclass in ['Forests', 'Rainfed Crops', 'Shrubland',
                     'Forest Plantations']:
        gw_classes += lucs[subclass]
    mask_gw = np.logical_or.reduce([MASK == value for value in gw_classes])

    # Both depend only on the landuse map, so build them once instead of
    # once per raster. Every raster must have the shape of the landuse
    # map anyway, because it is indexed with `outside_mask` below.
    outside_mask = np.isnan(MASK)
    alpha = np.ones(np.shape(MASK)) * a
    alpha[mask_gw] = 0.0

    tss = dict()
    for key in keys:
        fhs = complete_data[key][0]
        fh_dates = complete_data[key][1]
        means = []
        for date in common_dates:
            tif = fhs[fh_dates == date][0]
            DATA = becgis.OpenAsArray(tif, nan_values=True)
            DATA[np.isnan(DATA)] = 0.0
            DATA[outside_mask] = np.nan
            means.append(np.nanmean(DATA * alpha))
        tss[key] = (common_dates, np.array(means, dtype=np.float64))
    return tss
def get_timeseries_raster(ds1_fhs, ds1_dates, coordinates, output_fh, unit='m3/s'):
    """
    Extract a timeseries from a set of raster files at one location and
    store the result in a csv-file.

    When the coordinates do not fall on the map a message is printed and
    no csv-file is written.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        List containing datetime.date or datetime.datetime objects
        corresponding to the filehandles in ds1_fhs. Length should be
        equal to ds1_fhs.
    coordinates : tuple
        Tuple with the latitude and longitude, (lat, lon).
    output_fh : str
        Filehandle pointing to a csv-file.
    unit : str, optional
        String indicating the unit of the data, default is 'm3/s'.
    """
    xpixel, ypixel = pixelcoordinates(coordinates[0], coordinates[1],
                                      ds1_fhs[0])

    # Stop before touching the output file: without pixel values there is
    # nothing to write, and continuing would fail while indexing an empty
    # list after the csv-file had already been opened.
    if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
        print("Coordinates ({0}) not on the map".format(coordinates))
        return

    ds1_values = np.array([
        becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                           nan_values=True)[ypixel, xpixel]
        for date in ds1_dates
    ])

    # 'wb' is the csv convention of the Python 2 this file targets; the
    # with-statement closes the file even when writing fails.
    with open(output_fh, 'wb') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['lat:', coordinates[0], 'lon:', coordinates[1],
                         unit])
        writer.writerow(['datetime', 'year', 'month', 'day', 'data'])
        for date in ds1_dates:
            year = date.year
            month = date.month
            day = date.day
            dt = datetime.datetime(year, month, day, 0, 0, 0)
            data = ds1_values[ds1_dates == date][0]
            writer.writerow([dt, year, month, day, data])
def lu_type_sum(data_fh, lu_fh, lu_dict, convert=None):
    """
    Sum the values of a raster per group of landuse classes.

    Parameters
    ----------
    data_fh : str
        Filehandle of the raster with the data to be summed.
    lu_fh : str
        Filehandle of the landuse map.
    lu_dict : dict
        Maps a group name to the landuse class values belonging to it.
    convert : str, optional
        When equal to 'mm_to_km3' the data is multiplied with the pixel
        area (from becgis.MapPixelAreakm) divided by 1e6 before summing.

    Returns
    -------
    out_data : dict
        Maps each group name to the NaN-ignoring sum of the data over all
        pixels belonging to that group.
    """
    landuse = RC.Open_tiff_array(lu_fh)
    values = becgis.OpenAsArray(data_fh, nan_values=True)

    if convert == 'mm_to_km3':
        pixel_area = becgis.MapPixelAreakm(data_fh)
        values *= pixel_area / 1e6

    totals = {}
    for group, class_values in lu_dict.items():
        # Count per pixel how many classes match; any non-zero count
        # marks the pixel as part of the group.
        hits = np.sum([landuse == value for value in class_values], axis=0)
        selection = hits.astype(bool)
        totals[group] = np.nansum(values[selection])

    return totals
def fuel_wood(output_folder, lu_fh, ndm_fhs, fraction_fhs, ndmdates):
    """
    Calculate fuel wood production maps.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'Fuel' is created for the results.
    lu_fh : str
        Filehandle of the land use map.
    ndm_fhs : sequence of str
        Filehandles of the NDM maps.
    fraction_fhs : sequence of str
        Filehandles of the yield fraction maps, one per NDM map.
    ndmdates : sequence
        Dates of the NDM maps (objects with `year` and `month`).

    Returns
    -------
    fuel_fhs_landscape : list
        Filehandles of the created landscape fuel maps.
    fuel_fhs_incremental : list
        Filehandles of the created incremental fuel maps.
    """
    out_folder = os.path.join(output_folder, "Fuel")
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = becgis.MapPixelAreakm(lu_fh) * 100
    LULC = RC.Open_tiff_array(lu_fh)
    geo_out, _, _, _ = RC.Open_array_info(lu_fh)

    # Land use classes that contribute to fuel wood; 1 inside, 0 outside.
    fuel_classes = [1, 8, 9, 10, 11, 12, 13]
    fuel_mask = np.zeros(LULC.shape)
    for fc in fuel_classes:
        fuel_mask[LULC == fc] = 1

    fuel_fhs_landscape = []
    fuel_fhs_incremental = []
    for d, ndm_fh in enumerate(ndm_fhs):
        # Indexing (rather than zip) keeps the IndexError when the
        # fraction or date sequences are shorter than ndm_fhs.
        fraction_fh = fraction_fhs[d]
        yield_fract = RC.Open_tiff_array(fraction_fh)
        date1 = ndmdates[d]
        stamp = '%d_%02d' % (date1.year, date1.month)

        # os.path.join instead of a hard-coded '\\' so the files end up
        # inside out_folder on every platform.
        out_fh_l = os.path.join(out_folder,
                                'fuel_prod_landscape_%s.tif' % stamp)
        out_fh_i = os.path.join(out_folder,
                                'fuel_prod_incremental_%s.tif' % stamp)

        NDM = becgis.OpenAsArray(ndm_fh, nan_values=True)
        # NOTE(review): .05 is presumably the share of NDM usable as fuel
        # wood -- confirm its source.
        NDM_fuel_incremental = NDM * .05 * fuel_mask * yield_fract * area_ha/1e6
        NDM_fuel_landscape = NDM * .05 * fuel_mask * (1-yield_fract) * area_ha/1e6

        DC.Save_as_tiff(out_fh_i, NDM_fuel_incremental, geo_out)
        DC.Save_as_tiff(out_fh_l, NDM_fuel_landscape, geo_out)

        fuel_fhs_landscape.append(out_fh_l)
        fuel_fhs_incremental.append(out_fh_i)

    return fuel_fhs_landscape, fuel_fhs_incremental
def recycle(output_folder, et_bg_fhs, recy_ratio, lu_fh, et_type):
    """
    Multiply a series of ET maps with a recycling ratio and save them.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'temp_et_recycle' is created.
    et_bg_fhs : iterable of str
        Filehandles of the ET maps. Characters [-11:-4] of each name are
        reused in the output name (presumably a 'YYYY_MM' stamp -- verify
        against caller).
    recy_ratio : float or ndarray
        Factor the ET maps are multiplied with.
    lu_fh : str
        Filehandle of the landuse map; its geo-information is used for the
        output maps.
    et_type : str
        Label inserted in the output filenames.

    Returns
    -------
    recycle_fhs : list
        Filehandles of the created maps, in input order.
    """
    out_folder = os.path.join(output_folder, "temp_et_recycle")
    geo_out, _, _, _ = RC.Open_array_info(lu_fh)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    recycle_fhs = []
    for et_fh in et_bg_fhs:
        # os.path.join instead of a hard-coded '\\' so the file ends up
        # inside out_folder on every platform.
        out_fh = os.path.join(
            out_folder, "recycled_et_" + et_type + et_fh[-11:-4] + ".tif")
        et = becgis.OpenAsArray(et_fh, nan_values=True)
        et_recy = et * recy_ratio
        DC.Save_as_tiff(out_fh, et_recy, geo_out)
        recycle_fhs.append(out_fh)
    return recycle_fhs
def correct_var(metadata, complete_data, output_dir, formula, new_var, slope=False, bounds=(0, [1.0, 1., 12.])):
    """
    Scale all maps of one variable with a fitted seasonal fraction.

    The variable is the last entry of the first item returned by
    split_form(formula). The fit parameters come from calc_var_correction
    and each map is multiplied by
    a[0] * (0.5 * cos((x - a[2]) * pi / 6) + 0.5) + a[1] * (1 - a[0]),
    with x the result of calc_delta_months(x0, date).

    Parameters
    ----------
    metadata : dict
        Must contain the key 'name', used in the output path.
    complete_data : dict
        Maps a variable name to (filehandles, dates).
    output_dir : str
        Base directory; maps are stored in
        <output_dir>/<metadata['name']>/data/<new_var>.
    formula : str
        Passed to split_form and calc_var_correction.
    new_var : str
        Name of the output subfolder.
    slope : bool, optional
        Passed to calc_var_correction.
    bounds : tuple, optional
        Passed to calc_var_correction.

    Returns
    -------
    a : sequence
        The fitted parameters.
    meta : sequence
        First two items returned by becgis.SortFiles for the output folder.
    """
    var = split_form(formula)[0][-1]

    a, x0 = calc_var_correction(metadata, complete_data, output_dir,
                                formula=formula, slope=slope, plot=True,
                                bounds=bounds)

    for date, src_fh in zip(complete_data[var][1], complete_data[var][0]):
        geo_info = becgis.GetGeoInfo(src_fh)
        data = becgis.OpenAsArray(src_fh, nan_values=True)

        x = calc_delta_months(x0, date)
        seasonal = np.cos((x - a[2]) * (np.pi / 6)) * 0.5 + 0.5
        fraction = a[0] * seasonal + (a[1] * (1 - a[0]))
        data *= fraction

        folder = os.path.join(output_dir, metadata['name'], 'data', new_var)
        if not os.path.exists(folder):
            os.makedirs(folder)

        # The part after the last underscore of the source filename is
        # sliced as [0:4] and [4:6] (presumably YYYYMM -- verify).
        stamp = os.path.split(src_fh)[1].split('_')[-1]
        target_name = 'supply_sw_' + stamp[0:4] + '_' + stamp[4:6] + '.tif'
        target_fh = os.path.join(folder, target_name)
        becgis.CreateGeoTiff(target_fh, data, *geo_info)

    meta = becgis.SortFiles(folder, [-11, -7], month_position=[-6, -4])[0:2]
    return a, meta
def calc_non_utilizable(P, ET, fractions_fh):
    """
    Calculate the non utilizable outflow.

    Parameters
    ----------
    P : ndarray
        Array with the volumes of precipitation per pixel.
    ET : ndarray
        Array with the volumes of evapotranspiration per pixel.
    fractions_fh : str
        Filehandle pointing to a map with fractions indicating how much of
        the (P - ET) difference is non-utilizable.

    Returns
    -------
    non_utilizable_runoff : float
        NaN-ignoring sum over all pixels of (P - ET) times the fraction.
    """
    fraction_map = becgis.OpenAsArray(fractions_fh, nan_values=True)
    surplus = P - ET
    return np.nansum(surplus * fraction_map)
def calc_sheet1(entries, lu_fh, sheet1_lucs, recycling_ratio, q_outflow, q_out_avg, output_folder, q_in_sw, q_in_gw = 0., q_in_desal = 0., q_out_sw = 0., q_out_gw = 0.):
    """
    Calculate the required values to plot Water Accounting Plus Sheet 1.

    Parameters
    ----------
    entries : dict
        Dictionary with filehandles. The keys 'P', 'ETgreen', 'ETblue',
        'WPL', 'EWR' and 'Fractions' are read.
    lu_fh : str
        Filehandle pointing to the landuse map.
    sheet1_lucs : dict
        Dictionary sorting different landuse classes into categories. The
        categories 'Managed', 'Modified', 'Protected' and 'Utilized' are
        read.
    recycling_ratio : float
        Value indicating the recycling ratio.
    q_outflow : float
        The outflow of the basin.
    q_out_avg : float
        The longterm average outflow.
    output_folder : str
        Folder to store results. Not used by this function.
    q_in_sw : float
        Surfacewater inflow into the basin. Required (no default).
    q_in_gw : float, optional
        Groundwater inflow into the basin. Default is 0.0.
    q_in_desal : float, optional
        Desalinised water inflow into the basin. Default is 0.0.
    q_out_sw : float, optional
        Additional surfacewater outflow from basin. Default is 0.0.
    q_out_gw : float, optional
        Groundwater outflow from the basin. Default is 0.0.

    Returns
    -------
    results : dict
        Dictionary containing necessary variables for Sheet 1.
    """
    results = dict()

    LULC = becgis.OpenAsArray(lu_fh, nan_values=True)
    P = becgis.OpenAsArray(entries['P'], nan_values=True)
    ETgreen = becgis.OpenAsArray(entries['ETgreen'], nan_values=True)
    ETblue = becgis.OpenAsArray(entries['ETblue'], nan_values=True)

    pixel_area = becgis.MapPixelAreakm(lu_fh)

    gray_water_fraction = becgis.calc_basinmean(entries['WPL'], lu_fh)
    ewr_percentage = becgis.calc_basinmean(entries['EWR'], lu_fh)

    # Drop everything outside the landuse map.
    outside = np.isnan(LULC)
    P[outside] = ETgreen[outside] = ETblue[outside] = np.nan

    # Depth times pixel area; presumably mm * km2 -> km3 (TODO confirm
    # the units of the input maps).
    P, ETgreen, ETblue = np.array([P, ETgreen, ETblue]) * 0.000001 * pixel_area

    ET = np.nansum([ETblue, ETgreen], axis=0)

    results['et_advection'], results['p_advection'], results['p_recycled'], results['dS'] = calc_wb(
        P, ET, q_outflow, recycling_ratio,
        q_in_sw=q_in_sw, q_in_gw=q_in_gw, q_in_desal=q_in_desal,
        q_out_sw=q_out_sw, q_out_gw=q_out_gw)

    # Mekonnen and Hoekstra (2015), Global Gray Water Footprint and Water
    # Pollution Levels Related to Anthropogenic Nitrogen Loads to Fresh Water
    results['non_recoverable'] = gray_water_fraction * (q_outflow + q_out_sw)
    results['reserved_outflow_demand'] = q_out_avg * ewr_percentage

    results['other'] = 0.0

    landscape_et = calc_ETs(ETgreen, lu_fh, sheet1_lucs)
    incremental_et = calc_ETs(ETblue, lu_fh, sheet1_lucs)

    results['manmade'] = incremental_et['Managed']
    results['natural'] = incremental_et['Modified'] + incremental_et['Protected'] + incremental_et['Utilized']

    other_fractions = {'Modified': 0.00,
                       'Managed': 1.00,
                       'Protected': 0.00,
                       'Utilized': 0.00}

    non_recoverable_fractions = {'Modified': 0.00,
                                 'Managed': 1.00,
                                 'Protected': 0.00,
                                 'Utilized': 0.00}

    results['uf_plu'], results['uf_ulu'], results['uf_mlu'], results['uf_mwu'] = calc_utilizedflow(
        incremental_et, results['other'], results['non_recoverable'],
        other_fractions, non_recoverable_fractions)

    net_inflow = results['p_recycled'] + results['p_advection'] + q_in_sw + q_in_gw + q_in_desal + results['dS']

    # list(...) so np.nansum receives a real sequence; a Python 3
    # dict-view is not converted to a numeric array by numpy.
    consumed_water = (np.nansum(list(landscape_et.values()))
                      + np.nansum(list(incremental_et.values()))
                      + results['other']
                      + results['non_recoverable'])

    non_consumed_water = net_inflow - consumed_water

    # The outflow terms are filled in sequence and none may exceed what
    # is left of the non-consumed water.
    results['non_utilizable_outflow'] = min(
        non_consumed_water,
        max(0.0, calc_non_utilizable(P, ET, entries['Fractions'])))
    results['reserved_outflow_actual'] = min(
        non_consumed_water - results['non_utilizable_outflow'],
        results['reserved_outflow_demand'])
    results['utilizable_outflow'] = max(
        0.0,
        non_consumed_water - results['non_utilizable_outflow'] - results['reserved_outflow_actual'])

    results['landscape_et_mwu'] = landscape_et['Managed']
    results['landscape_et_mlu'] = landscape_et['Modified']
    results['landscape_et_ulu'] = landscape_et['Utilized']
    results['landscape_et_plu'] = landscape_et['Protected']
    results['q_outflow'] = q_outflow
    results['q_in_sw'] = q_in_sw
    results['q_in_gw'] = q_in_gw
    results['q_in_desal'] = q_in_desal
    results['q_out_sw'] = q_out_sw
    results['q_out_gw'] = q_out_gw

    return results
def compare_rasters2rasters_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, lu_fh, output_dir, dataset_names=["DS1", "DS2"], class_dictionary=None, no_of_classes=6):
    """
    Compare two raster datasets with each other per landuse class.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array with strings pointing to maps of dataset 1.
    ds1_dates : ndarray
        Array with same shape as ds1_fhs, containing datetime.date objects.
    ds2_fhs : ndarray
        Array with strings pointing to maps of dataset 2.
    ds2_dates : ndarray
        Array with same shape as ds2_fhs, containing datetime.date objects.
    lu_fh : str
        Pointer to a landusemap.
    output_dir : str
        Folder to save results.
    dataset_names : list, optional
        List with two strings describing the names of the two datasets.
        Only read, never modified.
    class_dictionary : dict, optional
        Dictionary specifying all the landuse categories. The first value
        of each entry is used to look up the category name of a class.
    no_of_classes : int, optional
        The 'no_of_classes' most dominant classes in lu_fh are compared,
        the rest is ignored.
    """
    LUCS = becgis.OpenAsArray(lu_fh, nan_values=True)
    DS1 = becgis.OpenAsArray(ds1_fhs[0], nan_values=True)
    DS2 = becgis.OpenAsArray(ds2_fhs[0], nan_values=True)

    # Only count landuse pixels for which both datasets hold data in
    # their first map.
    DS1[np.isnan(DS2)] = np.nan
    LUCS[np.isnan(DS1)] = np.nan

    classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts=True)

    # Select by position rather than by count value: looking classes up
    # with `counts == counter` returns the same class twice when two
    # classes cover an equal number of pixels. A stable sort keeps the
    # selection deterministic for ties.
    dominant = np.argsort(counts, kind='mergesort')[-no_of_classes:]
    selected_lucs = [classes[idx] for idx in dominant]

    common_dates = becgis.CommonDates([ds1_dates, ds2_dates])

    ds1_totals = []
    ds2_totals = []
    DS1_per_class = dict((clss, []) for clss in selected_lucs)
    DS2_per_class = dict((clss, []) for clss in selected_lucs)

    for date in common_dates:
        DS1 = becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                 nan_values=True)
        DS2 = becgis.OpenAsArray(ds2_fhs[ds2_dates == date][0],
                                 nan_values=True)

        for clss in selected_lucs:
            in_class = LUCS == clss
            DS1_per_class[clss].append(np.nanmean(DS1[in_class]))
            DS2_per_class[clss].append(np.nanmean(DS2[in_class]))

        ds1_totals.append(np.nanmean(DS1))
        ds2_totals.append(np.nanmean(DS2))

        print("Finished {0}, going to {1}".format(date, common_dates[-1]))

    # Map the first class value of each category to the category name.
    cats = None
    if class_dictionary is not None:
        cats = dict((v[0], k) for k, v in class_dictionary.items())

    for clss in selected_lucs:
        label = clss if cats is None else cats[clss]
        plot_scatter_series(np.array(DS1_per_class[clss]),
                            np.array(DS2_per_class[clss]),
                            dataset_names[0], dataset_names[1],
                            label, output_dir)

    plot_scatter_series(np.array(ds1_totals, dtype=np.float64),
                        np.array(ds2_totals, dtype=np.float64),
                        dataset_names[0], dataset_names[1],
                        "Total Area", output_dir)

    if class_dictionary is not None:
        output_fh = os.path.join(output_dir, 'landuse_percentages.png')
        # The masked landuse map is only needed for this plot, so it is
        # written here and always removed again afterwards.
        tmp_lu_fh = lu_fh.replace('.tif', '_.tif')
        driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(lu_fh)
        becgis.CreateGeoTiff(tmp_lu_fh, LUCS, driver, NDV, xsize, ysize,
                             GeoT, Projection)
        try:
            becgis.plot_category_areas(tmp_lu_fh, class_dictionary,
                                       output_fh, area_treshold=0.01)
        finally:
            os.remove(tmp_lu_fh)
def compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, output_dir=None, dataset_names=None, data_treshold=0.75):
    """
    Compare two series of raster maps by computing the root mean square
    error per pixel over all common dates.

    Parameters
    ----------
    ds1_fhs : ndarray
        Filehandles of the georeferenced raster files of dataset 1.
    ds1_dates : ndarray
        Dates corresponding to ds1_fhs.
    ds2_fhs : ndarray
        Filehandles of the georeferenced raster files of dataset 2.
    ds2_dates : ndarray
        Dates corresponding to ds2_fhs.
    output_dir : str, optional
        Directory to store the results. When given, one geotiff per result
        is written to the subfolder 'spatial_errors'. When None (default)
        nothing is written.
    dataset_names : list, optional
        List of strings describing the names of the datasets, used in the
        output filenames. Default is ['DS1', 'DS2'].
    data_treshold : float, optional
        Pixels with at most data_treshold * total_number_of_samples valid
        value pairs are set to no-data, i.e. pixels with too few data
        points are ignored.

    Returns
    -------
    results : dict
        Dictionary with the key 'rmse' holding a 2dnarray with the value
        per pixel.

    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                                          output_dir = r"C:/Desktop/",
                                          dataset_names = ["CHIRPS", "TRMM"])
    """
    becgis.AssertProjResNDV([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1', 'DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(ds1_fhs[0])

    common_dates = becgis.CommonDates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize, xsize))
    non_nans = np.zeros((ysize, xsize))

    samples = len(common_dates)

    for progress, date in enumerate(common_dates, 1):
        DS1 = becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                                 nan_values=True)
        DS2 = becgis.OpenAsArray(ds2_fhs[ds2_dates == date][0],
                                 nan_values=True)

        # Only pixels with data in both maps take part.
        DS1[np.isnan(DS2)] = np.nan
        DS2[np.isnan(DS1)] = np.nan
        valid = ~np.isnan(DS1)

        non_nans[valid] += 1
        diff_sum[valid] += ((DS1 - DS2)**2)[valid]

        print("progress: {0} of {1} finished".format(progress, samples))

    diff_sum[non_nans <= data_treshold * samples] = np.nan

    results = dict()
    # Pixels without any sample divide 0 by 0; they are replaced by NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        results['rmse'] = np.where(non_nans == 0., np.nan,
                                   np.sqrt(diff_sum / non_nans))

    # Everything that touches output_dir lives inside this check, so the
    # default output_dir=None no longer fails in os.path.join.
    if output_dir is not None:
        startdate = common_dates[0].strftime('%Y%m%d')
        enddate = common_dates[-1].strftime('%Y%m%d')

        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)

        for varname in results.keys():
            fh = os.path.join(
                path, '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(
                    varname, dataset_names[0], dataset_names[1],
                    startdate, enddate))
            becgis.CreateGeoTiff(fh, results[varname], driver, NDV, xsize,
                                 ysize, GeoT, Projection)

    return results
def compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir, station_names=None, quantity_unit=None, dataset_names=None, method='cubic', min_records=1):
    """
    Compare a series of raster maps with station time series by computing
    the Pearson-correlation coefficient, RMSE, Nash-Sutcliffe coefficient
    and bias for each station.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        List containing datetime.date or datetime.datetime objects
        corresponding to the filehandles in ds1_fhs. Length should be
        equal to ds1_fhs.
    station_dict : dictionary
        Dictionary containing coordinates of stations and timeseries. See
        the examples below.
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load in
        a GIS program, (2) interpolated maps showing the various error
        indicators spatially and (3) scatter plots and time series for all
        the stations.
    station_names : dictionary, optional
        Dictionary containing names of the respective stations. When not
        given, stations are labelled 'station_<index>' in the csv-file.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data.
        The list itself is not modified.
    dataset_names : list, optional
        List of strings describing the names of the datasets.
    method : str, optional
        Method used for interpolation of the error-indicators, i.e.:
        'linear', 'nearest' or 'cubic' (default).
    min_records : int, optional
        Stations with fewer matching records than this are skipped.
        Default is 1.

    Returns
    -------
    results : dictionary
        Dictionary containing, per station, the tuple (pearson, rmse,
        nash-sutcliffe, bias, number of samples).

    Examples
    --------
    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        (lat2, lon2): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        etc.}

    >>> station_names = {(lat1,lon1): 'stationname1', (lat2,lon2): 'stationname2', etc.}

    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict,
                                           output_dir = r"C:/Desktop",
                                           station_names = None,
                                           quantity_unit = ["P", "mm/month"],
                                           dataset_names = ["CHIRPS", "Meteo Stations"],
                                           method = 'cubic')
    """
    results = dict()
    pixel_coordinates = list()
    # Stations in the order they were processed; keeps results and
    # pixel_coordinates aligned independent of dict ordering.
    processed_stations = list()
    station_ids = dict()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']
    if quantity_unit is not None:
        # Build a new list so the caller's list is left untouched.
        quantity_unit = [quantity_unit[0], r'[' + quantity_unit[1] + r']']
    else:
        quantity_unit = ['data', '']

    becgis.AssertProjResNDV([ds1_fhs])
    no_of_stations = len(station_dict.keys())
    ds1_dates = becgis.ConvertDatetimeDate(ds1_dates, out='datetime')

    path_scatter = os.path.join(output_dir, 'scatter_plots')
    path_ts = os.path.join(output_dir, 'time_series')
    path_int = os.path.join(output_dir, 'interp_errors')
    for folder in (path_scatter, path_ts, path_int):
        if not os.path.exists(folder):
            os.makedirs(folder)

    for i, station in enumerate(station_dict.keys()):
        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.CommonDates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size < min_records:
            print("____station {0} of {1} skipped____ (less than {2} matching records)".format(
                i + 1, no_of_stations, min_records))
            continue

        xpixel, ypixel = pixelcoordinates(station[0], station[1], ds1_fhs[0])
        if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
            print("Skipping station ({0}), cause its not on the map".format(
                station))
            continue

        ds1_values = [
            becgis.OpenAsArray(ds1_fhs[ds1_dates == date][0],
                               nan_values=True)[ypixel, xpixel]
            for date in common_dates
        ]
        common_station_values = [
            station_values[station_dates == date][0]
            for date in common_dates
        ]

        results[station] = pairwise_validation(ds1_values,
                                               common_station_values)
        results[station] += (sample_size, )
        pixel_coordinates.append((xpixel, ypixel))
        processed_stations.append(station)

        xlabel = '{0} {1} {2}'.format(dataset_names[0], quantity_unit[0],
                                      quantity_unit[1])
        ylabel = '{0} {1} {2}'.format(dataset_names[1], quantity_unit[0],
                                      quantity_unit[1])

        if station_names is not None:
            title = station_names[station]
            station_ids[station] = station_names[station]
            fn = os.path.join(
                path_scatter,
                '{0}_vs_{1}.png'.format(station_names[station],
                                        dataset_names[0]))
            fnts = os.path.join(
                path_ts,
                '{0}_vs_{1}.png'.format(station_names[station],
                                        dataset_names[0]))
        else:
            title = station
            # Same numbering as the plot filenames below.
            station_ids[station] = 'station_{0}'.format(i)
            fn = os.path.join(
                path_scatter,
                '{0}_vs_station_{1}.png'.format(dataset_names[0], i))
            fnts = os.path.join(
                path_ts,
                '{0}_vs_station_{1}.png'.format(dataset_names[0], i))

        suptitle = 'pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, bias: {3:.5f}, n: {4:.0f}'.format(
            results[station][0], results[station][1], results[station][2],
            results[station][3], results[station][4])

        plot_scatter_series(ds1_values, common_station_values, xlabel,
                            ylabel, title, fn, suptitle=suptitle,
                            dates=common_dates)

        xaxis_label = '{0} {1}'.format(quantity_unit[0], quantity_unit[1])
        xlabel = '{0}'.format(dataset_names[0])
        ylabel = '{0}'.format(dataset_names[1])
        plot_time_series(ds1_values, common_station_values, common_dates,
                         xlabel, ylabel, xaxis_label, title, fnts,
                         suptitle=suptitle)

        print("station {0} ({3}) of {1} finished ({2} matching records)".format(
            i + 1, no_of_stations, sample_size, title))

    n = len(results)
    csv_filename = os.path.join(
        output_dir,
        '{0}stations_vs_{1}_indicators.csv'.format(n, dataset_names[0]))
    # 'wb' is the csv convention of the Python 2 this file targets.
    with open(csv_filename, 'wb') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow([
            'longitude', 'latitude', 'station_id', 'pearson', 'rmse',
            'nash_sutcliffe', 'bias', 'no_of_samples'
        ])
        for station in processed_stations:
            # station_ids also covers station_names=None, where indexing
            # station_names directly would raise a TypeError.
            writer.writerow([
                station[1], station[0], station_ids[station],
                results[station][0], results[station][1],
                results[station][2], results[station][3],
                results[station][4]
            ])

    rslt = {
        'Relative Bias': list(),
        'RMSE': list(),
        'Pearson Coefficient': list(),
        'Nash-Sutcliffe Coefficient': list(),
        'Number Of Samples': list()
    }
    for station in processed_stations:
        value = results[station]
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in rslt.items():
        title = '{0}'.format(key)
        print(title)
        # Compare by value; `is` only matched through string interning.
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        value = value[(~np.isnan(value)) & (~np.isinf(value))]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(
            np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value, title, xlabel, output_dir, suptitle=suptitle)

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.GetGeoInfo(ds1_fhs[0])
    dummy_map = becgis.OpenAsArray(ds1_fhs[0])
    grid = np.mgrid[0:ysize, 0:xsize]
    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    # Use the processing order so every indicator lines up with its entry
    # in pixel_coordinates.
    ordered_results = [results[station] for station in processed_stations]
    for i, var in enumerate(unzip(ordered_results)):
        xy = np.array(pixel_coordinates)[~np.isnan(var)]
        z = var[~np.isnan(var)]
        interpolation_field = interpolate.griddata(
            xy, z, (grid[1], grid[0]), method=method,
            fill_value=np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(
            path_int,
            '{0}_{1}stations_vs_{2}.tif'.format(var_names[i], len(xy),
                                                dataset_names[0]))
        becgis.CreateGeoTiff(fh, interpolation_field, driver, NDV, xsize,
                             ysize, GeoT, Projection)

    return results