def visualize_data(p):
    """Plot a fixed set of regression-data columns.

    Reads the baseline regression CSV, outer-merges it onto a zero-filled
    frame sized from the country-ids raster, and plots 'lat' and 'lon' from
    that merged frame. The remaining columns are plotted from ``p.full_df``.

    Args:
        p: project object; ``baseline_regression_data_path``,
            ``country_ids_raster_path`` and ``full_df`` are read from it.
    """
    # TODO: if the data was already loaded, this reads the CSV twice
    # (time consuming); optimize?
    land_table = pd.read_csv(p.baseline_regression_data_path)

    # Zero-filled frame with as many rows as the raster reports in .size, so
    # that pixels absent from the CSV still get a row after the outer merge.
    pixel_count = hb.ArrayFrame(p.country_ids_raster_path).size
    grid_table = pd.DataFrame(np.zeros(pixel_count))
    padded_table = pd.merge(grid_table, land_table, how='outer',
                            left_index=True, right_on='pixel_id')

    for coordinate in ('lat', 'lon'):
        plot_col(padded_table, coordinate)

    # 'lat_sin' appears twice because the original code plots it twice.
    for feature in ('slope', 'lon_sin', 'lat_sin', 'lat_sin'):
        plot_col(p.full_df, feature)
def export_raster(df, col_name, savefig, full_df_return=False):
    """Export one column of a DataFrame as a single-band Float32 GeoTIFF.

    The column is outer-merged onto a zero-filled frame sized from the
    module-level ``match_raster``, reshaped to 2160 rows x 4320 columns and
    written to ``savefig`` with the projection and geotransform copied from
    ``match_raster``.

    Args:
        df: DataFrame holding ``col_name``. After ``reset_index()`` it must
            expose a 'pixel_id' column (presumably the index is named
            'pixel_id' -- confirm against callers).
        col_name: name of the column to export.
        savefig: path of the GeoTIFF to create.
        full_df_return: unused; kept so existing callers keep working.
    """
    x_pixels = 4320  # = match.RasterXSize
    y_pixels = 2160  # = match.RasterYSize

    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))
    column_df = df[col_name].reset_index()

    # Merge with zeros_df to include non-ag pixels.
    full_df = pd.merge(zeros_df, column_df, left_index=True,
                       right_on='pixel_id', how='outer')

    # as_matrix() was removed in pandas 1.0; .values returns the same ndarray
    # on both old and current pandas versions.
    values = full_df[col_name].values.reshape(
        (y_pixels, x_pixels)).astype(np.float32)

    driver = gdal.GetDriverByName('GTiff')
    output = driver.Create(savefig, x_pixels, y_pixels, 1, gdal.GDT_Float32)
    output.GetRasterBand(1).WriteArray(values)

    # Copy the georeferencing from the match raster.
    match = gdal.Open(match_raster)
    output.SetGeoTransform(match.GetGeoTransform())
    output.SetProjection(match.GetProjection())
    output.FlushCache()
    # output.GetRasterBand(1).SetNoDataValue(np.nan)
    output = None  # dropping the last reference closes the GDAL dataset

    print('Exported raster at ' + savefig)
def test_arrayframe_add(self):
    """Call hb.add once with two raster paths and once with two ArrayFrames."""
    # Case 1: both operands given as paths.
    path_case_output = hb.temp('.tif', 'testing_arrayframe_add', True)
    hb.add(self.global_1deg_raster_path, self.global_1deg_raster_path,
           path_case_output)

    # Case 2: both operands given as the same ArrayFrame object.
    af_case_output = hb.temp('.tif', 'testing_arrayframe_add', True)
    operand = hb.ArrayFrame(self.global_1deg_raster_path)
    hb.add(operand, operand, af_case_output)
def visualize_data(df, col_name, savefig=False, colorscheme='diverging',
                   vminmax=False, savecmap=False, shape=(2160, 4320),
                   title=None, resize=False):
    """Plot one column of a DataFrame as a global map with coastlines.

    The column is outer-merged onto a zero-filled frame sized from the
    module-level ``match_raster``, reshaped to ``shape``, flipped vertically
    and drawn with Basemap.

    Args:
        df: DataFrame holding ``col_name``. After ``reset_index()`` it must
            expose a 'pixel_id' column.
        col_name: column to plot; also the default title.
        savefig: path to save the figure to, or a falsy value to skip saving.
        colorscheme: 'diverging' (PiYG) or 'sequential' (inferno_r).
        vminmax: ``(vmin, vmax)`` pair for the colour scale; False/None uses
            the min and max of ``df[col_name]``.
        savecmap: when truthy, return the colour-scale settings.
        shape: 2-D shape the merged column is reshaped to.
        title: plot title; defaults to ``col_name``.
        resize: forwarded to ``customColorMap``.

    Returns:
        ``((vmin, vmax), resize)`` when ``savecmap`` is truthy, else None.

    Raises:
        ValueError: if ``colorscheme`` is not a supported name. Previously
            this surfaced later as an UnboundLocalError on ``cmap``.
    """
    colormap_names = {
        'diverging': 'PiYG',
        'sequential': 'inferno_r',  # alternatively 'magma'
    }
    # Validate before doing any expensive work or opening a figure.
    if colorscheme not in colormap_names:
        raise ValueError(
            'Unknown colorscheme ' + repr(colorscheme) + '; expected one of '
            + str(sorted(colormap_names)))

    fig, axes = plt.subplots(1, 1, figsize=(20, 15))

    # -- Prepare data --
    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))
    column_df = df[col_name].reset_index()
    # Merge with zeros_df to include non-ag pixels.
    full_df = pd.merge(zeros_df, column_df, left_index=True,
                       right_on='pixel_id', how='outer')

    # -- Colour scale --
    if vminmax is False or vminmax is None:
        serie = df[col_name]
        vmin, vmax = serie.min(), serie.max()
    else:
        vmin, vmax = vminmax[0], vminmax[1]

    raw_cmap = plt.get_cmap(colormap_names[colorscheme])
    cmap = customColorMap(raw_cmap, vmin, vmax, resize)

    # -- Plot --
    data = np.array(full_df[col_name])
    bm = Basemap()
    im = bm.imshow(np.flipud(data.reshape(shape)), cmap=cmap)
    bm.drawcoastlines(linewidth=0.15, color='0.1')
    plt.colorbar(im, orientation='vertical', fraction=0.0234, pad=0.04)
    plt.title(col_name if title is None else title)
    plt.show()

    if savefig:
        fig.savefig(savefig)
    if savecmap:
        return (vmin, vmax), resize
def create_land_mask():
    """Build a one-column 'land_mask' DataFrame from the country-ids raster.

    Returns:
        DataFrame with a single 'land_mask' column that is 1 where the raster
        value is greater than 0 and 0 otherwise.
    """
    def _to_mask_value(country_id):
        # Anything that is not strictly positive (including NaN) maps to 0.
        if country_id > 0:
            return 1
        return 0

    country_ids_path = '../ipbes_invest_crop_yield_project/input/Cartographic/country_ids.tif'
    mask_df = convert_af_to_1d_df(hb.ArrayFrame(country_ids_path))
    mask_df['land_mask'] = mask_df[0].apply(_to_mask_value)
    return mask_df.drop(0, axis=1)
def input_flex_as_af(intput_af_or_path):
    """Return an ArrayFrame for a path string or an existing ArrayFrame.

    Args:
        intput_af_or_path: a path string (loaded with ``hb.ArrayFrame``) or an
            ``hb.ArrayFrame`` instance (returned unchanged).

    Raises:
        NameError: if the argument is neither a string nor an ArrayFrame.
    """
    if isinstance(intput_af_or_path, str):
        return hb.ArrayFrame(intput_af_or_path)

    if isinstance(intput_af_or_path, hb.ArrayFrame):
        return intput_af_or_path

    message = ('input_flex_as_af unable to interpret intput_af_or_path of '
               + str(intput_af_or_path))
    raise NameError(message)
def visualize_two_maps(serie1, serie2, savefig=False, colorscheme='diverging',
                       resize=False, shape=(2160, 4320)):
    """Plot two Series as stacked global maps sharing one colour scale.

    Each Series is outer-merged onto a zero-filled frame sized from the
    module-level ``match_raster``; the last column of the merged frame is
    reshaped to ``shape``, flipped vertically and drawn with Basemap.

    Args:
        serie1: Series for the top panel. After ``reset_index()`` it must
            expose a 'pixel_id' column; its ``name`` is the panel title.
        serie2: Series for the bottom panel, same requirements.
        savefig: path to save the figure to (dpi=300), or a falsy value to
            skip saving.
        colorscheme: 'diverging' (PiYG) or 'sequential' (inferno_r).
        resize: forwarded to ``customColorMap_v``.
        shape: 2-D shape each merged column is reshaped to.

    Raises:
        ValueError: if ``colorscheme`` is not a supported name. Previously
            the function printed 'Wrong colorscheme' and then crashed with an
            UnboundLocalError on ``cmap``.
    """
    colormap_names = {
        'diverging': 'PiYG',
        'sequential': 'inferno_r',  # alternatively 'magma'
    }
    # Validate before doing any expensive work or opening a figure.
    if colorscheme not in colormap_names:
        raise ValueError(
            'Wrong colorscheme: ' + repr(colorscheme) + '; expected one of '
            + str(sorted(colormap_names)))

    fig, axes = plt.subplots(2, 1, figsize=(20, 15))

    # Global vmin and vmax so both panels share the same colour scale.
    vmax = max(serie1.max(), serie2.max())
    vmin = min(serie1.min(), serie2.min())

    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))

    raw_cmap = plt.get_cmap(colormap_names[colorscheme])
    cmap = customColorMap_v(raw_cmap, vmin, vmax, resize=resize)

    im = None
    for ax, serie in zip(axes, (serie1, serie2)):
        # Merge with zeros_df to include non-ag pixels.
        full_df = pd.merge(zeros_df, serie.reset_index(), left_index=True,
                           right_on='pixel_id', how='outer')
        data = np.array(full_df[full_df.columns[-1]])
        bm = Basemap(ax=ax)
        im = bm.imshow(np.flipud(data.reshape(shape)), cmap=cmap,
                       vmin=vmin, vmax=vmax)
        bm.drawcoastlines(linewidth=0.15, color='0.1')
        ax.set_title(serie.name)

    # One colorbar shared by both panels (both use the same vmin/vmax).
    fig.colorbar(im, ax=axes.ravel().tolist())

    if savefig:
        fig.savefig(savefig, dpi=300)
def add_crop_layers_from_dir(input_dir):
    """Sum the five crop area-fraction rasters found in ``input_dir``.

    Only cells where the FIRST layer's value lies in [0, 1] are accumulated;
    every other cell stays 0.

    Args:
        input_dir: directory containing the five '<code> ^ area_fraction ^
            <label>.tif' rasters.

    Returns:
        numpy array with the first layer's shape holding the summed values.
    """
    layers = (
        ('c4per', 'C4 perennial crops'),
        ('c4ann', 'C4 annual crops'),
        ('c3per', 'C3 perennial crops'),
        ('c3nfx', 'C3 nitrogen-fixing crops'),
        ('c3ann', 'C3 annual crops'),
    )
    uris_to_combine = []
    for code, label in layers:
        file_name = code + ' ^ area_fraction ^ ' + label + '.tif'
        uris_to_combine.append(os.path.join(input_dir, file_name))
    print('uris_to_combine', uris_to_combine)

    reference_af = hb.ArrayFrame(uris_to_combine[0])
    total = np.zeros(reference_af.shape)

    # Valid cells are decided from the first layer only.
    in_range = (reference_af.data >= 0.0) & (reference_af.data <= 1.0)
    valid_cells = np.where(in_range)

    for layer_uri in uris_to_combine:
        layer_data = hb.ArrayFrame(layer_uri).data
        total[valid_cells] += layer_data[valid_cells]
    return total
def add_with_valid_mask(a_path, b_path, output_path, valid_mask_path, ndv):
    """Write ``a + b`` where the valid mask equals 1 and ``ndv`` elsewhere.

    Args:
        a_path: first input raster.
        b_path: second input raster.
        output_path: raster to write.
        valid_mask_path: raster whose cells equal 1 where the sum is kept.
        ndv: value written outside the mask; also passed on as the output ndv.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def masked_sum(first, second, mask):
        keep = mask == 1
        return np.where(keep, first + second, ndv)

    inputs = [a_path, b_path, valid_mask_path]
    hb.raster_calculator_flex(inputs, masked_sum, output_path, ndv=ndv)
    return hb.ArrayFrame(output_path)
def add_smart(a, b, a_valid_mask, b_valid_mask, output_ndv, output_path):
    """Write ``a + b`` where both valid masks equal 1, ``output_ndv`` elsewhere.

    Args:
        a: first input (path or ArrayFrame).
        b: second input (path or ArrayFrame).
        a_valid_mask: mask raster for ``a``. When None, ``a.valid_mask`` is
            used, which is what the previous body always read.
        b_valid_mask: mask raster for ``b``; None falls back to
            ``b.valid_mask``.
        output_ndv: value written where either mask is not 1; also passed on
            as the output ndv.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    # NOTE(review): the previous body ignored the mask parameters and always
    # read a.valid_mask / b.valid_mask. The explicit parameters now take
    # precedence -- confirm this matches the callers' intent.
    if a_valid_mask is None:
        a_valid_mask = a.valid_mask
    if b_valid_mask is None:
        b_valid_mask = b.valid_mask

    # op takes exactly one parameter per input raster (raster_calculator_flex
    # checks this); output_ndv comes from the enclosing scope instead of being
    # a fifth parameter that never receives a value.
    def op(a, b, a_valid_mask, b_valid_mask):
        # Parentheses are required: '&' binds tighter than '==', so the old
        # 'x == 1 & y == 1' was parsed as the chained 'x == (1 & y) == 1'.
        both_valid = (a_valid_mask == 1) & (b_valid_mask == 1)
        return np.where(both_valid, a + b, output_ndv)

    hb.raster_calculator_flex([a, b, a_valid_mask, b_valid_mask], op,
                              output_path, ndv=output_ndv)
    return hb.ArrayFrame(output_path)
def rasters_to_tabular_csv(rasters_paths, csv_name, latlon=False, col_names=None):
    """Flatten rasters into one table of land pixels and write it as CSV.

    Each raster becomes one column. Rows are restricted to pixels where the
    land mask equals 1, and rows containing NaN are dropped. The result is
    written to '../Data/intermediate/<csv_name>.csv'.

    Args:
        rasters_paths: list of raster paths, one column per raster.
        csv_name: output file name without directory or '.csv' suffix.
        latlon: when truthy, add 'lon' and 'lat' columns derived from the
            pixel id (assumes a 4320 x 2160 grid).
        col_names: column names for the rasters; defaults to each file's
            'file_root_no_suffix' as reported by ``hb.explode_path``.
    """
    rasters_names = []
    dfs_list = []
    for path in rasters_paths:
        dfs_list.append(convert_af_to_1d_df(hb.ArrayFrame(path)))
        rasters_names.append(hb.explode_path(path)['file_root_no_suffix'])

    if col_names is None:
        col_names = rasters_names

    df = concatenate_dfs_horizontally(dfs_list, col_names)

    # Get rid of the ocean cells.
    df['pixel_id'] = df.index
    land_mask = create_land_mask()
    df = df.merge(land_mask, right_index=True, left_on='pixel_id')

    # .copy() makes df_land an independent frame, so the column assignments
    # below do not write to a slice of df (SettingWithCopyWarning).
    df_land = df[df['land_mask'] == 1].dropna().copy()

    if latlon:
        pixel_id = df_land['pixel_id']
        df_land['lon'] = (((pixel_id % 4320.) / 4320 - .5) * 360.0).round(2)
        # NOTE(review): the row is derived with round() rather than floor(),
        # which assigns the second half of each raster row to the next row --
        # confirm whether that is intended before changing the formula.
        df_land['lat'] = (((pixel_id / 4320.).round() / 2160 - .5) * 180.).round(2)

    # NOTE(review): the original had 'dfland = df_land.set_index("pixel_id")',
    # a misspelled target whose result was never used. It is removed here so
    # the CSV layout stays exactly as before (pixel_id remains a column).
    print('Writing csv ' + csv_name)
    df_land.to_csv('../Data/intermediate/' + csv_name + '.csv')
def parse_input_flex(input_flex):
    """Turn a flex input into an object usable as an ArrayFrame.

    Args:
        input_flex: a path string, which is loaded with ``hb.ArrayFrame``, or
            any other non-array object, which is returned unchanged.

    Returns:
        ``hb.ArrayFrame(input_flex)`` for a string, otherwise ``input_flex``.

    Raises:
        NotImplementedError: if ``input_flex`` is a numpy array; building an
            ArrayFrame without georeferencing is not supported yet.
    """
    if isinstance(input_flex, str):
        return hb.ArrayFrame(input_flex)

    if isinstance(input_flex, np.ndarray):
        # This branch used to print the message and then fall through to
        # 'return output' with output unbound (UnboundLocalError). Raise an
        # explicit error carrying the same message instead.
        # output = hb.create_af_from_array(input_flex)
        raise NotImplementedError(
            'parse_input_flex is NYI for arrays because i first need to figure out how to have an af without georeferencing.'
        )

    return input_flex
def resample_lulc(p):
    """Align every extracted LULC raster to a cylindrical match raster.

    Does nothing unless ``p.tasks['resample_lulc']`` is truthy. Otherwise the
    ha-per-cell raster is reprojected to ``p.match_r_path`` and every '.tif'
    under '<extract_lulc dir>/<scenario>/<year>' is aligned to it and written
    under '<p.resample_lulc_dir>/<scenario>/<year>'.

    Args:
        p: project object; ``tasks``, ``base_data_ha_per_cell_path``,
            ``match_r_path``, ``scenario_names``, ``years``, ``task_dirs`` and
            ``resample_lulc_dir`` are read from it.
    """
    if not p.tasks['resample_lulc']:
        return

    reference_af = hb.ArrayFrame(p.base_data_ha_per_cell_path)
    match_r_path = p.match_r_path
    hb.reproject_to_cylindrical(reference_af.uri, match_r_path)
    # hb.reproject_to_epsg(match_af.uri, match_r_path, 54012)

    # The result is unused, exactly as in the original; the construction is
    # kept in case loading the reprojected raster has side effects.
    hb.ArrayFrame(match_r_path)

    for scenario in p.scenario_names:
        for year in p.years:
            year_label = str(year)
            read_dir = os.path.join(p.task_dirs['extract_lulc'], scenario, year_label)
            write_dir = os.path.join(p.resample_lulc_dir, scenario, year_label)
            hb.create_dirs(write_dir)

            tif_files = hb.list_files_in_dir_recursively(
                read_dir, filter_extensions=['.tif'])
            for filename in tif_files:
                input_path = os.path.join(read_dir, filename)
                # NOTE(review): if the listed names already end in '.tif',
                # outputs are named '*.tif.tif' -- confirm this is intended.
                output_name = os.path.basename(filename) + '.tif'
                output_path = os.path.join(write_dir, output_name)
                print('input output', input_path, output_path)
                hb.align_dataset_to_match(input_path, match_r_path, output_path)
def arrayframe_load_and_save():
    """Save a 3 x 6 array as a GeoTIFF and open it as an ArrayFrame.

    The temporary file is created with ``remove_at_exit=False``.
    """
    sample = np.arange(18).reshape(3, 6)
    target_uri = hb.temp('.tif', remove_at_exit=False)

    # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
    save_options = {
        'geotransform_override': hb.calc_cylindrical_geotransform_from_array(sample),
        'projection_override': 'plate_carree',
    }
    hb.save_array_as_geotiff(sample, target_uri, **save_options)

    hb.ArrayFrame(target_uri)
def test_arrayframe_load_and_save(self):
    """Save a 3 x 6 array as a GeoTIFF and open it as an ArrayFrame.

    The array is saved with ndv=255 and data_type=1 in 'wgs84'; the temporary
    file is created with ``remove_at_exit=True``.
    """
    sample = np.arange(18).reshape(3, 6)
    target_uri = hb.temp('.tif', remove_at_exit=True)

    # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
    save_options = {
        'geotransform_override': hb.calc_cylindrical_geotransform_from_array(sample),
        'projection_override': 'wgs84',
        'ndv': 255,
        'data_type': 1,
    }
    hb.save_array_as_geotiff(sample, target_uri, **save_options)

    hb.ArrayFrame(target_uri)
def raster_calculator_af_flex(input_, op, output_path, **kwargs):
    """Apply ``op`` to one or more flex inputs and write the result raster.

    In HB, a flex input is a string that points to a file, an ArrayFrame, or
    a list of those. List entries may also be floats, which are handed to the
    calculator as raw values.

    Args:
        input_: a path, an ``hb.ArrayFrame``, or a list of paths, ArrayFrames
            and floats.
        op: callable applied by ``hb.raster_calculator_hb`` to the inputs.
        output_path: raster to write.
        **kwargs: optional settings:
            datatype: output datatype; defaults to the first raster's.
            ndv: output no-data value; defaults to the first raster's. An
                explicit 0 is honoured.
            gtiff_creation_options: list of GTiff creation options; defaults
                to ['TILED=YES', 'BIGTIFF=IF_SAFER'].
            compress: when truthy, 'COMPRESS=deflate' is added.
            add_overviews: when truthy, overviews are added to the output.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.

    Raises:
        NameError: if ``input_`` is not a path, ArrayFrame or iterable.
    """
    print('input_', input_)

    # Normalise to a list. A lone ArrayFrame must be wrapped like a lone
    # string: the old code replaced it with its path string and then iterated
    # that string character by character.
    if isinstance(input_, (str, hb.ArrayFrame)):
        input_ = [input_]
    else:
        try:
            input_ = list(input_)
        except TypeError:
            raise NameError(
                'input_ given to raster_calculator_af_flex() not understood. Give a path or list of paths.'
            ) from None

    # Replace ArrayFrames by their paths; everything else passes through.
    final_input = []
    for c, i in enumerate(input_):
        print('c,i', c, i)
        final_input.append(i.path if isinstance(i, hb.ArrayFrame) else i)
    input_ = final_input

    # Floats are raw values, not rasters, so they carry no datatype or ndv.
    raster_inputs = [i for i in input_ if not isinstance(i, float)]

    # Verify datatypes.
    datatype = kwargs.get('datatype', None)
    if not datatype:
        print('input_', input_)
        datatypes = [hb.get_datatype_from_uri(i) for i in raster_inputs]
        print('datatypes', datatypes)
        if len(set(datatypes)) > 1:
            L.info(
                'Rasters given to raster_calculator_af_flex() were not all of the same type. Defaulting to using first input datatype.'
            )
        datatype = datatypes[0]

    # Check NDVs. Compare against None so that an explicit ndv of 0 is kept.
    ndv = kwargs.get('ndv', None)
    if ndv is None:
        ndvs = [hb.get_ndv_from_path(i) for i in raster_inputs]
        if len(set(ndvs)) > 1:
            L.info(
                'NDVs used in rasters given to raster_calculator_af_flex() were not all the same. Defaulting to using first value.'
            )
        ndv = ndvs[0]

    # Copy the caller's list so that appending COMPRESS below cannot mutate it.
    gtiff_creation_options = kwargs.get('gtiff_creation_options', None)
    if gtiff_creation_options:
        gtiff_creation_options = list(gtiff_creation_options)
    else:
        gtiff_creation_options = ['TILED=YES', 'BIGTIFF=IF_SAFER']  # , 'COMPRESS=lzw']

    if kwargs.get('compress', None):
        gtiff_creation_options.append('COMPRESS=deflate')

    # Build tuples to match the required format of raster_calculator:
    # (path, band 1) for rasters and (value, 'raw') for floats. Deciding per
    # element also works when the first input is a float.
    input_tuples_list = [
        (i, 'raw') if isinstance(i, float) else (i, 1) for i in input_
    ]

    print('input_tuples_list', input_tuples_list)
    hb.raster_calculator_hb(input_tuples_list, op, output_path, datatype, ndv,
                            gtiff_creation_options=gtiff_creation_options)

    if kwargs.get('add_overviews'):
        hb.add_overviews_to_path(output_path)

    return hb.ArrayFrame(output_path)
def reproject_align():
    """Open the sample raster as an ArrayFrame and print it."""
    print(hb.ArrayFrame("wgs84_026deg_-9999ndv.tif"))
def load_data(p):
    """Build the regression DataFrame and store it on ``p``.

    When ``p.run_this`` is truthy, the baseline regression CSV and the
    aggregated crop CSV are merged on 'pixel_id', optionally subsampled or
    split, cleaned, and extended with derived features. The result is stored
    as ``p.df``; ``p.full_df`` is that frame outer-merged onto a zero-filled
    frame sized from the country-ids raster.

    Args:
        p: project object. Reads ``run_this``, ``subset``,
            ``aggregated_crop_data_csv_path``, ``baseline_regression_data_path``
            and ``country_ids_raster_path``; writes ``df`` and ``full_df``.
    """
    # NOTE(review): the nesting below was reconstructed from collapsed source;
    # everything is assumed to sit inside 'if p.run_this' -- confirm.
    if p.run_this:
        crop_types_df = pd.read_csv(p.aggregated_crop_data_csv_path)
        df_land = pd.read_csv(p.baseline_regression_data_path)
        df = df_land.merge(crop_types_df, how='outer', on='pixel_id')

        if p.subset == True:
            # Keep a random 2% sample of the rows.
            df = df.sample(frac=0.02, replace=False, weights=None, random_state=None, axis=0)
        elif p.subset == False:
            # Split off validation data. The validation parts
            # (X_validation, y_validation) are not used or stored below; only
            # the training part is kept in df.
            x = df.drop(['calories_per_ha'], axis=1)
            y = df['calories_per_ha']
            X, X_validation, Y, y_validation = train_test_split(x, y)
            df = X.merge(pd.DataFrame(Y), how='outer', left_index=True, right_index=True)
        elif p.subset is None:
            # Careful: keep all rows only for plotting, because using the
            # full data for modelling would leak validation data.
            pass

        # Remove cal_per_ha per crop type for now
        df = df.drop(labels=[
            'c3_annual_calories_per_ha', 'c3_perennial_calories_per_ha',
            'c4_annual_calories_per_ha', 'c4_perennial_calories_per_ha',
            'nitrogen_fixer_calories_per_ha'
        ], axis=1)

        # Remove helper columns (not features)
        df = df.drop(labels=['Unnamed: 0', 'country_ids', 'ha_per_cell_5m'], axis=1)

        # Rename cols
        df = df.rename(
            columns={
                'bio12': 'precip',
                'bio1': 'temperature',
                'minutes_to_market_5m': 'min_to_market',
                'gdp_per_capita_2000_5m': 'gdp_per_capita',
                'gdp_2000': 'gdp'
            })

        # Encode climate zones as strings (integer code -> zone label)
        climate_zones_map = {
            1: 'Af', 2: 'Am', 3: 'Aw', 5: 'BWk', 4: 'BWh', 7: 'BSk', 6: 'BSh',
            14: 'Cfa', 15: 'Cfb', 16: 'Cfc', 8: 'Csa', 9: 'Csb', 10: 'Csc',
            11: 'Cwa', 12: 'Cwb', 13: 'Cwc', 25: 'Dfa', 26: 'Dfb', 27: 'Dfc',
            28: 'Dfd', 17: 'Dsa', 18: 'Dsb', 19: 'Dsc', 20: 'Dsd', 21: 'Dwa',
            22: 'Dwb', 23: 'Dwc', 24: 'Dwd', 30: 'EF', 29: 'ET'
        }
        df['climate_zones'] = df['climate_zones'].map(
            climate_zones_map)  # TODO Why was it commented?

        # Encode climate zones as dummies, each column prefixed 'climatezone_'
        climate_dummies_df = pd.get_dummies(df['climate_zones'])
        for col in climate_dummies_df.columns:
            climate_dummies_df = climate_dummies_df.rename(
                {col: str('climatezone_' + col)}, axis=1)
        df = df.merge(climate_dummies_df, right_index=True, left_index=True)
        df = df.drop('climate_zones', axis=1)

        # Log some skewed variables. Zeros are left at 0 rather than logged.
        df['calories_per_ha'] = df['calories_per_ha'].apply(lambda x: np.log(x) if x != 0 else 0)
        for col in [
                'gdp_per_capita', 'altitude', 'min_to_market', 'gpw_population'
        ]:
            df[str('log_' + col)] = df[col].apply(lambda x: np.log(x) if x != 0 else 0)

        # TODO figure out how to encode soil variables better?

        # Add precip_annualrange
        df['precip_annualrange'] = df['precip_wet_mth'] - df['precip_dry_mth']

        # Lat/Lon: sine of the longitude, with 'lon' converted to radians
        df['sin_lon'] = df['lon'].apply(lambda x: np.sin(np.radians(x)))

        # Encode NaNs properly: 0 in 'slope' and 255 in the soil indices are
        # replaced by NaN so the dropna() below removes those rows.
        df['slope'] = df['slope'].replace({0: np.nan})  # 143 NaN in 'slope' variable
        for soil_var in [
                'workability_index', 'toxicity_index', 'rooting_conditions_index',
                'oxygen_availability_index', 'protected_areas_index',
                'nutrient_retention_index', 'nutrient_availability_index',
                'excess_salts_index'
        ]:
            df[soil_var] = df[soil_var].replace({255: np.nan})

        # Drop NaN rows and cells with no ag. This runs after the log
        # transform, so it tests the transformed 'calories_per_ha' values.
        df = df.dropna()
        df = df[df['calories_per_ha'] != 0]

        # df.set_index('pixel_id')  ## TODO Why is this commented out ?

        p.df = df

        # Outer-merge onto a zero-filled frame sized from the country-ids
        # raster so pixels without data still get a row in p.full_df.
        match_af = hb.ArrayFrame(p.country_ids_raster_path)
        zeros_array = np.zeros(match_af.size)
        p.full_df = pd.DataFrame(zeros_array)
        p.full_df = pd.merge(p.full_df, p.df, left_index=True, right_on='pixel_id', how='outer')
def add(a_flex, b_flex, output_path):
    """Write the sum of two flex inputs to ``output_path``.

    Args:
        a_flex: first operand, passed on to ``hb.raster_calculator_af_flex``.
        b_flex: second operand, passed on the same way.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def sum_cells(left, right):
        return left + right

    operands = [a_flex, b_flex]
    hb.raster_calculator_af_flex(operands, sum_cells, output_path)
    return hb.ArrayFrame(output_path)
def create_baseline_regression_data(p):
    """Flatten the input rasters into one land-pixel table and write it as CSV.

    ``p.baseline_regression_data_path`` is always set. When ``p.run_this`` is
    truthy, every raster in ``paths_to_add`` becomes one column, negative
    values are set to 0, rows are restricted to pixels where the land mask
    equals 1, rows with NaN are dropped, 'lon'/'lat' columns are derived from
    the pixel id, and the table is written to that path.

    Args:
        p: project object. Reads ``cur_dir``, ``run_this`` and the raster path
            attributes listed in ``paths_to_add``; writes
            ``baseline_regression_data_path``.
    """
    # NOTE(review): formatting was reconstructed from collapsed source; which
    # list entries are commented out and what sits inside 'if p.run_this' are
    # the most plausible reading -- confirm against the original file.
    p.baseline_regression_data_path = os.path.join(
        p.cur_dir, 'baseline_regression_data.csv')

    # Iterate through input_paths adding them. Currently also fixes fertilizer nan issues.
    af_names_list = []
    dfs_list = []
    paths_to_add = [
        # p.country_names_path,
        p.country_ids_raster_path,
        p.ha_per_cell_5m_path,
        #p.precip_path,
        #p.temperature_path,
        p.slope_path,
        p.altitude_path,
        p.workability_index_path,
        p.toxicity_index_path,
        p.rooting_conditions_index_path,
        # p.rainfed_land_percent_path,
        p.protected_areas_index_path,
        p.oxygen_availability_index_path,
        p.nutrient_retention_index_path,
        p.nutrient_availability_index_path,
        # p.irrigated_land_percent_path,
        p.excess_salts_index_path,
        # p.cultivated_land_percent_path,
        # p.crop_suitability_path,
        p.gdp_2000_path,
        p.gdp_gecon,
        p.minutes_to_market_path,
        p.pop_path,
        p.climate_zones_path,
        p.temp_avg_path,
        p.temp_diurnalrange_path,
        p.temp_isothermality_path,
        p.temp_seasonality_path,
        p.temp_annualmax_path,
        p.temp_annualmin_path,
        p.temp_annualrange_path,
        #p.temp_wettestq_path,
        #p.temp_dryestq_path,
        #p.temp_warmestq_path,
        #p.temp_coldestq_path,
        p.precip_path,
        p.precip_wet_mth_path,
        p.precip_dry_mth_path,
        p.precip_seasonality_path,
        # p.precip_wettestq_path,
        # p.precip_dryestq_path,
        # p.precip_warmestq_path,
        # p.precip_coldestq_path
    ]

    if p.run_this:
        # match_af is not used again in this function.
        match_af = hb.ArrayFrame(paths_to_add[0])

        # One 1-D DataFrame per raster, named after the raster's file root.
        for path in paths_to_add:
            name = hb.explode_path(path)['file_root']
            af = hb.ArrayFrame(path)
            af_names_list.append(name)
            df = convert_af_to_1d_df(af)
            dfs_list.append(df)

        L.info('Concatenating all dataframes.')
        df = concatenate_dfs_horizontally(dfs_list, af_names_list)

        # Every negative value in any column is replaced by 0.
        df[df < 0] = 0.0

        # Get rid of the oceans cells
        df['pixel_id'] = df.index
        df['pixel_id_float'] = df['pixel_id'].astype('float')
        land_mask = create_land_mask()
        df = df.merge(land_mask, right_index=True, left_on='pixel_id')
        df_land = df[df['land_mask'] == 1]
        df_land = df_land.dropna()

        # lon/lat from the pixel id, assuming a 4320-column by 2160-row grid.
        # The right-hand sides are computed on df and aligned to df_land by
        # index on assignment.
        # NOTE(review): the row is derived with round() rather than floor() --
        # confirm that this is intended.
        df_land['lon'] = ((df['pixel_id_float'] % 4320.) / 4320 - .5) * 360.0
        df_land['lat'] = (
            (df['pixel_id_float'] / 4320.).round() / 2160 - .5) * 180.

        df_land.to_csv(p.baseline_regression_data_path)
def aggregate_crops_by_type(p):
    """Sum per-crop calorie rasters into five crop-type columns and write a CSV.

    CMIP6 and the land-use harmonization project have centered on 5 crop
    types: c3 annual, c3 perennial, c4 annual, c4 perennial, nitrogen fixer.
    The individual crops listed in ``crop_membership`` are aggregated into
    those five categories, using the index of the baseline regression data so
    that spatial indices match.

    ``p.aggregated_crop_data_csv_path`` and ``p.crop_types`` are always set.
    When ``p.run_this`` is truthy, each crop's
    '<crop>_calories_per_ha_masked.tif' under '<p.input_dir>/Crop/crop_calories'
    is added to its type's '<type>_calories_per_ha' column, a total
    'calories_per_ha' column is added, and the table is written to the CSV.

    Args:
        p: project object. Reads ``cur_dir``, ``baseline_regression_data_path``,
            ``input_dir`` and ``run_this``; writes
            ``aggregated_crop_data_csv_path`` and ``crop_types``.
    """
    # NOTE(review): nesting was reconstructed from collapsed source; the final
    # sum and the CSV write are assumed to run once, after both loops, inside
    # 'if p.run_this' -- confirm against the original file.
    p.aggregated_crop_data_csv_path = os.path.join(p.cur_dir, 'aggregated_crop_data.csv')

    baseline_regression_data_df = pd.read_csv(p.baseline_regression_data_path, index_col='pixel_id')

    # Not referenced again in this function.
    vars_names_to_aggregate = [
        # 'production_value_per_ha',
        # 'calories_per_ha',
        'calories_per_ha_masked',
        # 'yield_per_ha'
        # 'proportion_cultivated',
        # 'PotassiumApplication_Rate',
        # 'PhosphorusApplication_Rate',
        # 'NitrogenApplication_Rate',
    ]

    # Crop type -> crop names; each name is the prefix of that crop's raster.
    crop_membership = OrderedDict()
    crop_membership['c3_annual'] = [
        'aniseetc', 'artichoke', 'asparagus', 'bambara', 'barley', 'buckwheat',
        'cabbage', 'canaryseed', 'carob', 'carrot', 'cassava', 'cauliflower',
        'cerealnes', 'chestnut', 'cinnamon', 'cucumberetc', 'currant', 'date',
        'eggplant', 'fonio', 'garlic', 'ginger', 'mixedgrain', 'hazelnut',
        'hempseed', 'hop', 'kapokseed', 'linseed', 'mango', 'mate', 'mustard',
        'nutmeg', 'okra', 'onion', 'greenonion', 'peppermint', 'potato',
        'pumpkinetc', 'pyrethrum', 'ramie', 'rapeseed', 'rice', 'safflower',
        'sisal', 'sorghumfor', 'sourcherry', 'spinach', 'sugarbeet',
        'sunflower', 'taro', 'tobacco', 'tomato', 'triticale', 'tung',
        'vanilla', 'vetch', 'walnut', 'watermelon', 'wheat', 'yam', 'yautia',
    ]
    crop_membership['c3_perennial'] = [
        'almond', 'apple', 'apricot', 'areca', 'avocado', 'banana',
        'blueberry', 'brazil', 'cashewapple', 'cashew', 'cherry', 'chicory',
        'chilleetc', 'citrusnes', 'clove', 'cocoa', 'coconut', 'coffee',
        'cotton', 'cranberry', 'fig', 'flax', 'grapefruitetc', 'grape', 'jute',
        'karite', 'kiwi', 'kolanut', 'lemonlime', 'lettuce', 'abaca',
        'melonetc', 'melonseed', 'oats', 'oilpalm', 'oilseedfor', 'olive',
        'orange', 'papaya', 'peachetc', 'pear', 'pepper', 'persimmon',
        'pineapple', 'pistachio', 'plantain', 'plum', 'poppy', 'quince',
        'quinoa', 'rasberry', 'rubber', 'rye', 'stonefruitnes', 'strawberry',
        'stringbean', 'sweetpotato', 'tangetc', 'tea',
    ]
    crop_membership['c4_annual'] = [
        'maize', 'millet', 'sorghum',
    ]
    crop_membership['c4_perennial'] = [
        'greencorn', 'sugarcane',
    ]
    crop_membership['nitrogen_fixer'] = [
        'bean', 'greenbean', 'soybean', 'chickpea', 'clover', 'cowpea',
        'groundnut', 'lupin', 'pea', 'greenpea', 'pigeonpea', 'lentil',
        'legumenes', 'broadbean', 'castor',
    ]

    p.crop_types = [
        'c3_annual',
        'c3_perennial',
        'c4_annual',
        'c4_perennial',
        'nitrogen_fixer',
    ]

    if p.run_this:
        # Create a DF of zeros, ready to hold the summed results for each crop type.
        # The index comes from baseline_regression_data_df so that spatial indices match.
        crop_specific_df = pd.DataFrame(
            0, index=baseline_regression_data_df.index, columns=['solo_column'])
        crop_types_df = pd.DataFrame(0,
                                     index=baseline_regression_data_df.index,
                                     columns=[
                                         crop_type + '_calories_per_ha'
                                         for crop_type in p.crop_types
                                     ])

        # Iterate through crop_types
        for crop_type, crops in crop_membership.items():
            L.info('Aggregating ' + str(crop_type) + ' ' + str(crops))
            crop_type_col_name = crop_type + '_calories_per_ha'

            # iterate through crops
            for crop in crops:
                crop_col_name = crop + '_calories_per_ha'
                #crop_specific_df[crop_col_name] = np.zeros(len(baseline_regression_data_df.index))
                # Start the crop column as zeros; it is overwritten just below
                # with the values read from the crop's raster.
                crop_specific_df[crop_col_name] = crop_specific_df[
                    'solo_column']
                input_crop_file_name = crop + '_calories_per_ha_masked'
                input_path = os.path.join(p.input_dir, 'Crop/crop_calories',
                                          input_crop_file_name + '.tif')
                af = hb.ArrayFrame(input_path)
                # Column 0 of the flattened raster, aligned to
                # crop_specific_df's index on assignment.
                crop_specific_df[crop_col_name] = convert_af_to_1d_df(af)[0]
                crop_types_df[crop_type_col_name] += crop_specific_df[
                    crop_col_name]

        # To be fixed for weird NoData too high values in inputs files: (JUSTIN?)
        # crop_types_df[output_col_name][crop_specific_df[output_col_name] > 1e+12] = 0.0

        # Total calories per ha = sum of the five crop-type columns.
        crop_types_df['calories_per_ha'] = sum(
            crop_types_df[crop_type_cal_per_ha] for crop_type_cal_per_ha in
            [crop_type + '_calories_per_ha' for crop_type in p.crop_types])

        crop_types_df.to_csv(p.aggregated_crop_data_csv_path)
def a_greater_than_zero_b_equal_zero(a_path, b_path, output_path):
    """Write 1 where ``a > 0`` and ``b == 0``, and 0 elsewhere.

    Args:
        a_path: first input raster.
        b_path: second input raster.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def flag_cells(first, second):
        selected = (first > 0) & (second == 0)
        return np.where(selected, 1, 0)

    inputs = [a_path, b_path]
    hb.raster_calculator_flex(inputs, flag_cells, output_path)
    return hb.ArrayFrame(output_path)
def proportion_change(after, before, output_path):
    """Write ``(after - before) / before`` to ``output_path``.

    Args:
        after: raster of later values.
        before: raster of earlier values; also the denominator.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def relative_difference(later, earlier):
        difference = later - earlier
        return difference / earlier

    inputs = [after, before]
    hb.raster_calculator_flex(inputs, relative_difference, output_path)
    return hb.ArrayFrame(output_path)
def divide(a_path, b_path, output_path):
    """Write ``a / b`` to ``output_path``.

    Args:
        a_path: numerator raster.
        b_path: denominator raster.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def quotient(numerator, denominator):
        return numerator / denominator

    inputs = [a_path, b_path]
    hb.raster_calculator_flex(inputs, quotient, output_path)
    return hb.ArrayFrame(output_path)
def greater_than(a_path, b_path, output_path):
    """Write 1 where ``a > b`` and 0 elsewhere.

    Args:
        a_path: first input raster.
        b_path: second input raster.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def exceeds(first, second):
        is_larger = first > second
        return np.where(is_larger, 1, 0)

    inputs = [a_path, b_path]
    hb.raster_calculator_flex(inputs, exceeds, output_path)
    return hb.ArrayFrame(output_path)
def multiply(a_path, b_path, output_path):
    """Write ``a * b`` to ``output_path``.

    Args:
        a_path: first input raster.
        b_path: second input raster.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def product(first, second):
        return first * second

    inputs = [a_path, b_path]
    hb.raster_calculator_flex(inputs, product, output_path)
    return hb.ArrayFrame(output_path)
def subtract(a_path, b_path, output_path):
    """Write ``a - b`` to ``output_path``.

    Args:
        a_path: raster to subtract from.
        b_path: raster to subtract.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def difference(minuend, subtrahend):
        return minuend - subtrahend

    inputs = [a_path, b_path]
    hb.raster_calculator_flex(inputs, difference, output_path)
    return hb.ArrayFrame(output_path)
def af_where_lt_value_set_to(a, value, set_to, output_path):
    """Write ``a`` with every cell below ``value`` replaced by ``set_to``.

    Args:
        a: flex input, passed on to ``hb.raster_calculator_af_flex``.
        value: threshold; cells strictly below it are replaced.
        set_to: replacement value.
        output_path: raster to write.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def replace_below_threshold(cells):
        below = cells < value
        return np.where(below, set_to, cells)

    inputs = [a]
    hb.raster_calculator_af_flex(inputs, replace_below_threshold, output_path)
    return hb.ArrayFrame(output_path)
avitabile_uri = os.path.join(base_data_folder, 'carbon\\avitabile\\Avitabile_AGB_Map.tif') geocarbon_uri = os.path.join( base_data_folder, 'carbon\\avitabile\\GEOCARBON_Global_Forest_Biomass\\GEOCARBON_Global_Forest_AGB_10072015.tif' ) # Set folders temp_folder = 'C:\\temp' run_folder = os.path.join(temp_folder, 'run_' + hb.random_string()) os.mkdir(run_folder) intermediate_folder = os.path.join( base_data_folder, 'carbon\\johnson\\decision_tree_combined_carbon') # Open fixed inputs as arrayframes ipcc = hb.ArrayFrame(ipcc_uri) avitabile = hb.ArrayFrame(avitabile_uri) geocarbon = hb.ArrayFrame(geocarbon_uri) # Additional resources to calculate totals ha_per_cell_30s_uri = os.path.join(base_data_folder, 'misc\\ha_per_cell_30s.tif') land_ha_per_cell_30s_uri = os.path.join(base_data_folder, 'misc\\land_ha_per_cell_30s.tif') ha_per_cell = hb.ArrayFrame(ha_per_cell_30s_uri) # Logic on abg, c conversions. carbon_abg_proportion_common_value = 0.5 # What saatchi used. explanation_for_carbon_abg_proportion = """From Djomo et al: The forest carbon stocks are widely estimated from the allometric
def raster_calculator_flex(input_, op, output_path, **kwargs):
    """Apply ``op`` to one or more raster inputs and write the result raster.

    Args:
        input_: a path, an ``hb.ArrayFrame``, or a list of paths and
            ArrayFrames.
        op: callable taking exactly one parameter per input raster.
        output_path: raster to write.
        **kwargs: optional settings:
            datatype: output datatype; defaults to the first input's.
            ndv: output no-data value; defaults to the first input's. An
                explicit 0 is honoured.
            gtiff_creation_options: list of GTiff creation options; defaults
                to ['TILED=YES', 'BIGTIFF=IF_SAFER'].
            compress: when truthy, 'COMPRESS=lzw' is added.

    Returns:
        ``hb.ArrayFrame`` opened on ``output_path``.

    Raises:
        NameError: if ``input_`` is not a path, ArrayFrame or iterable, or if
            ``op`` does not take one parameter per input.
        FileNotFoundError: if an input path does not exist.
    """
    # Normalise to a list. A lone ArrayFrame must be wrapped like a lone
    # string: the old code replaced it with its path string and then iterated
    # that string character by character.
    if isinstance(input_, (str, hb.ArrayFrame)):
        input_ = [input_]
    else:
        try:
            input_ = list(input_)
        except TypeError:
            raise NameError(
                'input_ given to raster_calculator_flex() not understood. Give a path or list of paths.'
            ) from None

    # Replace ArrayFrames by their paths; strings pass through.
    input_ = [i.path if isinstance(i, hb.ArrayFrame) else i for i in input_]
    input_size = len(input_)

    # Check that files exist, naming the path that is actually missing.
    for i in input_:
        if not os.path.exists(i):
            raise FileNotFoundError(
                str(i) + ' not found by raster_calculator_flex()')

    # Verify datatypes.
    datatype = kwargs.get('datatype', None)
    if not datatype:
        datatypes = [hb.get_datatype_from_uri(i) for i in input_]
        if len(set(datatypes)) > 1:
            L.info(
                'Rasters given to raster_calculator_flex() were not all of the same type. Defaulting to using first input datatype.'
            )
        datatype = datatypes[0]

    # Check NDVs. Compare against None so that an explicit ndv of 0 is kept.
    ndv = kwargs.get('ndv', None)
    if ndv is None:
        ndvs = [hb.get_nodata_from_uri(i) for i in input_]
        if len(set(ndvs)) > 1:
            L.info(
                'NDVs used in rasters given to raster_calculator_flex() were not all the same. Defaulting to using first value.'
            )
        ndv = ndvs[0]

    # Copy the caller's list so that appending COMPRESS below cannot mutate it.
    gtiff_creation_options = kwargs.get('gtiff_creation_options', None)
    if gtiff_creation_options:
        gtiff_creation_options = list(gtiff_creation_options)
    else:
        gtiff_creation_options = ['TILED=YES', 'BIGTIFF=IF_SAFER']  # , 'COMPRESS=lzw']

    if kwargs.get('compress', None):
        gtiff_creation_options.append('COMPRESS=lzw')

    # Build (path, band) tuples to match the required format of
    # raster_calculator; band 1 is always used.
    input_tuples_list = [(i, 1) for i in input_]

    # Check that the op matches the number of rasters.
    if len(inspect.signature(op).parameters) != input_size:
        raise NameError(
            'op given to raster_calculator_flex() did not have the same number of parameters as the number of rasters given.'
        )

    hb.raster_calculator(input_tuples_list, op, output_path, datatype, ndv,
                         gtiff_creation_options=gtiff_creation_options)

    return hb.ArrayFrame(output_path)