def visualize_data(p):
    """Plot selected regression-data columns via ``plot_col``.

    Reads the CSV at ``p.baseline_regression_data_path``, outer-merges it onto
    a zero-filled frame with one row per cell of the raster at
    ``p.country_ids_raster_path`` and plots 'lat' and 'lon' from the result.
    The remaining columns are plotted from ``p.full_df``, which must already
    be set on ``p``.
    """
    # TODO: If we load data, this reads the csv twice (time consuming); optimize?
    land_table = pd.read_csv(p.baseline_regression_data_path)

    # Zero-filled frame, one row per raster cell, used as the left side of the merge.
    reference_af = hb.ArrayFrame(p.country_ids_raster_path)
    padding_df = pd.DataFrame(np.zeros(reference_af.size))
    full_df = pd.merge(padding_df,
                       land_table,
                       how='outer',
                       left_index=True,
                       right_on='pixel_id')

    for column in ('lat', 'lon'):
        plot_col(full_df, column)

    # NOTE(review): these are read from p.full_df rather than the local frame
    # built above, and 'lat_sin' is plotted twice -- possibly one of them was
    # meant to be a different column.
    for column in ('slope', 'lon_sin', 'lat_sin', 'lat_sin'):
        plot_col(p.full_df, column)
Exemplo n.º 2
0
def export_raster(df,col_name,savefig,full_df_return=False):
    '''Export one column of a pixel-indexed DataFrame as a single-band Float32 GeoTIFF.

    Parameters
    ----------
    df : DataFrame whose ``reset_index()`` yields a 'pixel_id' column.
    col_name : name of the column to rasterize.
    savefig : path of the GeoTIFF to create.
    full_df_return : accepted for backward compatibility; not used.

    The geotransform and projection are copied from the module-level
    ``match_raster``. The output grid is fixed at 4320 x 2160 cells, so the
    merged frame must hold exactly that many rows.
    '''
    # Zero-filled frame with one row per cell of the match raster.
    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))
    column_df = df[col_name].reset_index()
    ### Merge with zeros_df to include non-ag pixels
    full_df = pd.merge(zeros_df, column_df, left_index=True, right_on='pixel_id', how='outer')

    x_pixels = 4320  # = match.RasterXSize
    y_pixels = 2160  # = match.RasterYSize

    # Series.as_matrix() was removed in pandas 1.0; ``.values`` returns the
    # same ndarray and exists in every pandas version.
    values = full_df[col_name].values.reshape((y_pixels, x_pixels)).astype(np.float32)

    target_path = savefig
    driver = gdal.GetDriverByName('GTiff')
    output = driver.Create(target_path, x_pixels, y_pixels, 1, gdal.GDT_Float32)
    output.GetRasterBand(1).WriteArray(values)

    # Copy georeferencing from the match raster.
    match = gdal.Open(match_raster)
    output.SetGeoTransform(match.GetGeoTransform())
    output.SetProjection(match.GetProjection())
    output.FlushCache()
    #output.GetRasterBand(1).SetNoDataValue(np.nan)
    output = None  # dropping the last reference lets GDAL close the dataset

    print('Exported raster at '+savefig)
Exemplo n.º 3
0
 def test_arrayframe_add(self):
     """Call hb.add once with raster paths and once with ArrayFrame operands."""
     raster_path = self.global_1deg_raster_path

     # Operands given as paths.
     path_result = hb.temp('.tif', 'testing_arrayframe_add', True)
     hb.add(raster_path, raster_path, path_result)

     # Operands given as an already-constructed ArrayFrame.
     af_result = hb.temp('.tif', 'testing_arrayframe_add', True)
     operand = hb.ArrayFrame(raster_path)
     hb.add(operand, operand, af_result)
Exemplo n.º 4
0
def visualize_data(df,col_name,savefig=False,colorscheme='diverging',
                   vminmax=False,savecmap=False,
                   shape=(2160,4320),title=None,resize=False):
    """Draw one column of a pixel-indexed DataFrame as a world map.

    Parameters
    ----------
    df : DataFrame whose ``reset_index()`` yields a 'pixel_id' column.
    col_name : column to plot; also the default title.
    savefig : path to save the figure to, or False to skip saving.
    colorscheme : 'diverging' (PiYG) or 'sequential' (inferno_r).
    vminmax : ``(vmin, vmax)`` for the color scale, or False to use the
        column's own min and max.
    savecmap : when True, return ``((vmin, vmax), resize)``.
    shape : 2-D shape the merged column is reshaped to before plotting.
    title : figure title; defaults to ``col_name``.
    resize : forwarded to ``customColorMap``.

    Raises
    ------
    ValueError
        If ``colorscheme`` is not one of the supported names.
    """
    # Validate up front: an unsupported scheme used to leave ``cmap`` unbound
    # and only fail at imshow(), after the figure and rasters were created.
    if colorscheme == 'diverging':
        cmap_name = 'PiYG'
    elif colorscheme == 'sequential':
        cmap_name = 'inferno_r'  # alternatively 'magma'
    else:
        raise ValueError(
            "Unsupported colorscheme " + repr(colorscheme)
            + "; expected 'diverging' or 'sequential'")

    fig,axes = plt.subplots(1, 1, figsize=(20,15))

    # -- Prepare data --
    # Zero-filled frame with one row per cell of the match raster.
    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))
    column_df = df[col_name].reset_index()
    ### Merge with zeros_df to include non-ag pixels
    full_df = pd.merge(zeros_df, column_df, left_index=True, right_on='pixel_id', how='outer')

    ## -- Plot column --

    # Color scale limits: explicit, or taken from the (unmerged) column.
    if vminmax==False:
        serie = df[col_name]
        vmax = serie.max()
        vmin = serie.min()
    else:
        vmin = vminmax[0]
        vmax = vminmax[1]

    cmap = customColorMap(plt.get_cmap(cmap_name), vmin, vmax, resize)

    # Plot data
    data = np.array(full_df[col_name])
    bm = Basemap()
    im = bm.imshow(np.flipud(data.reshape(shape)),cmap=cmap)
    bm.drawcoastlines(linewidth=0.15, color='0.1')

    cbar = plt.colorbar(im, orientation='vertical',fraction=0.0234, pad=0.04)

    plt.title(col_name if title is None else title)
    plt.show()

    if savefig != False:
        fig.savefig(savefig)

    if savecmap == True:
        return (vmin, vmax), resize
def create_land_mask():
    """Return a DataFrame with a single 'land_mask' column (1 where the country id is > 0, else 0).

    The country ids are read from a hard-coded raster path relative to the
    working directory.
    """
    country_raster = hb.ArrayFrame(
        '../ipbes_invest_crop_yield_project/input/Cartographic/country_ids.tif'
    )
    mask_df = convert_af_to_1d_df(country_raster)

    def _flag(country_id):
        if country_id > 0:
            return 1
        return 0

    mask_df['land_mask'] = mask_df[0].apply(_flag)
    # Column 0 held the raw ids; only the mask is returned.
    return mask_df.drop(0, axis=1)
Exemplo n.º 6
0
def input_flex_as_af(intput_af_or_path):
    """Return an ``hb.ArrayFrame`` for a path string or an existing ArrayFrame.

    A string is opened with ``hb.ArrayFrame``; an ArrayFrame is returned
    unchanged. Any other input raises ``NameError``.
    """
    if isinstance(intput_af_or_path, str):
        return hb.ArrayFrame(intput_af_or_path)
    if isinstance(intput_af_or_path, hb.ArrayFrame):
        return intput_af_or_path
    raise NameError('input_flex_as_af unable to interpret intput_af_or_path of ' + str(intput_af_or_path))
Exemplo n.º 7
0
def visualize_two_maps(serie1, serie2,
                     savefig=False,colorscheme='diverging',
                     resize=False,
                     shape=(2160,4320)):
    """Draw two pixel-indexed series as stacked world maps sharing one color scale.

    Parameters
    ----------
    serie1, serie2 : Series whose ``reset_index()`` yields a 'pixel_id'
        column; ``.name`` is used as each panel's title.
    savefig : path to save the figure to (dpi=300), or False to skip saving.
    colorscheme : 'diverging' (PiYG) or 'sequential' (inferno_r).
    resize : forwarded to ``customColorMap_v``.
    shape : 2-D shape each merged series is reshaped to before plotting.

    Raises
    ------
    ValueError
        If ``colorscheme`` is not one of the supported names.
    """
    # Validate before creating anything: an unknown scheme used to print
    # 'Wrong colorscheme' and then fail later on the unbound ``cmap``.
    if colorscheme == 'diverging':
        cmap_name = 'PiYG'
    elif colorscheme == 'sequential':
        cmap_name = 'inferno_r'  # alternatively 'magma'
    else:
        raise ValueError(
            'Wrong colorscheme ' + repr(colorscheme)
            + "; expected 'diverging' or 'sequential'")

    fig, axes = plt.subplots(2, 1, figsize=(20,15))

    # Global limits so both panels share one color scale.
    vmax = max(serie1.max(),serie2.max())
    vmin = min(serie1.min(),serie2.min())

    # -- Prepare data --
    # Zero-filled frame with one row per cell of the match raster.
    match_af = hb.ArrayFrame(match_raster)
    zeros_df = pd.DataFrame(np.zeros(match_af.size))

    cmap = customColorMap_v(plt.get_cmap(cmap_name), vmin, vmax, resize=resize)

    ## -- Plot columns --
    for ax, serie in zip(axes, (serie1, serie2)):
        ### Merge with zeros_df to include non-ag pixels
        full_df = pd.merge(zeros_df, serie.reset_index(), left_index=True, right_on='pixel_id', how='outer')
        # The series' values end up in the last column of the merged frame.
        data = np.array(full_df[full_df.columns[-1]])
        bm = Basemap(ax=ax)
        im = bm.imshow(np.flipud(data.reshape(shape)),cmap=cmap,vmin=vmin,vmax=vmax)
        bm.drawcoastlines(linewidth=0.15, color='0.1')
        ax.set_title(serie.name)

    # One colorbar for both panels (they share cmap, vmin and vmax).
    fig.colorbar(im, ax=axes.ravel().tolist())

    if savefig != False:
        fig.savefig(savefig,dpi=300)
        def add_crop_layers_from_dir(input_dir):
            """Sum five crop area-fraction rasters from ``input_dir`` into one array.

            Only cells where the first raster's value lies in [0, 1] are
            accumulated; every other cell stays 0. Returns the summed array.
            """
            layer_files = (
                "c4per ^ area_fraction ^ C4 perennial crops.tif",
                "c4ann ^ area_fraction ^ C4 annual crops.tif",
                "c3per ^ area_fraction ^ C3 perennial crops.tif",
                "c3nfx ^ area_fraction ^ C3 nitrogen-fixing crops.tif",
                "c3ann ^ area_fraction ^ C3 annual crops.tif",
            )
            uris_to_combine = [os.path.join(input_dir, layer) for layer in layer_files]
            print('uris_to_combine', uris_to_combine)

            # The first layer defines both the output shape and the valid-cell mask.
            reference = hb.ArrayFrame(uris_to_combine[0])
            total = np.zeros(reference.shape)
            in_range = (reference.data >= 0.0) & (reference.data <= 1.0)
            valid_cells = np.where(in_range)

            for uri in uris_to_combine:
                layer_data = hb.ArrayFrame(uri).data
                total[valid_cells] += layer_data[valid_cells]

            return total
Exemplo n.º 9
0
def add_with_valid_mask(a_path, b_path, output_path, valid_mask_path, ndv):
    """Write ``a + b`` where the valid mask equals 1 and ``ndv`` elsewhere.

    The result is written to ``output_path`` through
    ``hb.raster_calculator_flex`` and returned as an ``hb.ArrayFrame``.
    """
    def masked_sum(a, b, valid_mask):
        is_valid = valid_mask == 1
        return np.where(is_valid, a + b, ndv)

    raster_inputs = [a_path, b_path, valid_mask_path]
    hb.raster_calculator_flex(raster_inputs, masked_sum, output_path, ndv=ndv)
    return hb.ArrayFrame(output_path)
Exemplo n.º 10
0
def add_smart(a, b, a_valid_mask, b_valid_mask, output_ndv, output_path):
    """Write ``a + b`` where both inputs are valid and ``output_ndv`` elsewhere.

    Parameters
    ----------
    a, b : objects exposing a ``valid_mask`` attribute; they and their masks
        are passed to ``hb.raster_calculator_flex`` as the four inputs.
    a_valid_mask, b_valid_mask : accepted for backward compatibility; the
        masks actually used are ``a.valid_mask`` and ``b.valid_mask``.
    output_ndv : value written where either mask is not 1; also passed as
        the output's no-data value.
    output_path : path of the raster to write.

    Returns the result opened as an ``hb.ArrayFrame``.
    """
    # ``op`` takes exactly one argument per raster handed to the calculator;
    # output_ndv is captured from the enclosing scope instead of being a
    # fifth parameter that no input would ever fill.
    def op(a, b, a_valid_mask, b_valid_mask):
        # Parenthesized on purpose: ``&`` binds tighter than ``==``, so the
        # unparenthesized form compared against ``1 & b_valid_mask``.
        both_valid = (a_valid_mask == 1) & (b_valid_mask == 1)
        return np.where(both_valid, a + b, output_ndv)

    hb.raster_calculator_flex([a, b, a.valid_mask, b.valid_mask],
                              op,
                              output_path,
                              ndv=output_ndv)
    return hb.ArrayFrame(output_path)
def rasters_to_tabular_csv(rasters_paths,
                           csv_name,
                           latlon=False,
                           col_names=None):
    """Flatten several rasters into one table of land pixels and write it as CSV.

    Parameters
    ----------
    rasters_paths : sequence of raster paths; each becomes one column.
    csv_name : file name (without extension) written under
        '../Data/intermediate/'.
    latlon : when truthy, add 'lon' and 'lat' columns derived from the pixel
        id, assuming a 4320 x 2160 grid.
    col_names : column names for the rasters; defaults to each file's
        'file_root_no_suffix' as reported by ``hb.explode_path``.

    Raises
    ------
    IndexError
        If ``rasters_paths`` is empty.
    """
    # Fail the same way the former ``rasters_paths[0]`` lookup did, without
    # loading the first raster a second time just to discard it.
    if len(rasters_paths) == 0:
        raise IndexError('rasters_paths is empty; at least one raster path is required')

    # Create tabular data: one 1-D frame and one default name per raster.
    rasters_names = []
    dfs_list = []
    for path in rasters_paths:
        dfs_list.append(convert_af_to_1d_df(hb.ArrayFrame(path)))
        rasters_names.append(hb.explode_path(path)['file_root_no_suffix'])

    # ``is None`` rather than ``== None`` so array-like col_names work too.
    if col_names is None:
        col_names = rasters_names

    df = concatenate_dfs_horizontally(dfs_list, col_names)

    # Get rid of the oceans cells
    df['pixel_id'] = df.index
    land_mask = create_land_mask()
    df = df.merge(land_mask, right_index=True, left_on='pixel_id')
    # Explicit copy so the column assignments below act on an independent frame.
    df_land = df[df['land_mask'] == 1].dropna().copy()

    if latlon:
        pixel_id = df_land['pixel_id']
        df_land['lon'] = (((pixel_id % 4320.) / 4320 - .5) * 360.0).round(2)
        df_land['lat'] = (((pixel_id / 4320.).round() / 2160 - .5) * 180.).round(2)

    # NOTE(review): the previous code assigned ``df_land.set_index('pixel_id')``
    # to a misspelled, never-used variable, so the CSV has always been written
    # with the default index plus a 'pixel_id' column. That layout is kept
    # here; confirm with downstream readers before changing it.
    print('Writing csv ' + csv_name)
    df_land.to_csv('../Data/intermediate/' + csv_name + '.csv')
Exemplo n.º 12
0
def parse_input_flex(input_flex):
    """Turn a flex input into something usable as an ArrayFrame.

    * ``str`` -- treated as a path and opened with ``hb.ArrayFrame``.
    * ``numpy.ndarray`` -- not supported yet; raises ``NotImplementedError``.
    * anything else -- returned unchanged.

    Raises
    ------
    NotImplementedError
        For ndarray input. Previously this branch only printed a message and
        then failed on an unbound local at ``return``.
    """
    if isinstance(input_flex, str):
        return hb.ArrayFrame(input_flex)
    if isinstance(input_flex, np.ndarray):
        # output = hb.create_af_from_array(input_flex)
        raise NotImplementedError(
            'parse_input_flex is NYI for arrays because i first need to figure out how to have an af without georeferencing.'
        )
    return input_flex
def resample_lulc(p):
    """Align every extracted LULC GeoTIFF to a cylindrical reprojection of the ha-per-cell raster.

    Does nothing unless ``p.tasks['resample_lulc']`` is truthy. Otherwise the
    raster at ``p.base_data_ha_per_cell_path`` is reprojected to
    ``p.match_r_path``, and each '.tif' found under
    ``<extract_lulc dir>/<scenario>/<year>`` is aligned to it and written
    under ``p.resample_lulc_dir/<scenario>/<year>``.
    """
    if not p.tasks['resample_lulc']:
        return

    base_af = hb.ArrayFrame(p.base_data_ha_per_cell_path)
    match_r_path = p.match_r_path
    hb.reproject_to_cylindrical(base_af.uri, match_r_path)
    # hb.reproject_to_epsg(base_af.uri, match_r_path, 54012)
    # The reprojected raster is opened here; the object itself is not used afterwards.
    hb.ArrayFrame(match_r_path)

    for scenario in p.scenario_names:
        for year in p.years:
            year_label = str(year)
            read_dir = os.path.join(p.task_dirs['extract_lulc'], scenario, year_label)
            write_dir = os.path.join(p.resample_lulc_dir, scenario, year_label)
            hb.create_dirs(write_dir)

            tif_files = hb.list_files_in_dir_recursively(read_dir, filter_extensions=['.tif'])
            for filename in tif_files:
                input_path = os.path.join(read_dir, filename)
                output_name = os.path.basename(filename) + '.tif'
                output_path = os.path.join(write_dir, output_name)
                print('input output', input_path, output_path)

                hb.align_dataset_to_match(input_path, match_r_path, output_path)
Exemplo n.º 14
0
def arrayframe_load_and_save():
    """Save a small 3x6 array as a GeoTIFF and open it again as an ArrayFrame."""
    source_array = np.arange(18).reshape(3, 6)
    tif_path = hb.temp('.tif', remove_at_exit=False)

    # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
    georeferencing = {
        'geotransform_override': hb.calc_cylindrical_geotransform_from_array(source_array),
        'projection_override': 'plate_carree',
    }
    hb.save_array_as_geotiff(source_array, tif_path, **georeferencing)

    hb.ArrayFrame(tif_path)
Exemplo n.º 15
0
    def test_arrayframe_load_and_save(self):
        """Save a 3x6 array as a GeoTIFF with explicit ndv/datatype, then open it as an ArrayFrame."""
        source_array = np.arange(18).reshape(3, 6)
        tif_path = hb.temp('.tif', remove_at_exit=True)

        # projection = hb.get_wkt_from_epsg_code(hb.common_epsg_codes_by_name['plate_carree'])
        save_options = {
            'geotransform_override': hb.calc_cylindrical_geotransform_from_array(source_array),
            'projection_override': 'wgs84',
            'ndv': 255,
            'data_type': 1,
        }
        hb.save_array_as_geotiff(source_array, tif_path, **save_options)

        hb.ArrayFrame(tif_path)
def raster_calculator_af_flex(
    input_, op, output_path, **kwargs
):  #KWARGS: datatype=None, ndv=None, gtiff_creation_options=None, compress=False, add_overviews=False
    """Run ``op`` over one or more rasters and return the result as an ArrayFrame.

    In HB, a flex input is one of: a string that points to a file, an
    ArrayFrame, or a list of those. Plain ``float`` entries in a list are
    passed to the calculator as raw values.

    Parameters
    ----------
    input_ : path string, ``hb.ArrayFrame``, or iterable of paths /
        ArrayFrames / floats.
    op : callable taking one argument per entry of ``input_``.
    output_path : path of the raster to write.

    KWARGS
    ------
    datatype=None : output datatype; when not given, the datatype of the
        first non-float input is used.
    ndv=None : output no-data value; when not given (``None``), the ndv of
        the first non-float input is used. An explicit ``ndv=0`` is honored.
    gtiff_creation_options=None : GTiff creation options; defaults to
        ``['TILED=YES', 'BIGTIFF=IF_SAFER']``. The caller's list is copied,
        never modified.
    compress=False : when truthy, append 'COMPRESS=deflate' to the options.
    add_overviews=False : when truthy, add overviews to the output.
    """
    print('input_', input_)

    # A lone path or ArrayFrame becomes a one-element list. (A lone
    # ArrayFrame used to be replaced by its path *string*, which the loop
    # below then walked character by character.)
    if isinstance(input_, (str, hb.ArrayFrame)):
        input_ = [input_]

    # Replace every ArrayFrame by the path of the raster it wraps.
    final_input = []
    for c, i in enumerate(input_):
        print('c,i', c, i)
        final_input.append(i.path if isinstance(i, hb.ArrayFrame) else i)
    input_ = final_input

    # # Check that files exist.
    # for i in input_:
    #     if not os.path.exists(i):
    #         raise FileNotFoundError(str(input_) + ' not found by raster_calculator_af_flex()')

    # Raw float operands carry no datatype or ndv of their own.
    raster_inputs = [i for i in input_ if not isinstance(i, float)]

    # Verify datatypes
    datatype = kwargs.get('datatype', None)
    if not datatype:
        print('input_', input_)
        datatypes = [hb.get_datatype_from_uri(i) for i in raster_inputs]
        print('datatypes', datatypes)
        if len(set(datatypes)) > 1:
            L.info(
                'Rasters given to raster_calculator_af_flex() were not all of the same type. Defaulting to using first input datatype.'
            )
        datatype = datatypes[0]

    # Check NDVs. ``is None`` (not falsiness) so that ndv=0 can be requested.
    ndv = kwargs.get('ndv', None)
    if ndv is None:
        ndvs = [hb.get_ndv_from_path(i) for i in raster_inputs]
        if len(set(ndvs)) > 1:
            L.info(
                'NDVs used in rasters given to raster_calculator_af_flex() were not all the same. Defaulting to using first value.'
            )
        ndv = ndvs[0]

    gtiff_creation_options = kwargs.get('gtiff_creation_options', None)
    if not gtiff_creation_options:
        gtiff_creation_options = ['TILED=YES',
                                  'BIGTIFF=IF_SAFER']  #, 'COMPRESS=lzw']
    else:
        # Work on a copy so appending below never alters the caller's list.
        gtiff_creation_options = list(gtiff_creation_options)

    if kwargs.get('compress', None):
        gtiff_creation_options.append('COMPRESS=deflate')

    # Build tuples to match the required format of raster_calculator:
    # (path, band 1) for rasters, (value, 'raw') for float constants.
    input_tuples_list = []
    for i in input_:
        if isinstance(i, float):
            input_tuples_list.append((i, 'raw'))
        elif isinstance(i, str):
            input_tuples_list.append((i, 1))
        else:
            # Objects that expose a ``path`` attribute are reduced to it.
            input_tuples_list.append((getattr(i, 'path', i), 1))

    # # Check that the op matches the number of rasters.
    # if len(inspect.signature(op).parameters) != len(input_):
    #     raise NameError('op given to raster_calculator_af_flex() did not have the same number of parameters as the number of rasters given.')

    print('input_tuples_list', input_tuples_list)
    hb.raster_calculator_hb(input_tuples_list,
                            op,
                            output_path,
                            datatype,
                            ndv,
                            gtiff_creation_options=gtiff_creation_options)

    if kwargs.get('add_overviews'):
        hb.add_overviews_to_path(output_path)

    output_af = hb.ArrayFrame(output_path)
    return output_af
Exemplo n.º 17
0
def reproject_align():
    """Open a hard-coded sample raster as an ArrayFrame and print it."""
    sample_af = hb.ArrayFrame("wgs84_026deg_-9999ndv.tif")
    print(sample_af)
def load_data(p):
    """Build the regression table and store it on ``p`` as ``p.df`` and ``p.full_df``.

    Only runs when ``p.run_this`` is truthy. Reads the CSVs at
    ``p.aggregated_crop_data_csv_path`` and ``p.baseline_regression_data_path``,
    outer-merges them on 'pixel_id', optionally subsets the rows according to
    ``p.subset``, then drops, renames and encodes columns, adds log and derived
    features, and removes rows with NaNs or zero 'calories_per_ha'.

    ``p.subset``:
      * True  -- keep a 2% random sample of the rows.
      * False -- keep only the training part of a ``train_test_split``.
      * None  -- keep all rows.

    ``p.full_df`` is ``p.df`` outer-merged onto a zero-filled frame with one
    row per cell of the raster at ``p.country_ids_raster_path``.
    """

    if p.run_this:
        crop_types_df = pd.read_csv(p.aggregated_crop_data_csv_path)
        df_land = pd.read_csv(p.baseline_regression_data_path)

        df = df_land.merge(crop_types_df, how='outer', on='pixel_id')

        if p.subset == True:
            df = df.sample(frac=0.02,
                           replace=False,
                           weights=None,
                           random_state=None,
                           axis=0)

        elif p.subset == False:  #Save validation data
            x = df.drop(['calories_per_ha'], axis=1)
            y = df['calories_per_ha']

            # NOTE(review): X_validation / y_validation are not stored or
            # written anywhere in this function -- only the training part is
            # kept in df. Confirm whether the validation split should be saved.
            X, X_validation, Y, y_validation = train_test_split(x, y)

            # Re-attach the target column to the training features by index.
            df = X.merge(pd.DataFrame(Y),
                         how='outer',
                         left_index=True,
                         right_index=True)

        elif p.subset is None:  # CAREFUL FOOL ONLY DO THIS FOR PLOTTING BECAUSE LEAKAGE
            pass

        # Remove cal_per_ha per crop type for now
        df = df.drop(labels=[
            'c3_annual_calories_per_ha', 'c3_perennial_calories_per_ha',
            'c4_annual_calories_per_ha', 'c4_perennial_calories_per_ha',
            'nitrogen_fixer_calories_per_ha'
        ],
                     axis=1)

        # Remove helper columns (not features)
        df = df.drop(labels=['Unnamed: 0', 'country_ids', 'ha_per_cell_5m'],
                     axis=1)

        # Rename cols
        df = df.rename(
            columns={
                'bio12': 'precip',
                'bio1': 'temperature',
                'minutes_to_market_5m': 'min_to_market',
                'gdp_per_capita_2000_5m': 'gdp_per_capita',
                'gdp_2000': 'gdp'
            })

        # Encode Climate zones as Strings
        # (integer code -> label; codes missing from this map become NaN after .map())
        climate_zones_map = {
            1: 'Af',
            2: 'Am',
            3: 'Aw',
            5: 'BWk',
            4: 'BWh',
            7: 'BSk',
            6: 'BSh',
            14: 'Cfa',
            15: 'Cfb',
            16: 'Cfc',
            8: 'Csa',
            9: 'Csb',
            10: 'Csc',
            11: 'Cwa',
            12: 'Cwb',
            13: 'Cwc',
            25: 'Dfa',
            26: 'Dfb',
            27: 'Dfc',
            28: 'Dfd',
            17: 'Dsa',
            18: 'Dsb',
            19: 'Dsc',
            20: 'Dsd',
            21: 'Dwa',
            22: 'Dwb',
            23: 'Dwc',
            24: 'Dwd',
            30: 'EF',
            29: 'ET'
        }

        df['climate_zones'] = df['climate_zones'].map(
            climate_zones_map)  # TODO Why was it commented?

        # Encode climate zones as dummies
        climate_dummies_df = pd.get_dummies(df['climate_zones'])
        # Prefix every dummy column with 'climatezone_'.
        for col in climate_dummies_df.columns:
            climate_dummies_df = climate_dummies_df.rename(
                {col: str('climatezone_' + col)}, axis=1)

        df = df.merge(climate_dummies_df, right_index=True, left_index=True)
        df = df.drop('climate_zones', axis=1)

        # Log some skewed variables
        # Zeros are mapped to 0 instead of -inf.
        # NOTE(review): a value of exactly 1 also becomes 0 here, so such rows
        # are removed by the ``calories_per_ha != 0`` filter further down.
        df['calories_per_ha'] = df['calories_per_ha'].apply(lambda x: np.log(x)
                                                            if x != 0 else 0)

        for col in [
                'gdp_per_capita', 'altitude', 'min_to_market', 'gpw_population'
        ]:
            df[str('log_' +
                   col)] = df[col].apply(lambda x: np.log(x) if x != 0 else 0)

        # TODO figure out how to encode soil variables better?

        # Add precip_annualrange
        df['precip_annualrange'] = df['precip_wet_mth'] - df['precip_dry_mth']

        # Lat/Lon
        # Sine of the longitude, with 'lon' converted from degrees to radians.
        df['sin_lon'] = df['lon'].apply(lambda x: np.sin(np.radians(x)))

        # Encode properly NaNs
        df['slope'] = df['slope'].replace({0: np.nan
                                           })  # 143 NaN in 'slope' variable
        # 255 is replaced by NaN in each soil index column.
        for soil_var in [
                'workability_index', 'toxicity_index',
                'rooting_conditions_index', 'oxygen_availability_index',
                'protected_areas_index', 'nutrient_retention_index',
                'nutrient_availability_index', 'excess_salts_index'
        ]:
            df[soil_var] = df[soil_var].replace({255: np.nan})

        # Drop NaNs rows and cells with no ag
        df = df.dropna()
        df = df[df['calories_per_ha'] != 0]

        # df.set_index('pixel_id') ## TODO Why is this commented out ?

        p.df = df

        # Zero-filled frame with one row per raster cell, outer-merged with
        # p.df on 'pixel_id'.
        match_af = hb.ArrayFrame(p.country_ids_raster_path)
        zeros_array = np.zeros(match_af.size)
        p.full_df = pd.DataFrame(zeros_array)
        p.full_df = pd.merge(p.full_df,
                             p.df,
                             left_index=True,
                             right_on='pixel_id',
                             how='outer')
def add(a_flex, b_flex, output_path):
    """Write the cell-wise sum of two flex inputs to ``output_path`` and return it as an ArrayFrame."""
    def cellwise_sum(a, b):
        return a + b

    operands = [a_flex, b_flex]
    hb.raster_calculator_af_flex(operands, cellwise_sum, output_path)
    return hb.ArrayFrame(output_path)
def create_baseline_regression_data(p):
    """Flatten the baseline input rasters into one table of land pixels and write it as CSV.

    Always sets ``p.baseline_regression_data_path`` (inside ``p.cur_dir``).
    When ``p.run_this`` is truthy, every raster listed below becomes one
    column, negative values are set to 0, non-land pixels and rows containing
    NaN are dropped, 'lon'/'lat' columns are added, and the table is written
    to that path.
    """
    p.baseline_regression_data_path = os.path.join(
        p.cur_dir, 'baseline_regression_data.csv')

    # Rasters to include, one column each. Commented-out entries are inputs
    # that are currently disabled.
    paths_to_add = [
        # p.country_names_path,
        p.country_ids_raster_path,
        p.ha_per_cell_5m_path,
        #p.precip_path,
        #p.temperature_path,
        p.slope_path,
        p.altitude_path,
        p.workability_index_path,
        p.toxicity_index_path,
        p.rooting_conditions_index_path,
        # p.rainfed_land_percent_path,
        p.protected_areas_index_path,
        p.oxygen_availability_index_path,
        p.nutrient_retention_index_path,
        p.nutrient_availability_index_path,
        # p.irrigated_land_percent_path,
        p.excess_salts_index_path,
        # p.cultivated_land_percent_path,
        # p.crop_suitability_path,
        p.gdp_2000_path,
        p.gdp_gecon,
        p.minutes_to_market_path,
        p.pop_path,
        p.climate_zones_path,
        p.temp_avg_path,
        p.temp_diurnalrange_path,
        p.temp_isothermality_path,
        p.temp_seasonality_path,
        p.temp_annualmax_path,
        p.temp_annualmin_path,
        p.temp_annualrange_path,
        #p.temp_wettestq_path,
        #p.temp_dryestq_path,
        #p.temp_warmestq_path,
        #p.temp_coldestq_path,
        p.precip_path,
        p.precip_wet_mth_path,
        p.precip_dry_mth_path,
        p.precip_seasonality_path,
        # p.precip_wettestq_path,
        # p.precip_dryestq_path,
        # p.precip_warmestq_path,
        # p.precip_coldestq_path
    ]

    if not p.run_this:
        return

    # The first raster is opened once up front; the object is not used afterwards.
    hb.ArrayFrame(paths_to_add[0])

    column_names = []
    column_frames = []
    for raster_path in paths_to_add:
        column_names.append(hb.explode_path(raster_path)['file_root'])
        column_frames.append(convert_af_to_1d_df(hb.ArrayFrame(raster_path)))

    L.info('Concatenating all dataframes.')
    table = concatenate_dfs_horizontally(column_frames, column_names)
    table[table < 0] = 0.0

    # Get rid of the oceans cells
    table['pixel_id'] = table.index
    table['pixel_id_float'] = table['pixel_id'].astype('float')
    table = table.merge(create_land_mask(), right_index=True, left_on='pixel_id')
    is_land = table['land_mask'] == 1
    df_land = table[is_land]

    df_land = df_land.dropna()

    # Longitude / latitude derived from the flat pixel id on a 4320 x 2160 grid.
    pixel_id_float = table['pixel_id_float']
    df_land['lon'] = ((pixel_id_float % 4320.) / 4320 - .5) * 360.0
    row_number = (pixel_id_float / 4320.).round()
    df_land['lat'] = (row_number / 2160 - .5) * 180.

    df_land.to_csv(p.baseline_regression_data_path)
def aggregate_crops_by_type(p):
    """Sum per-crop calorie rasters into five crop-type totals and write them as CSV.

    CMIP6 and the land-use harmonization project have centered on 5 crop types:
    c3 annual, c3 perennial, c4 annual, c4 perennial, nitrogen fixer. Each crop
    listed in ``crop_membership`` below is added to the total of its type.

    Always sets ``p.aggregated_crop_data_csv_path`` (inside ``p.cur_dir``) and
    ``p.crop_types``, and reads the CSV at ``p.baseline_regression_data_path``
    (indexed by 'pixel_id'). When ``p.run_this`` is truthy, each crop's
    '<crop>_calories_per_ha_masked.tif' is loaded from
    ``<p.input_dir>/Crop/crop_calories``, the per-type sums plus an overall
    'calories_per_ha' column are computed, and the result is written to
    ``p.aggregated_crop_data_csv_path``.
    """

    p.aggregated_crop_data_csv_path = os.path.join(p.cur_dir,
                                                   'aggregated_crop_data.csv')
    baseline_regression_data_df = pd.read_csv(p.baseline_regression_data_path,
                                              index_col='pixel_id')

    # NOTE(review): this list is not referenced anywhere else in this function.
    vars_names_to_aggregate = [
        # 'production_value_per_ha',
        # 'calories_per_ha',
        'calories_per_ha_masked',
        # 'yield_per_ha'
        # 'proportion_cultivated',
        # 'PotassiumApplication_Rate',
        # 'PhosphorusApplication_Rate',
        # 'NitrogenApplication_Rate',
    ]

    # Crop type -> names of the crops that belong to it. The names are used
    # to build both column names and input file names below.
    crop_membership = OrderedDict()
    crop_membership['c3_annual'] = [
        'aniseetc',
        'artichoke',
        'asparagus',
        'bambara',
        'barley',
        'buckwheat',
        'cabbage',
        'canaryseed',
        'carob',
        'carrot',
        'cassava',
        'cauliflower',
        'cerealnes',
        'chestnut',
        'cinnamon',
        'cucumberetc',
        'currant',
        'date',
        'eggplant',
        'fonio',
        'garlic',
        'ginger',
        'mixedgrain',
        'hazelnut',
        'hempseed',
        'hop',
        'kapokseed',
        'linseed',
        'mango',
        'mate',
        'mustard',
        'nutmeg',
        'okra',
        'onion',
        'greenonion',
        'peppermint',
        'potato',
        'pumpkinetc',
        'pyrethrum',
        'ramie',
        'rapeseed',
        'rice',
        'safflower',
        'sisal',
        'sorghumfor',
        'sourcherry',
        'spinach',
        'sugarbeet',
        'sunflower',
        'taro',
        'tobacco',
        'tomato',
        'triticale',
        'tung',
        'vanilla',
        'vetch',
        'walnut',
        'watermelon',
        'wheat',
        'yam',
        'yautia',
    ]
    crop_membership['c3_perennial'] = [
        'almond',
        'apple',
        'apricot',
        'areca',
        'avocado',
        'banana',
        'blueberry',
        'brazil',
        'cashewapple',
        'cashew',
        'cherry',
        'chicory',
        'chilleetc',
        'citrusnes',
        'clove',
        'cocoa',
        'coconut',
        'coffee',
        'cotton',
        'cranberry',
        'fig',
        'flax',
        'grapefruitetc',
        'grape',
        'jute',
        'karite',
        'kiwi',
        'kolanut',
        'lemonlime',
        'lettuce',
        'abaca',
        'melonetc',
        'melonseed',
        'oats',
        'oilpalm',
        'oilseedfor',
        'olive',
        'orange',
        'papaya',
        'peachetc',
        'pear',
        'pepper',
        'persimmon',
        'pineapple',
        'pistachio',
        'plantain',
        'plum',
        'poppy',
        'quince',
        'quinoa',
        'rasberry',
        'rubber',
        'rye',
        'stonefruitnes',
        'strawberry',
        'stringbean',
        'sweetpotato',
        'tangetc',
        'tea',
    ]
    crop_membership['c4_annual'] = [
        'maize',
        'millet',
        'sorghum',
    ]
    crop_membership['c4_perennial'] = [
        'greencorn',
        'sugarcane',
    ]
    crop_membership['nitrogen_fixer'] = [
        'bean',
        'greenbean',
        'soybean',
        'chickpea',
        'clover',
        'cowpea',
        'groundnut',
        'lupin',
        'pea',
        'greenpea',
        'pigeonpea',
        'lentil',
        'legumenes',
        'broadbean',
        'castor',
    ]

    # Same five keys as crop_membership; used for the output column names.
    p.crop_types = [
        'c3_annual',
        'c3_perennial',
        'c4_annual',
        'c4_perennial',
        'nitrogen_fixer',
    ]

    if p.run_this:
        # Create a DF of zeros, ready to hold the summed results for each crop type. Index given will be from baseline_regression_data_df so that spatial indices match.
        crop_specific_df = pd.DataFrame(
            0,
            index=baseline_regression_data_df.index,
            columns=['solo_column'])

        # One zero-initialised '<type>_calories_per_ha' column per crop type.
        crop_types_df = pd.DataFrame(0,
                                     index=baseline_regression_data_df.index,
                                     columns=[
                                         crop_type + '_calories_per_ha'
                                         for crop_type in p.crop_types
                                     ])

        # Iterate through crop_types
        for crop_type, crops in crop_membership.items():

            L.info('Aggregating ' + str(crop_type) + ' ' + str(crops))
            crop_type_col_name = crop_type + '_calories_per_ha'

            # iterate through crops
            for crop in crops:
                crop_col_name = crop + '_calories_per_ha'
                #crop_specific_df[crop_col_name] = np.zeros(len(baseline_regression_data_df.index))
                # Column is first created as zeros, then overwritten below
                # with the values loaded from the crop's raster.
                crop_specific_df[crop_col_name] = crop_specific_df[
                    'solo_column']

                input_crop_file_name = crop + '_calories_per_ha_masked'

                input_path = os.path.join(p.input_dir, 'Crop/crop_calories',
                                          input_crop_file_name + '.tif')
                af = hb.ArrayFrame(input_path)
                # Column 0 of the flattened raster, assigned into the
                # pixel_id-indexed frame.
                crop_specific_df[crop_col_name] = convert_af_to_1d_df(af)[0]

                # Accumulate this crop into its type's running total.
                crop_types_df[crop_type_col_name] += crop_specific_df[
                    crop_col_name]

            # To be fixed for weird NoData too high values in inputs files: (JUSTIN?)
            # crop_types_df[output_col_name][crop_specific_df[output_col_name] > 1e+12] = 0.0

        # Overall total: sum of the five per-type columns.
        crop_types_df['calories_per_ha'] = sum(
            crop_types_df[crop_type_cal_per_ha] for crop_type_cal_per_ha in
            [crop_type + '_calories_per_ha' for crop_type in p.crop_types])
        crop_types_df.to_csv(p.aggregated_crop_data_csv_path)
Exemplo n.º 22
0
def a_greater_than_zero_b_equal_zero(a_path, b_path, output_path):
    """Write 1 where ``a > 0`` and ``b == 0`` (0 elsewhere) and return the result as an ArrayFrame."""
    def flag_cells(a, b):
        a_positive = a > 0
        b_zero = b == 0
        return np.where(a_positive & b_zero, 1, 0)

    raster_inputs = [a_path, b_path]
    hb.raster_calculator_flex(raster_inputs, flag_cells, output_path)
    return hb.ArrayFrame(output_path)
Exemplo n.º 23
0
def proportion_change(after, before, output_path):
    """Write ``(after - before) / before`` to ``output_path`` and return it as an ArrayFrame."""
    def relative_change(after, before):
        difference = after - before
        return difference / before

    raster_inputs = [after, before]
    hb.raster_calculator_flex(raster_inputs, relative_change, output_path)
    return hb.ArrayFrame(output_path)
Exemplo n.º 24
0
def divide(a_path, b_path, output_path):
    """Write the cell-wise quotient ``a / b`` to ``output_path`` and return it as an ArrayFrame."""
    def quotient(a, b):
        return a / b

    raster_inputs = [a_path, b_path]
    hb.raster_calculator_flex(raster_inputs, quotient, output_path)
    return hb.ArrayFrame(output_path)
Exemplo n.º 25
0
def greater_than(a_path, b_path, output_path):
    """Write 1 where ``a > b`` (0 elsewhere) to ``output_path`` and return it as an ArrayFrame."""
    def a_exceeds_b(a, b):
        exceeds = a > b
        return np.where(exceeds, 1, 0)

    raster_inputs = [a_path, b_path]
    hb.raster_calculator_flex(raster_inputs, a_exceeds_b, output_path)
    return hb.ArrayFrame(output_path)
Exemplo n.º 26
0
def multiply(a_path, b_path, output_path):
    """Write the element-wise product ``a * b`` of two rasters.

    Args:
        a_path: First raster input.
        b_path: Second raster input.
        output_path: Path the resulting raster is written to.

    Returns:
        An ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def product(left, right):
        return left * right

    raster_inputs = [a_path, b_path]
    hb.raster_calculator_flex(raster_inputs, product, output_path)
    result_af = hb.ArrayFrame(output_path)
    return result_af
Exemplo n.º 27
0
def subtract(a_path, b_path, output_path):
    """Write the element-wise difference ``a - b`` of two rasters.

    Args:
        a_path: Raster input that is subtracted from.
        b_path: Raster input that is subtracted.
        output_path: Path the resulting raster is written to.

    Returns:
        An ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def difference(minuend, subtrahend):
        return minuend - subtrahend

    raster_inputs = [a_path, b_path]
    hb.raster_calculator_flex(raster_inputs, difference, output_path)
    result_af = hb.ArrayFrame(output_path)
    return result_af
def af_where_lt_value_set_to(a, value, set_to, output_path):
    """Replace every cell of ``a`` that is below ``value`` with ``set_to``.

    Cells that are not below ``value`` keep their original value.

    Args:
        a: Raster input handed to ``hb.raster_calculator_af_flex``
            (presumably an ArrayFrame, going by the function name -- confirm).
        value: Threshold; cells strictly less than this are replaced.
        set_to: Replacement written into the cells below the threshold.
        output_path: Path the resulting raster is written to.

    Returns:
        An ``hb.ArrayFrame`` opened on ``output_path``.
    """
    def replace_below_threshold(a):
        # `value` and `set_to` are captured from the enclosing call.
        below_threshold = a < value
        return np.where(below_threshold, set_to, a)

    raster_inputs = [a]
    hb.raster_calculator_af_flex(raster_inputs, replace_below_threshold,
                                 output_path)
    result_af = hb.ArrayFrame(output_path)
    return result_af
# Module-level setup for a carbon-map script: builds input paths, creates a
# per-run working folder and opens the fixed input rasters.
# `base_data_folder` and `ipcc_uri` are not defined in this excerpt; they must
# already exist at this point. The relative paths use literal backslashes, so
# they are Windows-specific.

# Paths to the Avitabile and GEOCARBON biomass rasters (both filenames carry
# "AGB", presumably above-ground biomass -- confirm).
avitabile_uri = os.path.join(base_data_folder,
                             'carbon\\avitabile\\Avitabile_AGB_Map.tif')
geocarbon_uri = os.path.join(
    base_data_folder,
    'carbon\\avitabile\\GEOCARBON_Global_Forest_Biomass\\GEOCARBON_Global_Forest_AGB_10072015.tif'
)

# Set folders
temp_folder = 'C:\\temp'
# Each run gets its own folder, named with a random suffix from hb.
run_folder = os.path.join(temp_folder, 'run_' + hb.random_string())
# os.mkdir does not create parent directories, so `temp_folder` must already
# exist. This runs as a side effect whenever the module is executed.
os.mkdir(run_folder)
intermediate_folder = os.path.join(
    base_data_folder, 'carbon\\johnson\\decision_tree_combined_carbon')

# Open fixed inputs as arrayframes
ipcc = hb.ArrayFrame(ipcc_uri)
avitabile = hb.ArrayFrame(avitabile_uri)
geocarbon = hb.ArrayFrame(geocarbon_uri)

# Additional resources to calculate totals
ha_per_cell_30s_uri = os.path.join(base_data_folder,
                                   'misc\\ha_per_cell_30s.tif')
# NOTE(review): only the path is built for the land-only variant; it is not
# opened as an ArrayFrame in this excerpt.
land_ha_per_cell_30s_uri = os.path.join(base_data_folder,
                                        'misc\\land_ha_per_cell_30s.tif')
ha_per_cell = hb.ArrayFrame(ha_per_cell_30s_uri)

# Logic on abg, c conversions.
# NOTE(review): "abg" here and in the variable name may be a transposition of
# "agb" as used in the raster filenames above -- confirm before renaming.
carbon_abg_proportion_common_value = 0.5  # What saatchi used.

explanation_for_carbon_abg_proportion = """From Djomo et al: The forest carbon stocks are 
widely estimated from the allometric
Exemplo n.º 30
0
def raster_calculator_flex(
    input_, op, output_path, **kwargs
):  #, datatype=None, ndv=None, gtiff_creation_options=None, compress=False
    """Apply ``op`` to one or more rasters and write the result to disk.

    A convenience wrapper around ``hb.raster_calculator`` that accepts loosely
    typed input and fills in the output datatype, nodata value and GeoTIFF
    creation options when they are not given.

    Args:
        input_: A raster path, an ``hb.ArrayFrame``, or an iterable mixing
            the two. ArrayFrames are replaced by their ``.path``.
        op: Callable applied to the rasters. It must declare exactly one
            parameter per input raster.
        output_path: Path the resulting raster is written to.
        **kwargs: Optional settings:
            datatype: Output datatype. When omitted (or falsy) the datatype
                of the first input is used.
            ndv: Output nodata value. When omitted (``None``) the nodata
                value of the first input is used. An explicit ``0`` is
                honored.
            gtiff_creation_options: Sequence of GeoTIFF creation option
                strings. Defaults to ``['TILED=YES', 'BIGTIFF=IF_SAFER']``.
                The caller's sequence is never modified.
            compress: If truthy, ``'COMPRESS=lzw'`` is appended to the
                creation options.

    Returns:
        An ``hb.ArrayFrame`` opened on ``output_path``.

    Raises:
        NameError: If ``input_`` is empty or not a path, ArrayFrame or
            iterable of those, or if the number of parameters of ``op`` does
            not match the number of input rasters.
        FileNotFoundError: If any input path does not exist.
    """
    # Normalize input_ to a flat list. A lone path or ArrayFrame is wrapped
    # first so that it is never iterated character by character.
    if isinstance(input_, (str, hb.ArrayFrame)):
        input_ = [input_]
    else:
        try:
            input_ = list(input_)
        except TypeError:
            raise NameError(
                'input_ given to raster_calculator_flex() not understood. Give a path or list of paths.'
            ) from None

    if not input_:
        raise NameError(
            'input_ given to raster_calculator_flex() not understood. Give a path or list of paths.'
        )

    # From here on every entry is a path.
    input_ = [i.path if isinstance(i, hb.ArrayFrame) else i for i in input_]
    input_size = len(input_)

    # Check that files exist, naming the specific path that is missing.
    for path in input_:
        if not os.path.exists(path):
            raise FileNotFoundError(
                str(path) + ' not found by raster_calculator_flex()')

    # Verify datatypes
    datatype = kwargs.get('datatype', None)
    if not datatype:
        datatypes = [hb.get_datatype_from_uri(i) for i in input_]
        if len(set(datatypes)) > 1:
            L.info(
                'Rasters given to raster_calculator_flex() were not all of the same type. Defaulting to using first input datatype.'
            )
        datatype = datatypes[0]

    # Check NDVs. Compare against None so that an explicit ndv=0 is kept.
    ndv = kwargs.get('ndv', None)
    if ndv is None:
        ndvs = [hb.get_nodata_from_uri(i) for i in input_]
        if len(set(ndvs)) > 1:
            L.info(
                'NDVs used in rasters given to raster_calculator_flex() were not all the same. Defaulting to using first value.'
            )
        ndv = ndvs[0]

    gtiff_creation_options = kwargs.get('gtiff_creation_options', None)
    if not gtiff_creation_options:
        gtiff_creation_options = ['TILED=YES',
                                  'BIGTIFF=IF_SAFER']  #, 'COMPRESS=lzw']
    else:
        # Work on a copy so appending below never mutates the caller's list.
        gtiff_creation_options = list(gtiff_creation_options)

    if kwargs.get('compress', None):
        gtiff_creation_options.append('COMPRESS=lzw')

    # Build (path, band) tuples to match the required format of
    # raster_calculator; band 1 is always used.
    input_tuples_list = [(path, 1) for path in input_]

    # Check that the op matches the number of rasters.
    if len(inspect.signature(op).parameters) != input_size:
        raise NameError(
            'op given to raster_calculator_flex() did not have the same number of parameters as the number of rasters given.'
        )

    hb.raster_calculator(input_tuples_list,
                         op,
                         output_path,
                         datatype,
                         ndv,
                         gtiff_creation_options=gtiff_creation_options)

    return hb.ArrayFrame(output_path)