Exemplo n.º 1
0
                    ecozone_values = []
                    for zone in ecozones:
                        cwd = os.getcwd()
                        ecozone_shapefile = cwd + '/ecozone_shp/' + zone + '.shp'
                        boolean_map = GD.get_intersect_boolean_array(
                            ecozone_shapefile, shapefile, False, True)
                        surface, maxmin = idw.IDW(latlon_station, days_dict,
                                                  year, '# Days', shapefile,
                                                  False, 4, True)
                        AvVal = GD.get_average_in_ecozone(boolean_map, surface)
                        ecozone_values.append(AvVal)

                elif interpolator == 'RF':
                    grd_size, maxmin = rf.random_forest_interpolator(
                        latlon_station, days_dict, year, '# Days', shapefile,
                        False, file_path_elev, idx_list, True)

                    try:
                        inBoreal = GD.is_station_in_boreal(
                            latlon_station, days_dict, boreal_shapefile)
                        filtered_dict = {
                            k: v
                            for k, v in days_dict.items() if k in inBoreal
                        }
                        num_stations = len(filtered_dict.keys(
                        ))  #Number clusters= # stations / 3, /5, /10
                        cluster_num1 = int(round(num_stations / 3))
                        cluster_num2 = int(round(num_stations / 5))
                        cluster_num3 = int(round(num_stations / 10))
                        cluster_num,MAE,stdev_stations = rf.select_block_size_rf(10,'clusters',latlon_station,days_dict,grd_size,shapefile,\
def run_comparison(
    var_name,
    input_date,
    interpolation_types,
    rep,
    loc_dictionary,
    cvar_dictionary,
    file_path_elev,
    elev_array,
    idx_list,
    phi_input=None,
    calc_phi=True,
    kernels={
        'temp':
        ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
        'rh':
        ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
        'pcp':
        ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
        'wind':
        ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']
    }):
    '''Cross-validate the requested interpolation types and build a surface
    with the one that scores lowest.

    Every entry of ``interpolation_types`` is scored with the shuffle-split
    routine of its module and the score is stored under the method name.
    The method with the smallest score is selected, and that same method is
    then used to interpolate the surface for ``input_date``.

    NOTE(review): ``shapefile`` is not a parameter of this function; it is
    read from the enclosing module scope and must be defined there.

    Parameters
    ----------
        var_name : string
             name of weather variable you are interpolating; also the key
             used to look up the GPR kernel in ``kernels``
        input_date : string
             date of weather data (day of fire season)
        interpolation_types : list
             list of interpolation types to consider, each one of 'IDW2',
             'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS', 'GPR', 'RF'
        rep : int
             number of replications to run (passed to the IDW and IDEW
             cross-validation only; TPS, RF and GPR are called with 10)
        loc_dictionary : dictionary
             dictionary of station locations
        cvar_dictionary : dictionary
             dictionary containing the weather data for each station available
        file_path_elev : string
             path to the elevation lookup file
        elev_array : ndarray
             array for elevation, create using IDEW interpolation (this is a
             trick to speed up code)
        idx_list : int
             position of the elevation column in the lookup file
        phi_input : float
             smoothing parameter for the thin plate spline, if 0 no smoothing,
             default is None (it is calculated)
        calc_phi : bool
             whether to calculate phi in the function, if True, phi can = None
        kernels : dictionary
             the kernels for each weather variable for gaussian process
             regression (only read, never modified, in this function)

    Returns
    ----------
        tuple
            - (best_method, choix_surf, maxmin): the name of the selected
              interpolation type, followed by the two values returned by
              that interpolator for ``input_date``

    Raises
    ----------
        SystemExit
            if an entry of ``interpolation_types`` is not supported
        ValueError
            if ``interpolation_types`` is empty
    '''
    # Distance-weighting exponent encoded in each IDW / IDEW method name.
    idw_powers = {'IDW2': 2, 'IDW3': 3, 'IDW4': 4}
    idew_powers = {'IDEW2': 2, 'IDEW3': 3, 'IDEW4': 4}
    supported = ('IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS',
                 'GPR', 'RF')

    MAE_dict = {}
    for method in interpolation_types:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()

        if method in idw_powers:
            MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary,
                                    shapefile, idw_powers[method], rep, False)
        elif method in idew_powers:
            MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                          shapefile, file_path_elev,
                                          elev_array, idx_list,
                                          idew_powers[method], rep)
        elif method == 'TPS':
            MAE = tps.shuffle_split_tps(loc_dictionary, cvar_dictionary,
                                        shapefile, 10)
        elif method == 'RF':
            MAE = rf.shuffle_split_rf(loc_dictionary, cvar_dictionary,
                                      shapefile, file_path_elev, elev_array,
                                      idx_list, 10)
        else:  # 'GPR' is the only supported name left
            MAE = gpr.shuffle_split_gpr(loc_dictionary, cvar_dictionary,
                                        shapefile, file_path_elev, elev_array,
                                        idx_list, kernels[var_name], 10)
        MAE_dict[method] = MAE

    if not MAE_dict:
        raise ValueError(
            'No interpolation types were provided, so none can be selected.')

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # The surface must come from the winning method, not from whichever
    # method happened to be iterated last in the loop above.
    if best_method in idw_powers:
        choix_surf, maxmin = idw.IDW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, idw_powers[best_method],
            False)  # Expand_area is not supported yet
    elif best_method in idew_powers:
        # IDEW also returns an elevation array, which is not needed here.
        choix_surf, maxmin, _ = idew.IDEW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable',
            shapefile, False, file_path_elev, idx_list,
            idew_powers[best_method])  # Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False)
    else:  # 'GPR'
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, kernels[var_name], None,
            None, False, False)

    return best_method, choix_surf, maxmin
def run_comparison(var_name,input_date,interpolation_types,rep,loc_dictionary,cvar_dictionary,file_path_elev,elev_array,idx_list,phi_input=None,calc_phi=True,\
                   kernels={'temp':['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)']\
                            ,'rh':['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],\
                            'pcp':['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],\
                            'wind':['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Cross-validate the interpolation types listed for ``var_name`` and
    build a surface with the one that scores lowest.

    Every method in ``interpolation_types[var_name]`` is scored with the
    shuffle-split routine of its module (all called with ``res=10000``) and
    the score is stored under the method name. The method with the smallest
    score is selected, and that same method is then used to interpolate the
    surface for ``input_date``.

    NOTE(review): ``shapefile`` is not a parameter of this function; it is
    read from the enclosing module scope and must be defined there.

     Parameters
         var_name (str): weather variable name; key into both
             ``interpolation_types`` and ``kernels``
         input_date (str): date passed to the selected interpolator
         interpolation_types (dict): maps a variable name to the list of
             interpolation types to consider, each one of 'IDW2', 'IDW3',
             'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS', 'GPR', 'RF'
         rep (int): replication count passed to the IDW and IDEW
             cross-validation (TPS, RF and GPR are called with 10)
         loc_dictionary (dict): station locations -- TODO confirm schema
         cvar_dictionary (dict): station weather values -- TODO confirm schema
         file_path_elev (str): elevation lookup file passed to the
             elevation-aware interpolators
         elev_array: elevation array passed to the IDEW, RF and GPR
             cross-validation
         idx_list: elevation column index information passed through to the
             elevation-aware interpolators
         phi_input: smoothing parameter forwarded to ``tps.TPS``
         calc_phi (bool): forwarded to ``tps.TPS``
         kernels (dict): GPR kernel specification per variable name (only
             read, never modified, in this function)
     Returns
         tuple: (best_method, choix_surf, maxmin) -- the name of the selected
             interpolation type, followed by the two values returned by that
             interpolator for ``input_date``
     Raises
         SystemExit: if a requested method is not supported
         ValueError: if no methods are listed for ``var_name``
     '''
    # Distance-weighting exponent encoded in each IDW / IDEW method name.
    idw_powers = {'IDW2': 2, 'IDW3': 3, 'IDW4': 4}
    idew_powers = {'IDEW2': 2, 'IDEW3': 3, 'IDEW4': 4}
    supported = ('IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS',
                 'GPR', 'RF')

    MAE_dict = {}
    for method in interpolation_types[var_name]:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()

        if method in idw_powers:
            MAE = idw.shuffle_split(loc_dictionary,
                                    cvar_dictionary,
                                    shapefile,
                                    idw_powers[method],
                                    rep,
                                    False,
                                    res=10000)
        elif method in idew_powers:
            MAE = idew.shuffle_split_IDEW(loc_dictionary,
                                          cvar_dictionary,
                                          shapefile,
                                          file_path_elev,
                                          elev_array,
                                          idx_list,
                                          idew_powers[method],
                                          rep,
                                          res=10000)
        elif method == 'TPS':
            MAE = tps.shuffle_split_tps(loc_dictionary,
                                        cvar_dictionary,
                                        shapefile,
                                        10,
                                        res=10000)
        elif method == 'RF':
            MAE = rf.shuffle_split_rf(loc_dictionary,
                                      cvar_dictionary,
                                      shapefile,
                                      file_path_elev,
                                      elev_array,
                                      idx_list,
                                      10,
                                      res=10000)
        else:  # 'GPR' is the only supported name left
            MAE = gpr.shuffle_split_gpr(loc_dictionary,
                                        cvar_dictionary,
                                        shapefile,
                                        file_path_elev,
                                        elev_array,
                                        idx_list,
                                        kernels[var_name],
                                        10,
                                        res=10000)
        MAE_dict[method] = MAE

    if not MAE_dict:
        raise ValueError(
            'No interpolation types were provided for %s, so none can be selected.'
            % (var_name))

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # The surface must come from the winning method, not from whichever
    # method happened to be iterated last in the loop above.
    if best_method in idw_powers:
        choix_surf, maxmin = idw.IDW(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            idw_powers[best_method],
            False,
            res=10000)  #Expand_area is not supported yet
    elif best_method in idew_powers:
        # IDEW also returns an elevation array, which is not needed here.
        choix_surf, maxmin, _ = idew.IDEW(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            file_path_elev,
            idx_list,
            idew_powers[best_method],
            False,
            res=10000)  #Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary,
                                     cvar_dictionary,
                                     input_date,
                                     'Variable',
                                     shapefile,
                                     False,
                                     phi_input,
                                     False,
                                     calc_phi,
                                     res=10000)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            file_path_elev,
            idx_list,
            False,
            res=10000)
    else:  # 'GPR'
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            file_path_elev,
            idx_list,
            False,
            kernels[var_name],
            0,
            False,
            False,
            res=10000)

    return best_method, choix_surf, maxmin
def stack_and_average(year1, year2, file_path_daily, file_path_hourly,
                      shapefile, file_path_elev, idx_list, method):
    '''Get the fire season duration for every year in between the two input years
    and average them. Output the average array.

    For each year the start and end dates are read from the daily files
    (and, for years from 2020 on, merged with the hourly files), both are
    interpolated to surfaces with the chosen method, the duration surface is
    computed from them, and finally all yearly duration surfaces are stacked
    along the depth axis and averaged pixel by pixel.

    Parameters
    ----------

    year1 : int
        first year taken into account
    year2 : int
        last year taken into account
    file_path_daily : string
        path to the daily weather csv files from Environment & Climate Change Canada
    file_path_hourly : string
        path to the hourly feather files
    shapefile : string
        path to the study area shapefile
    file_path_elev : string
        path to the elevation lookup file
    idx_list : list
        column index of elevation information in the lookup file
    method : string
        type of interpolation to use to create the yearly arrays, one of: 'IDW2', 'IDW3', 'IDW4', 'TPSS', 'RF'

    Returns
    ----------
    ndarray
        - average of each pixel of all the years considered in array format

    Raises
    ----------
    ValueError
        if ``method`` is not one of the supported interpolation types
    '''
    supported = ('IDW2', 'IDW3', 'IDW4', 'TPSS', 'RF')
    # Fail before any file is read: without a supported method no surface
    # can be produced and the duration calculation below cannot run.
    if method not in supported:
        raise ValueError(
            'Either that method does not exist or there is no support for it. You can use IDW2-4, TPSS, or RF'
        )

    # Distance-weighting exponent encoded in each IDW method name.
    idw_powers = {'IDW2': 2, 'IDW3': 3, 'IDW4': 4}

    list_of_arrays = []
    for year in range(int(year1), int(year2) + 1):
        print('Processing...' + str(year))
        year_str = str(year)
        days_dict, latlon_station = fwi.start_date_calendar_csv(
            file_path_daily, year_str)
        end_dict, latlon_station2 = fwi.end_date_calendar_csv(
            file_path_daily, year_str, 'oct')
        if year >= 2020:
            # From 2020 on, the hourly records are merged into the daily ones.
            hourly_dict, latlon_stationH = fwi.start_date_add_hourly(
                file_path_hourly, year_str)
            hourly_end, latlon_stationE = fwi.end_date_add_hourly(
                file_path_hourly, year_str)

            days_dict = combine_stations(days_dict, hourly_dict)
            latlon_station = combine_stations(latlon_station, latlon_stationH)

            end_dict = combine_stations(end_dict, hourly_end)
            latlon_station2 = combine_stations(latlon_station2,
                                               latlon_stationE)

        if method in idw_powers:
            power = idw_powers[method]
            start_surface, _ = idw.IDW(latlon_station, days_dict, year_str,
                                       'Start', shapefile, False, power, True)
            end_surface, _ = idw.IDW(latlon_station2, end_dict, year_str,
                                     'End', shapefile, False, power, True)

        elif method == 'TPSS':
            # Each surface gets a smoothing parameter derived from its own
            # station count: n - sqrt(2 * n).
            num_stationsS = len(days_dict)
            phi_inputS = num_stationsS - math.sqrt(2 * num_stationsS)
            num_stationsE = len(end_dict)
            phi_inputE = num_stationsE - math.sqrt(2 * num_stationsE)
            start_surface, _ = tps.TPS(latlon_station, days_dict, year_str,
                                       'Start', shapefile, False, phi_inputS,
                                       True, True)
            end_surface, _ = tps.TPS(latlon_station2, end_dict, year_str,
                                     'End', shapefile, False, phi_inputE,
                                     True, True)

        else:  # 'RF'
            start_surface, _ = rf.random_forest_interpolator(
                latlon_station, days_dict, year_str, 'Start', shapefile,
                False, file_path_elev, idx_list, True)
            end_surface, _ = rf.random_forest_interpolator(
                latlon_station2, end_dict, year_str, 'End', shapefile, False,
                file_path_elev, idx_list, True)

        dur_array = calc_season_duration(start_surface, end_surface, year)
        list_of_arrays.append(dur_array)

    voxels = np.dstack(list_of_arrays)  #stack arrays based on depth
    # Mean over the depth (year) axis, i.e. one averaged value per pixel.
    averaged_voxels = np.mean(voxels, axis=2)
    return averaged_voxels