Пример #1
0
def test_misfit_calculation_lowess():
    """Check the first misfit row produced by the "lowess" method.

    Builds the automatic and manual fixture profiles, runs
    ``misfit_calculation`` over a small bin / theta_T grid with plotting and
    file output switched off, and compares the first result row against the
    expected values.
    """
    calculation_kwargs = dict(
        method="lowess",
        bin_max=11,
        bin_min=5,
        bin_step=2,
        theta_T_max=40,
        theta_T_min=30,
        theta_T_step=2,
        phi_T=5,
        plot=False,
        save_file=False,
    )
    auto_profile = create_profile_for_calculating_misfit()
    manual_profile = create_manual_profile_for_calculating_misfit()
    result = misfit_calculation(auto_profile, manual_profile,
                                **calculation_kwargs)

    # Replace missing misfits with 0 and truncate to integers so the row can
    # be compared with exact equality below.
    for column in ('misfit_height', 'misfit_width',
                   'misfit_slope', 'misfit_total'):
        result[column] = result[column].fillna(0).astype(int)

    expected_first_row = [1, 'lowess', 1, 5, 4, 30, 5, 10]
    assert result.loc[0].to_list() == expected_first_row
def find_best(df, df_manual, methods, bin_max, bin_min,
              bin_step, theta_t_max,
              theta_t_min, theta_t_step, phi_t, criteria_weights,
              plot):
    """
    Finds the best set of parameters to use in the algorithm

    For every method and every entry in the 'profile' column of df_manual,
    misfit_calculation is run (always with plot = False and
    save_file = False). The results are averaged per
    (method, theta_t, bin), a weighted 'misfit_total' is computed from
    criteria_weights, and the rows with the lowest misfit for each of the
    criteria 'total', 'height', 'width' and 'slope' are printed. The averaged
    table and the best rows are then passed to save_to_csv under the names
    'all' and 'best'. Nothing is returned.

    :param: df <DataFrame> The DataFrame with profile data in
    :param: df_manual <DataFrame> The DataFrame with the manual results in to compare to
    :param: methods <list> A list of methods to be used
    :param: bin_max <int> The maximum smoothing bin value to use
    :param: bin_min <int> The minimum smoothing bin value to use
    :param: bin_step <int> The smoothing bin step value to use
    :param: theta_t_max <int> The maximum theta value to use
    :param: theta_t_min <int> The minimum theta value to use
    :param: theta_t_step <int> The theta step value to use
    :param: phi_t <int> The phi value to use
    :param: criteria_weights <list> The list of weights to apply to the results,
        in the order [height, width, slope]
    :param: plot <boolean> Whether to show a heatmap of the averaged misfits for
        each method and criterion
    :raises: ValueError if methods or df_manual is empty, so that no misfit
        results are produced
    """
    # Imported locally so the function does not depend on how (or whether)
    # the enclosing module aliases pandas.
    import pandas as pd

    # Collect the per-profile results and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0.
    misfit_frames = []
    for method in methods:
        print(f"Performing misfit analysis for method: {method}")
        # Iterate over the column values themselves rather than looking them
        # up by position, which only works for a default RangeIndex.
        for profile_number in tqdm(df_manual['profile']):
            profile = df[df['profile'] == profile_number].reset_index()
            profile_manual = df_manual[df_manual['profile'] == profile_number]

            misfit_frames.append(
                misfit_calculation(profile, profile_manual,
                                   method, bin_max, bin_min,
                                   bin_step, theta_t_max,
                                   theta_t_min, theta_t_step, phi_t,
                                   plot = False, save_file = False))

    if not misfit_frames:
        raise ValueError(
            "No misfit results were produced: 'methods' and the 'profile' "
            "column of 'df_manual' must both be non-empty")

    df_out = pd.concat(misfit_frames)

    # Average over profiles; the averaged 'profile' column is meaningless.
    df_out_average = df_out.groupby(['method', 'theta_t', 'bin']).mean().reset_index().drop('profile', axis = 1)

    if plot:
        for method in methods:
            df_out_average_method = df_out_average[df_out_average['method'] == method]
            for value in ['height', 'width', 'slope']:
                # Pivot to a bin (rows) x theta_t (columns) grid for the heatmap.
                df_out_average_plot = df_out_average_method[[f'misfit_{value}', 'bin', 'theta_t']].groupby(
                    ['theta_t', 'bin']).mean().unstack(
                    level = 0)
                df_out_average_plot.columns = df_out_average_plot.columns.droplevel()
                ax = sns.heatmap(df_out_average_plot, linewidths = 0.1, annot = False, cbar = True,
                                 cbar_kws = {'label': f'misfit_{value}'}, vmin = 0, cmap = "viridis")
                ax.set_title(f'{method}')

                plt.show()

    # Overwrite the averaged total with the weighted sum of the three criteria.
    df_out_average['misfit_total'] = \
        df_out_average['misfit_height'] * criteria_weights[0] + \
        df_out_average['misfit_width'] * criteria_weights[1] + \
        df_out_average['misfit_slope'] * criteria_weights[2]

    best_frames = []
    for criteria in ['total', 'height', 'width', 'slope']:
        misfit_column = df_out_average[f'misfit_{criteria}']
        # Series.min() skips NaN, unlike the builtin min(), whose result
        # depends on element order when NaN is present.
        best_value = misfit_column.min()
        the_best = df_out_average[misfit_column == best_value]
        print(f'\nThe best parameters using criteria {criteria} are:')
        print(the_best.to_string())
        best_frames.append(the_best)

    df_out_best = pd.concat(best_frames)

    save_to_csv(df_out_average, name = 'all')
    save_to_csv(df_out_best, name = 'best')