def test_misfit_calculation_lowess():
    """
    Checks the first row of the misfit table produced with the "lowess" method

    Builds the profile and manual-profile fixtures, runs misfit_calculation over a
    small bin / theta grid, converts the misfit columns to integers (missing values
    become 0) and compares the row labelled 0 against the expected values.
    """
    profile = create_profile_for_calculating_misfit()
    profile_manual = create_manual_profile_for_calculating_misfit()

    misfit_df = misfit_calculation(
        profile,
        profile_manual,
        method="lowess",
        bin_max=11,
        bin_min=5,
        bin_step=2,
        theta_T_max=40,
        theta_T_min=30,
        theta_T_step=2,
        phi_T=5,
        plot=False,
        save_file=False,
    )

    # Integer-cast the misfit columns so the comparison below is exact.
    for column in ('misfit_height', 'misfit_width', 'misfit_slope', 'misfit_total'):
        misfit_df[column] = misfit_df[column].fillna(0).astype(int)

    expected_first_row = [1, 'lowess', 1, 5, 4, 30, 5, 10]
    actual_first_row = misfit_df.loc[0].to_list()
    assert actual_first_row == expected_first_row
def find_best(df, df_manual, methods, bin_max, bin_min, bin_step, theta_t_max, theta_t_min, theta_t_step, phi_t,
              criteria_weights, plot):
    """
    Finds the best set of parameters to use in the algorithm

    For every method and every profile listed in df_manual, misfit_calculation is run
    over the requested bin and theta ranges. The misfits are then averaged over all
    profiles for each (method, theta_t, bin) combination, the weighted total misfit is
    computed, the combination(s) with the lowest misfit are printed for each criterion
    (total, height, width, slope) and both the averaged table and the table of best
    rows are handed to save_to_csv (under the names 'all' and 'best').

    :param: df <DataFrame> The DataFrame with profile data in
    :param: df_manual <DataFrame> The DataFrame with the manual results in to compare to
    :param: methods <list> A list of methods to be used
    :param: bin_max <int> The maximum smoothing bin value to use
    :param: bin_min <int> The minimum smoothing bin value to use
    :param: bin_step <int> The smoothing bin step value to use
    :param: theta_t_max <int> The maximum theta value to use
    :param: theta_t_min <int> The minimum theta value to use
    :param: theta_t_step <int> The theta step value to use
    :param: phi_t <int> The phi value to use
    :param: criteria_weights <list> The list of weights to apply to the results, in the
            order [height, width, slope]
    :param: plot <boolean> Whether to make the summary heatmaps or not (the individual
            misfit_calculation runs are always done without plots)
    :raises: ValueError if no misfit results are produced (empty methods or df_manual)
    """
    # Imported locally so this function does not depend on how the module aliases pandas.
    import pandas as pd

    # Collect the per-profile results and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    misfit_frames = []
    for method in methods:
        print(f"Performing misfit analysis for method: {method}")
        # Iterate the profile numbers directly instead of looking them up by the
        # enumerate counter, which only worked when df_manual had a 0..n-1 index.
        for profile_number in tqdm(df_manual['profile']):
            profile = df[df['profile'] == profile_number].reset_index()
            profile_manual = df_manual[df_manual['profile'] == profile_number]
            misfit_frames.append(
                misfit_calculation(profile, profile_manual, method, bin_max, bin_min, bin_step,
                                   theta_t_max, theta_t_min, theta_t_step, phi_t,
                                   plot=False, save_file=False))

    if not misfit_frames:
        raise ValueError("No misfit results to analyse: 'methods' or 'df_manual' is empty")

    df_out = pd.concat(misfit_frames)

    # Average over profiles for each parameter combination; the averaged profile
    # number is meaningless, so it is dropped.
    df_out_average = df_out.groupby(['method', 'theta_t', 'bin']).mean().reset_index().drop('profile', axis=1)

    if plot:
        for method in methods:
            df_out_average_method = df_out_average[df_out_average['method'] == method]
            for value in ['height', 'width', 'slope']:
                # Pivot to a bin (rows) x theta_t (columns) grid for the heatmap.
                df_out_average_plot = (
                    df_out_average_method[[f'misfit_{value}', 'bin', 'theta_t']]
                    .groupby(['theta_t', 'bin'])
                    .mean()
                    .unstack(level=0)
                )
                df_out_average_plot.columns = df_out_average_plot.columns.droplevel()
                ax = sns.heatmap(df_out_average_plot, linewidths=0.1, annot=False, cbar=True,
                                 cbar_kws={'label': f'misfit_{value}'}, vmin=0, cmap="viridis")
                ax.set_title(f'{method}')
                plt.show()

    # Weighted sum of the three individual misfits.
    df_out_average['misfit_total'] = (
        df_out_average['misfit_height'] * criteria_weights[0]
        + df_out_average['misfit_width'] * criteria_weights[1]
        + df_out_average['misfit_slope'] * criteria_weights[2]
    )

    best_frames = []
    for criteria in ['total', 'height', 'width', 'slope']:
        misfit_column = f'misfit_{criteria}'
        # Series.min() skips NaN, unlike the builtin min(), whose result depends
        # on where a NaN happens to sit in the column.
        best_value = df_out_average[misfit_column].min()
        the_best = df_out_average[df_out_average[misfit_column] == best_value]
        print(f'\nThe best parameters using criteria {criteria} are:')
        print(the_best.to_string())
        best_frames.append(the_best)
    df_out_best = pd.concat(best_frames)

    save_to_csv(df_out_average, name='all')
    save_to_csv(df_out_best, name='best')