def main(argv=None):
    """Summarize avg gains of 'Exploit Candidates' experiments as a LaTeX table.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` makes
            `argparse` fall back to `sys.argv`.

    Side effects:
        Saves a `.tex` file under `common.APP.FOLDER.FIGURE` and prints progress.
    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments", nargs="*", type=str,
        help="Exploit Candidates experiment result file path(s)")
    parser.add_argument(
        "-p", "--fpath", type=str,
        help="Full path of the folder containing Exploit Candidates experiment result files. "
             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument(
        "-d", "--dec", type=int, default=2,
        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim", nargs="*", type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments'/'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        # Format numeric cells with `dec_digits` precision; pass other cells through
        return "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load 'exploit candidates' experiment output
        exp_exploit_output = common.load_obj(exp)  # type: exploit.ExpExploitOutput
        # Get the dataset name
        dataset_name = common.file_name_wo_ext(exp_exploit_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
        # Get the average gains in experiments
        exp_exploit_data = exp_exploit_output.data
        exp_exploit_data = exp_exploit_data.loc[
            exp_exploit_data["update"] != 0]  # Filter the initial problem
        df_avg_gains = exp_exploit_data[["gain", "iterator"]].groupby("iterator").mean()
        # Avg gain dict: dataset id + time-window settings, then one column per iterator
        dict_avg_gains = {LBL_DATASET: dataset_name,
                          LBL_FWIDTH: run_common.time_window_width_str(
                              exp_exploit_output.settings.tw_width),
                          LBL_FSTEP: exp_exploit_output.settings.tw_step}
        avg_gain_keys = df_avg_gains.index.tolist()
        avg_gain_values = df_avg_gains["gain"].values
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        # Add the results to the output dataframe.
        # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
        # concatenate a single-row frame instead.
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])], ignore_index=True)
    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_exploit_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print("Avg Gain for TopDown vs ExploitCandidates Rank Iterations saved as LaTeX table into '{}'."
              .format(save_fpath))
    else:
        print("No average gain results could be calculated.")
def main(argv=None):
    """Summarize avg solution-hit % of classification experiments as a LaTeX table.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` makes
            `argparse` fall back to `sys.argv`.

    Side effects:
        Saves a `.tex` file under `common.APP.FOLDER.FIGURE` and prints progress.
    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments", nargs="*", type=str,
        help="Classification experiment result file path(s)")
    parser.add_argument(
        "-p", "--fpath", type=str,
        help="Full path of the folder containing classification experiment result files. "
             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument(
        "-c", "--confthold", nargs="+", type=float,
        default=[1., .98, .95, .9, .8],
        help="Confidence thresholds")
    parser.add_argument(
        "-z", "--z", type=float, default=-1.,
        help="z factor of the efficiency measure")
    parser.add_argument(
        "-d", "--dec", type=int, default=2,
        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--knni", type=int,
        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
             " 'None' to calculate for all kNNs."
             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument(
        "--wsoln", choices=[0, 1], type=int, default=0,
        help="1 to display hits upon interruption w/ exact solution")
    parser.add_argument(
        "--rtrim", nargs="*", type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments'/'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim

    def float_formatter(x):
        # Plain fixed-precision formatter used for confidence column labels
        return "{0:.{1}f}".format(x, dec_digits)

    def float_formatter_hit(x):
        # Hit values are ratios; scale to percentage for numeric cells only
        return "{0:.{1}f}".format(x * 100, dec_digits) if isinstance(x, (int, float)) else x

    wsoln = args.wsoln
    wsoln_tag = "_ws_{}".format(wsoln) if wsoln else ""
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_STOP_W_SOLN = "w\u2215Soln"
    # Create output dataframe
    df_hits_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                ([LBL_STOP_W_SOLN] if wsoln else []) +
                [gain_intrpt_classify.conf_col_label(c, float_formatter, arg_z)
                 for c in conf_thold if c != 1
                 ])  # Exclude conf=1.00 for hits, makes no sense for uninterruption
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load classification experiment
        exp_output = common.load_obj(
            exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            # Experiments run with a different 'z' are not comparable; skip them
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(
                      helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average hits in the classification experiment
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            # Avg hit dict
            df_avg_hits = alk.exp.classify.get_avg_hit_for_classify_exp(
                exp, conf_thold, wsoln, lblwsoln=LBL_STOP_W_SOLN)
            dict_avg_hits = {LBL_DATASET: dataset_name,
                             LBL_FWIDTH: run_common.time_window_width_str(
                                 exp_output.settings.tw_width),
                             LBL_FSTEP: exp_output.settings.tw_step}
            avg_hits_keys = [gain_intrpt_classify.conf_col_label(c, float_formatter, exp_z)
                             if isinstance(c, float) else c
                             for c in df_avg_hits.index.tolist()]
            avg_hits_values = df_avg_hits["hit"].values
            dict_avg_hits.update(dict(zip(avg_hits_keys, avg_hits_values)))
            # Add the results to the output dataframe.
            # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
            # concatenate a single-row frame instead.
            df_hits_output = pd.concat(
                [df_hits_output, pd.DataFrame([dict_avg_hits])], ignore_index=True)
    # Export the df_hits to LaTeX
    if len(df_hits_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_hits_output = df_hits_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_hits_output = df_hits_output.sort_index()
        save_fn_hit = "soln_hit_(x{})_[{}]_sd_{}_ki_{}{}".format(
            len(df_hits_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z),
            knn_i if knn_i is not None else "All", wsoln_tag)
        save_fpath_hit = os.path.join(common.APP.FOLDER.FIGURE,
                                      "{}.tex".format(save_fn_hit))
        df_hits_output.to_latex(buf=save_fpath_hit,
                                float_format=float_formatter_hit,
                                escape=False,
                                multirow=True,
                                index=True)
        # NOTE(review): the literal '%s' below is printed verbatim (it is not a
        # format placeholder for str.format) — possibly meant to be '%'; confirm.
        print("Avg Solution Hit %s saved as LaTeX table into '{}'.".format(
            save_fpath_hit))
    else:
        print("No average solution hit results could be calculated.")
def main(argv=None):
    """Summarize avg gains of 'insights' experiments as a LaTeX table.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` makes
            `argparse` fall back to `sys.argv`.

    Side effects:
        Saves a `.tex` file under `common.APP.FOLDER.FIGURE` and prints progress.
    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments", nargs="*", type=str,
        help="Interruption experiment result file path(s)")
    parser.add_argument(
        "-p", "--fpath", type=str,
        help="Full path of the folder containing interruption experiment result files. "
             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument(
        "-d", "--dec", type=int, default=2,
        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim", nargs="*", type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments'/'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        # Format numeric cells with `dec_digits` precision; pass other cells through
        return "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x

    def int_formatter(x):
        # Thousands-separated integers for the Updates and |CB| columns
        return '{:,}'.format(x)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_UPDATES = "Updates"
    LBL_CB_SIZE = "\u007CCB\u007C"
    LBL_GAIN = "Gain"
    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[
        LBL_DATASET, LBL_FWIDTH, LBL_FSTEP, LBL_UPDATES, LBL_CB_SIZE, LBL_GAIN
    ])
    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load insights experiment
        exp_insights_output = common.load_obj(exp)  # type: insights.ExpInsightsOutput
        dataset_name = common.file_name_wo_ext(exp_insights_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
        # Get the average gain for the insights experiment
        avg_gain = insights.get_avg_gain_for_exp(exp)
        # Highest update index seen in the gain data (update 0 is the initial problem)
        n_updates = np.max([u[0] for u in exp_insights_output.data.gain])
        # Avg gain dict
        dict_avg_gains = {LBL_DATASET: dataset_name,
                          LBL_FWIDTH: time_window_width_str(
                              exp_insights_output.settings.tw_width),
                          LBL_FSTEP: exp_insights_output.settings.tw_step,
                          LBL_UPDATES: n_updates + 1,
                          LBL_CB_SIZE: exp_insights_output.settings.cb_size,
                          LBL_GAIN: avg_gain}
        # Add the results to the output dataframe.
        # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
        # concatenate a single-row frame instead.
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])], ignore_index=True)
    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        # Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_insights_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 formatters={LBL_UPDATES: int_formatter,
                                             LBL_CB_SIZE: int_formatter},
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print("Avg Gain results saved as LaTeX table into '{}'.".format(save_fpath))
    else:
        print("No average gain results could be calculated.")
def main(argv=None):
    """Summarize avg gains of interruption/classification experiments as a LaTeX table.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` makes
            `argparse` fall back to `sys.argv`.

    Side effects:
        Saves a `.tex` file under `common.APP.FOLDER.FIGURE` and prints progress.
    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments", nargs="*", type=str,
        help="Interruption/classification experiment result file path(s)")
    parser.add_argument(
        "-p", "--fpath", type=str,
        help="Full path of the folder containing experiment result files. "
             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument(
        "-c", "--confthold", nargs="+", type=float,
        default=[1., .98, .95, .9, .8],
        help="Confidence thresholds")
    parser.add_argument(
        "-z", "--z", type=float, default=-1.,
        help="z factor of the efficiency measure")
    parser.add_argument(
        "-d", "--dec", type=int, default=2,
        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--knni", type=int,
        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
             " 'None' to calculate for all kNNs."
             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument(
        "--clsfy", choices=[0, 1], type=int, default=0,
        help="0 for interruption experiments;"
             " 1 for classification experiments to display gains also upon interruption w/ exact solution.")
    parser.add_argument(
        "--rtrim", nargs="*", type=str,
        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: exactly one of 'experiments'/'fpath' must be given
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim

    def float_formatter(x):
        # Plain fixed-precision formatter used for cells and confidence column labels
        return "{0:.{1}f}".format(x, dec_digits)

    clsfy = args.clsfy
    exp_tag = "classify" if clsfy else "intrpt"
    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_CONF_PERF = "Effcy"
    LBL_CONF_PERF_STD = "\u03C3"
    LBL_STOP_W_SOLN = "w\u2215Soln"
    # Create output dataframe
    df_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                ([LBL_STOP_W_SOLN] if clsfy else []) +
                [conf_col_label(c, float_formatter, arg_z) for c in conf_thold] +
                [LBL_CONF_PERF, LBL_CONF_PERF_STD])
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load interruption/classification experiment
        exp_output = common.load_obj(
            exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            # Experiments run with a different 'z' are not comparable; skip them
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(
                      helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average gains in the interruption/classification experiment
            # Average gain is calculated for the last kNN member for confthold experiments and
            # for stopwsoln=1 for the interruption w/ exact solution experiments (if wsoln=True)
            if not clsfy:
                df_avg_gains = intrpt.get_avg_gain_for_intrpt_exp(exp, conf_thold)
            else:
                df_avg_gains = classify.get_avg_gain_for_classify_exp(
                    exp, conf_thold, wsoln=True, lblwsoln=LBL_STOP_W_SOLN)
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            dict_avg_gains = {LBL_DATASET: dataset_name,
                              LBL_FWIDTH: run_common.time_window_width_str(
                                  exp_output.settings.tw_width),
                              LBL_FSTEP: exp_output.settings.tw_step}
            avg_gain_keys = [conf_col_label(c, float_formatter, exp_z)
                             if isinstance(c, float) else c
                             for c in df_avg_gains.index.tolist()]
            avg_gain_values = df_avg_gains["gain"].values
            dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
            # Add average efficiency and its std deviation columns too
            if not clsfy:
                avg_conf_perf, avg_conf_perf_std = intrpt.get_avg_effcy_for_intrpt_exp(
                    exp, knn_i=knn_i)
            else:
                avg_conf_perf, avg_conf_perf_std = classify.get_avg_effcy_for_classify_exp(
                    exp, knn_i=knn_i)
            dict_avg_gains.update({LBL_CONF_PERF: avg_conf_perf,
                                   LBL_CONF_PERF_STD: avg_conf_perf_std})
            # Add the results to the output dataframe.
            # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
            # concatenate a single-row frame instead.
            df_output = pd.concat(
                [df_output, pd.DataFrame([dict_avg_gains])], ignore_index=True)
    # Export the df to LaTeX
    if len(df_output) > 0:
        # Swap wsoln and 1.0 columns so that w/Soln appears next to uninterrupted gain
        if clsfy:
            unint_col = conf_col_label(1., float_formatter, arg_z)
            gain_cols = df_output.columns.tolist()
            if unint_col in gain_cols:
                unint_col_idx = gain_cols.index(unint_col)
                wsoln_col_idx = gain_cols.index(LBL_STOP_W_SOLN)
                gain_cols[unint_col_idx], gain_cols[wsoln_col_idx] = \
                    gain_cols[wsoln_col_idx], gain_cols[unint_col_idx]
                df_output = df_output[gain_cols]
        # Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        save_fn = "gain_{}_(x{})_[{}]_sd_{}_ki_{}".format(
            exp_tag, len(df_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z),
            knn_i if knn_i is not None else "All")
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_output.to_latex(buf=save_fpath,
                           float_format=float_formatter,
                           escape=False,
                           multirow=True,
                           index=True)
        print_msg_header = "Avg Gain for Interruptions at Confidence Thresholds{}".format(
            " and with Exact Solutions" if clsfy else "")
        print("{} saved as LaTeX table into '{}'.".format(print_msg_header, save_fpath))
    else:
        print("No average gain results could be calculated.")