Example #1
0
def main(argv=None):
    """Summarize avg gains of 'Exploit Candidates' experiments as a LaTeX table.

    The table is saved as a `.tex` file under `common.APP.FOLDER.FIGURE`.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` lets
            `argparse` read `sys.argv` instead.

    Raises:
        SystemExit: Via `parser.error` if neither or both of the
            'experiments' and 'fpath' arguments are given.

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "experiments",
        nargs="*",
        type=str,
        help="Exploit Candidates experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing Exploit Candidates experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: 'experiments' and 'fpath' are mutually exclusive
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        """Format a number with `dec_digits` decimals; pass non-numerics through."""
        return "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"

    # Create output dataframe
    df_gains_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load 'exploit candidates' experiment output
        exp_exploit_output = common.load_obj(
            exp)  # type: exploit.ExpExploitOutput
        # Get the dataset name
        dataset_name = common.file_name_wo_ext(
            exp_exploit_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gains in experiments
        exp_exploit_data = exp_exploit_output.data
        exp_exploit_data = exp_exploit_data.loc[
            exp_exploit_data["update"] != 0]  # Filter the initial problem
        df_avg_gains = exp_exploit_data[["gain", "iterator"
                                         ]].groupby("iterator").mean()
        # Avg gain dict: dataset info columns first, then one column per iterator
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            run_common.time_window_width_str(
                exp_exploit_output.settings.tw_width),
            LBL_FSTEP:
            exp_exploit_output.settings.tw_step
        }
        avg_gain_keys = df_avg_gains.index.tolist()
        avg_gain_values = df_avg_gains["gain"].values
        # Add the results to the output dataframe
        dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
        # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
        # equivalent row-wise append.
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])],
            ignore_index=True)

    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_exploit_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print(
            "Avg Gain for TopDown vs ExploitCandidates Rank Iterations saved as LaTeX table into '{}'."
            .format(save_fpath))
    else:
        print("No average gain results could be calculated.")
Example #2
0
def main(argv=None):
    """Summarize avg solution hit % of classification experiments as a LaTeX table.

    The table is saved as a `.tex` file under `common.APP.FOLDER.FIGURE`.
    Experiments whose 'z' setting differs from the `-z` argument are skipped.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` lets
            `argparse` read `sys.argv` instead.

    Raises:
        SystemExit: Via `parser.error` if neither or both of the
            'experiments' and 'fpath' arguments are given.

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Classification experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing classification experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c",
                        "--confthold",
                        nargs="+",
                        type=float,
                        default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z",
                        "--z",
                        type=float,
                        default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--knni",
        type=int,
        help=
        "Zero-based index of the kNN for which the average 'confidence performance' is calculated."
        " 'None' to calculate for all kNNs."
        " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument(
        "--wsoln",
        choices=[0, 1],
        type=int,
        default=0,
        help="1 to display hits upon interruption w/ exact solution")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: 'experiments' and 'fpath' are mutually exclusive
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim

    def float_formatter(x):
        """Format a number with `dec_digits` decimals."""
        return "{0:.{1}f}".format(x, dec_digits)

    def float_formatter_hit(x):
        """Format a hit ratio as a percentage; pass non-numerics through."""
        return "{0:.{1}f}".format(x * 100, dec_digits) if isinstance(x, (int, float)) else x

    wsoln = args.wsoln
    wsoln_tag = "_ws_{}".format(wsoln) if wsoln else ""

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Create output dataframe
    df_hits_output = pd.DataFrame(
        columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
        ([LBL_STOP_W_SOLN] if wsoln else []) + [
            gain_intrpt_classify.conf_col_label(c, float_formatter, arg_z)
            for c in conf_thold if c != 1
        ])  # Exclude conf=1.00 for hits, makes no sense for uninterruption

    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load classification experiment
        exp_output = common.load_obj(
            exp
        )  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print(
                "Ignored. The 'z' command line argument ({}) is different from "
                "the experiment 'z' setting ({}).".format(
                    helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average hits in the classification experiment
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                     rtrim,
                                                     latex_it=True)
            # Avg hit dict
            df_avg_hits = alk.exp.classify.get_avg_hit_for_classify_exp(
                exp, conf_thold, wsoln, lblwsoln=LBL_STOP_W_SOLN)
            dict_avg_hits = {
                LBL_DATASET:
                dataset_name,
                LBL_FWIDTH:
                run_common.time_window_width_str(exp_output.settings.tw_width),
                LBL_FSTEP:
                exp_output.settings.tw_step
            }
            avg_hits_keys = [
                gain_intrpt_classify.conf_col_label(c, float_formatter, exp_z)
                if isinstance(c, float) else c
                for c in df_avg_hits.index.tolist()
            ]
            avg_hits_values = df_avg_hits["hit"].values
            dict_avg_hits.update(dict(zip(avg_hits_keys, avg_hits_values)))
            # Add the results to the output dataframe.
            # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
            # equivalent row-wise append.
            df_hits_output = pd.concat(
                [df_hits_output, pd.DataFrame([dict_avg_hits])],
                ignore_index=True)

    # Export the df_hits to LaTeX
    if len(df_hits_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_hits_output = df_hits_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_hits_output = df_hits_output.sort_index()
        save_fn_hit = "soln_hit_(x{})_[{}]_sd_{}_ki_{}{}".format(
            len(df_hits_output), "_".join([str(c) for c in conf_thold]),
            helper.is_float_int(arg_z), knn_i if knn_i is not None else "All",
            wsoln_tag)
        save_fpath_hit = os.path.join(common.APP.FOLDER.FIGURE,
                                      "{}.tex".format(save_fn_hit))
        df_hits_output.to_latex(buf=save_fpath_hit,
                                float_format=float_formatter_hit,
                                escape=False,
                                multirow=True,
                                index=True)
        # Fixed: the original message mixed a stray '%s' (printf-style) into a
        # str.format template, so the literal characters '%s' were printed.
        print("Avg Solution Hit % saved as LaTeX table into '{}'.".format(
            save_fpath_hit))
    else:
        print("No average solution hit results could be calculated.")
Example #3
0
def main(argv=None):
    """Summarize avg gains of insights experiments as a LaTeX table.

    The table is saved as a `.tex` file under `common.APP.FOLDER.FIGURE`.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` lets
            `argparse` read `sys.argv` instead.

    Raises:
        SystemExit: Via `parser.error` if neither or both of the
            'experiments' and 'fpath' arguments are given.

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments",
                        nargs="*",
                        type=str,
                        help="Interruption experiment result file path(s)")
    parser.add_argument(
        "-p",
        "--fpath",
        type=str,
        help=
        "Full path of the folder containing interruption experiment result files. "
        "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-d",
                        "--dec",
                        type=int,
                        default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument(
        "--rtrim",
        nargs="*",
        type=str,
        help=
        "Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")

    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: 'experiments' and 'fpath' are mutually exclusive
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error(
            "Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    dec_digits = args.dec
    rtrim = args.rtrim

    def float_formatter(x):
        """Format a number with `dec_digits` decimals; pass non-numerics through."""
        return "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x

    def int_formatter(x):
        """Format an integer with thousands separators."""
        return '{:,}'.format(x)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_UPDATES = "Updates"
    LBL_CB_SIZE = "\u007CCB\u007C"
    LBL_GAIN = "Gain"

    # Create output dataframe
    df_gains_output = pd.DataFrame(columns=[
        LBL_DATASET, LBL_FWIDTH, LBL_FSTEP, LBL_UPDATES, LBL_CB_SIZE, LBL_GAIN
    ])

    # Populate summary dictionaries
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load insights experiment
        exp_insights_output = common.load_obj(
            exp)  # type: insights.ExpInsightsOutput
        dataset_name = common.file_name_wo_ext(
            exp_insights_output.settings.dataset)
        print("...Dataset: {}".format(dataset_name))
        if rtrim:
            dataset_name = ts.rtrim_dataset_name(dataset_name,
                                                 rtrim,
                                                 latex_it=True)
        # Get the average gain for the insights experiment
        avg_gain = insights.get_avg_gain_for_exp(exp)
        # Highest zero-based update index observed in the gain data
        n_updates = np.max([u[0] for u in exp_insights_output.data.gain])
        # Avg gain dict
        dict_avg_gains = {
            LBL_DATASET:
            dataset_name,
            LBL_FWIDTH:
            time_window_width_str(exp_insights_output.settings.tw_width),
            LBL_FSTEP:
            exp_insights_output.settings.tw_step,
            LBL_UPDATES:
            n_updates + 1,  # +1: convert zero-based index to a count
            LBL_CB_SIZE:
            exp_insights_output.settings.cb_size,
            LBL_GAIN:
            avg_gain
        }
        # Add the results to the output dataframe.
        # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
        # equivalent row-wise append.
        df_gains_output = pd.concat(
            [df_gains_output, pd.DataFrame([dict_avg_gains])],
            ignore_index=True)

    # Export the df to LaTeX
    if len(df_gains_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_gains_output = df_gains_output.set_index(
            [LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_gains_output = df_gains_output.sort_index()
        save_fn = "gain_insights_(x{})".format(len(df_gains_output))
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                                  "{}.tex".format(save_fn))
        df_gains_output.to_latex(buf=save_fpath,
                                 formatters={
                                     LBL_UPDATES: int_formatter,
                                     LBL_CB_SIZE: int_formatter
                                 },
                                 float_format=float_formatter,
                                 escape=False,
                                 multirow=True,
                                 index=True)
        print("Avg Gain results saved as LaTeX table into '{}'.".format(
            save_fpath))
    else:
        print("No average gain results could be calculated.")
Example #4
0
def main(argv=None):
    """Summarize avg gains of interruption/classification experiments as a LaTeX table.

    The table is saved as a `.tex` file under `common.APP.FOLDER.FIGURE`.
    Experiments whose 'z' setting differs from the `-z` argument are skipped.

    Args:
        argv (Optional[List[str]]): Command-line arguments; `None` lets
            `argparse` read `sys.argv` instead.

    Raises:
        SystemExit: Via `parser.error` if neither or both of the
            'experiments' and 'fpath' arguments are given.

    """
    # Configure argument parser
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("experiments", nargs="*", type=str,
                        help="Interruption/classification experiment result file path(s)")
    parser.add_argument("-p", "--fpath", type=str,
                        help="Full path of the folder containing experiment result files. "
                             "Optionally used instead of the 'experiments' argument.")
    parser.add_argument("-c", "--confthold", nargs="+", type=float, default=[1., .98, .95, .9, .8],
                        help="Confidence thresholds")
    parser.add_argument("-z", "--z", type=float, default=-1.,
                        help="z factor of the efficiency measure")
    parser.add_argument("-d", "--dec", type=int, default=2,
                        help="Decimal digits to be used in gain percentage")
    parser.add_argument("--knni", type=int,
                        help="Zero-based index of the kNN for which the average 'confidence performance' is calculated."
                             " 'None' to calculate for all kNNs."
                             " Normally, it makes sense either for the last or all NNs.")
    parser.add_argument("--clsfy", choices=[0, 1], type=int, default=0,
                        help="0 for interruption experiments;"
                             " 1 for classification experiments to display gains also upon interruption w/ exact solution.")
    parser.add_argument("--rtrim", nargs="*", type=str,
                        help="Remove given strings at the end of dataset names; e.g. _TRAIN, _TEST")
    # Parse arguments
    args = parser.parse_args(argv)
    # Required params check: 'experiments' and 'fpath' are mutually exclusive
    if args.experiments and args.fpath is not None:
        parser.error("'experiments' and 'fpath' arguments may not coexist!")
    if not args.experiments and args.fpath is None:
        parser.error("Either 'experiments' or 'fpath' argument should be given!")
    # Get experiment files
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        experiments = [os.path.join(fpath, f) for f in files_]
    else:
        experiments = [os.path.expanduser(e) for e in args.experiments]
    conf_thold = args.confthold
    arg_z = args.z
    dec_digits = args.dec
    knn_i = args.knni
    rtrim = args.rtrim

    def float_formatter(x):
        """Format a number with `dec_digits` decimals."""
        return "{0:.{1}f}".format(x, dec_digits)

    clsfy = args.clsfy
    exp_tag = "{}".format("classify" if clsfy else "intrpt")

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"
    LBL_CONF_PERF = "Effcy"
    LBL_CONF_PERF_STD = "\u03C3"
    LBL_STOP_W_SOLN = "w\u2215Soln"

    # Create output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                                     ([LBL_STOP_W_SOLN] if clsfy else []) +
                                     [conf_col_label(c, float_formatter, arg_z) for c in conf_thold] +
                                     [LBL_CONF_PERF, LBL_CONF_PERF_STD])
    # Populate summary dictionary
    for exp in experiments:
        print("Exp: {}".format(exp))
        # Load interruption/classification experiment
        exp_output = common.load_obj(exp)  # type: Union[intrpt.ExpIntrptOutput, classify.ExpClassifierOutput]
        exp_z = exp_output.settings.z
        if arg_z != exp_z:
            print("Ignored. The 'z' command line argument ({}) is different from "
                  "the experiment 'z' setting ({}).".format(helper.is_float_int(arg_z), helper.is_float_int(exp_z)))
        else:
            dataset_name = common.file_name_wo_ext(exp_output.settings.dataset)
            print("...Dataset: {}".format(dataset_name))
            # Get the average gains in the interruption/classification experiment
            # Average gain is calculated for the last kNN member for confthold experiments and
            # for stopwsoln=1 for the interruption w/ exact solution experiments (if wsoln=True)
            if not clsfy:
                df_avg_gains = intrpt.get_avg_gain_for_intrpt_exp(exp, conf_thold)
            else:
                df_avg_gains = classify.get_avg_gain_for_classify_exp(exp, conf_thold, wsoln=True, lblwsoln=LBL_STOP_W_SOLN)
            # Add the results to the output dataframe
            if rtrim:
                dataset_name = ts.rtrim_dataset_name(dataset_name, rtrim, latex_it=True)
            dict_avg_gains = {LBL_DATASET: dataset_name,
                              LBL_FWIDTH: run_common.time_window_width_str(exp_output.settings.tw_width),
                              LBL_FSTEP: exp_output.settings.tw_step}
            avg_gain_keys = [conf_col_label(c, float_formatter, exp_z) if isinstance(c, float) else c for c in df_avg_gains.index.tolist()]
            avg_gain_values = df_avg_gains["gain"].values
            dict_avg_gains.update(dict(zip(avg_gain_keys, avg_gain_values)))
            # Add average efficiency and its std deviation columns too
            if not clsfy:
                avg_conf_perf, avg_conf_perf_std = intrpt.get_avg_effcy_for_intrpt_exp(exp, knn_i=knn_i)
            else:
                avg_conf_perf, avg_conf_perf_std = classify.get_avg_effcy_for_classify_exp(exp, knn_i=knn_i)
            dict_avg_gains.update({LBL_CONF_PERF: avg_conf_perf, LBL_CONF_PERF_STD: avg_conf_perf_std})
            # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
            # equivalent row-wise append.
            df_output = pd.concat([df_output, pd.DataFrame([dict_avg_gains])], ignore_index=True)

    # Export the df to LaTeX
    if len(df_output) > 0:
        # Swap wsoln and 1.0 columns so 'w/Soln' appears where conf=1.0 would
        if clsfy:
            unint_col = conf_col_label(1., float_formatter, arg_z)
            gain_cols = df_output.columns.tolist()
            if unint_col in gain_cols:
                unint_col_idx = gain_cols.index(unint_col)
                wsoln_col_idx = gain_cols.index(LBL_STOP_W_SOLN)
                gain_cols[unint_col_idx], gain_cols[wsoln_col_idx] = gain_cols[wsoln_col_idx], gain_cols[unint_col_idx]
                df_output = df_output[gain_cols]
        # Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        save_fn = "gain_{}_(x{})_[{}]_sd_{}_ki_{}".format(exp_tag,
                                                          len(df_output),
                                                          "_".join([str(c) for c in conf_thold]),
                                                          helper.is_float_int(arg_z),
                                                          knn_i if knn_i is not None else "All")
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.tex".format(save_fn))
        df_output.to_latex(buf=save_fpath, float_format=float_formatter, escape=False, multirow=True, index=True)
        print_msg_header = "Avg Gain for Interruptions at Confidence Thresholds{}".format(" and with Exact Solutions" if clsfy else "")
        print("{} saved as LaTeX table into '{}'.".format(print_msg_header, save_fpath))
    else:
        print("No average gain results could be calculated.")