Example #1
import datetime
import os
import time

# Project modules (common, pdp, classify), the REUSE_CLASSES mapping, and the
# _parse_args/_create_exp_classifier_engine helpers are defined elsewhere in
# this repository; their import paths are not shown in the snippet.

def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    # Read k, tw_width, tw_step and cls_rank_iterator of the insights experiment to use the same settings
    exp_ins_settings = pdp.get_exp_ins_settings(args.pdpfile)
    k = exp_ins_settings.k
    tw_width = exp_ins_settings.tw_width
    tw_step = exp_ins_settings.tw_step
    cls_rank_iterator = exp_ins_settings.cls_rank_iterator
    cls_rank_iterator_kwargs = exp_ins_settings.cls_rank_iterator_kwargs
    reuse = REUSE_CLASSES[args.reuse]["cls"]
    stop_w_soln = args.wsoln
    # Generate file names
    out_file = args.outfile if args.outfile else classify.gen_classifier_output_f_path(
        dataset, args.pdpfile, tw_width, tw_step, k, args.confthold, args.z,
        args.testsize, cls_rank_iterator, reuse, stop_w_soln)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("Classification experiment script launched with args: {}".format(str(vars(args))))
    # Create an interruption experiment engine
    engine = _create_exp_classifier_engine(dataset, args.pdpfile, tw_width, tw_step, k, args.confthold, args.z, args.testsize,
                                           reuse, stop_w_soln, cls_rank_iterator=cls_rank_iterator, cls_rank_iterator_kwargs=cls_rank_iterator_kwargs)

    # engine.run -> collected data
    processed_data = engine.run()
    # create a result obj
    output = classify.ExpClassifierOutput(
        settings=classify.ExpClassifierSettings(dataset, args.pdpfile, tw_width, tw_step, k, args.confthold, args.z, args.testsize, reuse, stop_w_soln,
                                                cls_rank_iterator=cls_rank_iterator, cls_rank_iterator_kwargs=cls_rank_iterator_kwargs),
        data=processed_data)
    logger.info("Classification experiment finished in {}.".format(datetime.timedelta(seconds=(time.time() - start_time))))
    # save the output
    output.save(out_file=out_file)
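
Because main() accepts an optional argv list, the script can also be invoked programmatically. A minimal sketch; every flag spelling and value below is an assumption inferred from the args.* attributes read above, not confirmed by the snippet:

# Hypothetical invocation with made-up paths and flag spellings.
main([
    "--dataset", "~/datasets/some_ts_dataset.arff",
    "--pdpfile", "~/results/insights_pdp.pk",
    "--testsize", "0.2",
])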
Example #2
import datetime
import os
import time

# Project modules (common, exploit) and the _parse_args/
# _create_exp_exploit_engine helpers are defined elsewhere in this repository.

def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    # Generate file names
    out_file = args.outfile if args.outfile else exploit.gen_exploit_output_f_path(
        dataset, args.width, args.step, args.k, args.testsize)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info(
        "Exploit Candidates Iteration experiment script launched with args: {}"
        .format(str(vars(args))))
    # create the experiment engine
    engine = _create_exp_exploit_engine(dataset, args.width, args.step, args.k,
                                        args.testsize)
    # engine.run -> experiment data
    exploit_data = engine.run()
    # create a result obj
    output = exploit.ExpExploitOutput(settings=exploit.ExpExploitSettings(
        dataset, args.width, args.step, args.k, args.testsize,
        engine.cb.size()),
                                      data=exploit_data)
    logger.info(
        "Exploit Candidates Iteration experiment finished in {}.".format(
            datetime.timedelta(seconds=(time.time() - start_time))))
    # save the output
    output.save(out_file=out_file)
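
All of these scripts log elapsed time the same way: wrapping the seconds count in datetime.timedelta yields an H:MM:SS string. For example:

import datetime

print(datetime.timedelta(seconds=3723.5))  # prints "1:02:03.500000"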
Example #3
import datetime
import os
import time

# Project modules (common, pdp) and the _parse_args helper are defined
# elsewhere in this repository.

def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)
    pdp_file = args.outfile
    log_file = args.logfile
    # Generate file names
    if not pdp_file:
        pdp_file = pdp.gen_pdp_f_path(args.expfile, args.calcstep, args.qstep)
    if not log_file:
        log_file = common.gen_log_file(pdp_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("PDP generation script launched with args: {}".format(
        str(vars(args))))
    # build the PDP
    pdp_data, calc_step_actual = pdp.build_pdp(os.path.expanduser(
        args.expfile),
                                               calc_step=args.calcstep,
                                               q_step=args.qstep)
    # create an output object
    pdp_output = pdp.PDPOutput(settings=pdp.PDPSettings(
        experiment=args.expfile,
        calc_step_arg=args.calcstep,
        calc_step=calc_step_actual,
        q_step=args.qstep),
                               data=pdp_data)
    logger.info("PDP generation finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # save the pdp
    pdp_output.save(out_file=pdp_file)
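
Note that pdp.build_pdp returns the calculation step it actually used alongside the data, and PDPSettings records the requested value (calc_step_arg) next to the effective one (calc_step), so the saved output is self-describing. A minimal sketch of that requested-vs-effective pattern, with a hypothetical grid-snapping rule standing in for the project's real logic:

def snap_step(requested, grid=0.05):
    # Hypothetical: snap a requested step to the nearest multiple of the grid.
    return max(grid, round(requested / grid) * grid)

calc_step_arg = 0.12                   # what the caller asked for
calc_step = snap_step(calc_step_arg)   # 0.1 -- what was actually used; keep both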
Example #4
import datetime
import os
import time

# Project modules (common, insights, run_common) and the _parse_args/
# _create_exp_insights_engine helpers are defined elsewhere in this repository.

def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)
    dataset = os.path.expanduser(args.dataset)
    cls_rank_iterator = run_common.RANK_ITER_OPTIONS[args.iter]["cls"]
    cls_rank_iterator_kwargs = args.kwargsiter
    # Generate file names
    out_file = args.outfile if args.outfile else insights.gen_insights_output_f_path(
        dataset, args.width, args.step, args.k, args.testsize,
        cls_rank_iterator)
    log_file = args.logfile if args.logfile else common.gen_log_file(out_file)
    # Set logger
    logger = common.initialize_logger(console_level=args.logc,
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level=args.logf)
    logger.info("Insights experiment script launched with args: {}".format(
        str(vars(args))))
    # create the experiment engine
    engine = _create_exp_insights_engine(
        dataset,
        args.width,
        args.step,
        args.k,
        args.testsize,
        n_exp=args.runs,
        cls_rank_iterator=cls_rank_iterator,
        cls_rank_iterator_kwargs=cls_rank_iterator_kwargs)
    # engine.run -> processed data
    processed_insights = engine.run()
    # create a result obj
    output = insights.ExpInsightsOutput(settings=insights.ExpInsightsSettings(
        dataset, args.width, args.step, args.k, args.testsize,
        engine.cb.size(), cls_rank_iterator, cls_rank_iterator_kwargs),
                                        data=processed_insights)
    logger.info("Insights experiment finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
    # save the output
    output.save(out_file=out_file)
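
RANK_ITER_OPTIONS acts as a registry that maps the args.iter choice to a rank-iterator class, which is later instantiated with the keyword arguments supplied via args.kwargsiter. A minimal sketch of the lookup; only the ["cls"] access is confirmed by the snippet, the class and key names below are hypothetical:

class TopDownRankIterator:                       # hypothetical iterator class
    def __init__(self, jump_at=1):
        self.jump_at = jump_at

RANK_ITER_OPTIONS = {"td": {"cls": TopDownRankIterator}}

cls_rank_iterator = RANK_ITER_OPTIONS["td"]["cls"]
iterator = cls_rank_iterator(**{"jump_at": 2})   # kwargs arrive as a dict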
Example #5
import datetime
import os
import time

import numpy as np
import pandas as pd

# Project modules (common, run_common, ts) and the _parse_args/sim_hist
# helpers are defined elsewhere in this repository.

def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"

    bins = args.bins
    bin_width = 1. / bins
    decimals_bin_width = len(str(bin_width).split('.')[1])
    if decimals_bin_width > 2:
        decimals_bin_width = 2
    cols_bin = [
        "{0:.{1}f}".format(i, decimals_bin_width)
        for i in list(np.arange(0, 1. + bin_width, bin_width)[1:])
    ]
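    # Worked example: bins=4 gives bin_width=0.25 and decimals_bin_width=2, so
    # cols_bin == ["0.25", "0.50", "0.75", "1.00"] (the upper edge of each bin).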

    # Create output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                             cols_bin)

    # Get datasets
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        datasets = [os.path.join(fpath, f) for f in files_]
    else:
        datasets = [os.path.expanduser(e) for e in args.datasets]
    test_size = args.testsize if 0. < args.testsize < 1. else int(
        args.testsize)
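    # A testsize strictly between 0 and 1 stays a float (a fractional split);
    # any other value is cast to int (an absolute number of cases).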
    update = args.update
    dec_digits = args.dec
    def float_formatter(x):
        # Format numeric cells to `dec_digits` decimals; leave other values as-is.
        return "{0:.{1}f}".format(x, dec_digits) if isinstance(x, (int, float)) else x

    # Set logger
    save_fn = "similarity_distr_(x{n})_w_{w}_s_{s}_t_{t}{u}".format(
        n=len(datasets),
        w=str(args.width),
        s=str(args.step),
        t=str(test_size),
        u="_u_{}".format(update) if update is not None else "")
    save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                              "{}.tex".format(save_fn))
    log_file = common.gen_log_file(save_fpath)
    logger = common.initialize_logger(console_level="INFO",
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level="INFO")
    logger.info(
        "Case base similarity distribution script launched with args: {}".
        format(str(vars(args))))

    # Start computing distributions for CBs
    for dataset in datasets:
        # Paths were already expanded above; derive the display name directly.
        dataset_name = os.path.splitext(os.path.basename(dataset))[0]
        logger.info("Dataset: {}".format(dataset_name))
        # get the similarity for the dataset
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
        for tw_width in args.width:
            logger.info(".. TW width: {}".format(tw_width))
            for tw_step in args.step:
                logger.info(".. TW step: {}".format(tw_step))
                # read the dataset -> cb
                cb = ts.gen_cb(dataset=dataset,
                               tw_width=tw_width,
                               tw_step=tw_step)
                distr_hist = sim_hist(cb, similarity, bins, test_size, update)
                # logger.info(".... Distribution:\n{}".format(
                #     pd.DataFrame([distr_hist * 100], columns=cols_bin).to_string(index=False,
                #                                                                  float_format=float_formatter)))
                dict_distr = {
                    LBL_DATASET: dataset_name,
                    LBL_FWIDTH: run_common.time_window_width_str(tw_width),
                    LBL_FSTEP: tw_step
                }
                dict_distr.update(dict(zip(cols_bin, distr_hist * 100.)))
                # Add the distribution to the output dataframe
                df_output = df_output.append(dict_distr, ignore_index=True)
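                # NOTE: DataFrame.append was removed in pandas 2.0; on newer
                # pandas use pd.concat([df_output, pd.DataFrame([dict_distr])],
                #                      ignore_index=True) instead.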

    # Export the df to LaTeX
    if len(df_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        df_output.to_latex(buf=save_fpath,
                           float_format=float_formatter,
                           escape=True,
                           multirow=True,
                           index=True)
        logger.info(
            "Similarity distribution saved as LaTeX table into '{}'.".format(
                save_fpath))
    else:
        logger.info("No similarity distribution data could be calculated.")
    logger.info("Script finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))