Example #1
def _create_exp_exploit_engine(dataset,
                               tw_width,
                               tw_step,
                               k,
                               test_size,
                               similarity=None,
                               gen_profile=None):
    """ Creates an 'exploit candidates' iteration experiment engine.

    Returns:
        exploit.ExpExploitEngine: The configured experiment engine.

    """
    # read the dataset -> cb
    cb = ts.gen_cb(dataset=dataset,
                   tw_width=tw_width,
                   tw_step=tw_step,
                   gen_profile=gen_profile)
    # create an experiment engine
    if similarity is None:
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
    engine = exploit.ExpExploitEngine(cb=cb,
                                      k=k,
                                      similarity=similarity,
                                      test_size=test_size)
    return engine
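
A minimal usage sketch; the dataset path and all parameter values below are hypothetical, and the module-level imports (`ts`, `exploit`) are assumed to be in place as above:

engine = _create_exp_exploit_engine(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                                    tw_width=40,    # moving time-window width in data points
                                    tw_step=1,      # window slides one data point per update
                                    k=9,            # number of nearest neighbors
                                    test_size=0.1)  # hold out 10% of the sequences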
Example #2
def _create_exp_jump_engine(dataset,
                            tw_width,
                            tw_step,
                            k,
                            test_size,
                            jump_at_lst,
                            similarity=None,
                            gen_profile=None):
    """ Creates an jumping iteration experiment engine.

    Returns:
        jump.ExpJumpEngine: The configured experiment engine.

    """
    # read the dataset -> cb
    cb = ts.gen_cb(dataset=dataset,
                   tw_width=tw_width,
                   tw_step=tw_step,
                   gen_profile=gen_profile)
    # create an experiment engine
    if similarity is None:
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
    engine = jump.ExpJumpEngine(cb=cb,
                                k=k,
                                similarity=similarity,
                                jump_at_lst=jump_at_lst,
                                test_size=test_size)
    return engine
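
The same pattern for the jumping variant, where `jump_at_lst` gives the update indices at which iteration jumps; all values below are illustrative:

engine = _create_exp_jump_engine(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                                 tw_width=40, tw_step=1, k=9,
                                 test_size=0.1,
                                 jump_at_lst=[10, 20, 40])  # illustrative jump points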
Example #3
def _create_exp_insights_engine(dataset,
                                tw_width,
                                tw_step,
                                k,
                                test_size,
                                similarity=None,
                                cls_rank_iterator=rank.TopDownIterator,
                                cls_rank_iterator_kwargs=None,  # None -> {} below; avoids a mutable default argument
                                n_exp=1,
                                gen_profile=None):
    """ Creates an insights experiment engine.

    Returns:
        insights.ExpInsightsEngine: The configured experiment engine.

    """
    # read the dataset -> cb
    cb = ts.gen_cb(dataset=dataset,
                   tw_width=tw_width,
                   tw_step=tw_step,
                   gen_profile=gen_profile)
    # create an experiment engine
    if similarity is None:
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
    if cls_rank_iterator_kwargs is None:
        cls_rank_iterator_kwargs = {}  # normalize the None default to an empty dict
    engine = insights.ExpInsightsEngine(
        cb=cb,
        k=k,
        similarity=similarity,
        cls_rank_iterator=cls_rank_iterator,
        cls_rank_iterator_kwargs=cls_rank_iterator_kwargs,
        test_size=test_size,
        n_exp=n_exp)
    return engine
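
A hedged sketch that passes the rank iterator explicitly and repeats the experiment several times; all values are illustrative and `rank.TopDownIterator` is assumed to be imported as above:

engine = _create_exp_insights_engine(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                                     tw_width=40, tw_step=1, k=9,
                                     test_size=0.1,
                                     cls_rank_iterator=rank.TopDownIterator,  # default, shown for clarity
                                     n_exp=5)  # repeat the experiment 5 times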
Example #4
def _create_exp_classifier_engine(dataset, pdp_file, tw_width, tw_step, k, conf_tholds, z, test_size, reuse,
                                  stop_w_soln, similarity=None, cls_rank_iterator=rank.TopDownIterator,
                                  cls_rank_iterator_kwargs=None, gen_profile=None):
    """ Creates a classification experiment engine.

    Returns:
        classify.ExpClassifierEngine: The configured experiment engine.

    """
    # read the dataset -> cb
    cb = ts.gen_cb(dataset=dataset, tw_width=tw_width, tw_step=tw_step, gen_profile=gen_profile)
    # create an experiment engine
    if similarity is None:
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
    if cls_rank_iterator_kwargs is None:
        cls_rank_iterator_kwargs = {}  # normalize the None default to an empty dict
    engine = classify.ExpClassifierEngine(pdp_file=pdp_file, cb=cb, k=k, similarity=similarity, conf_tholds=conf_tholds,
                                          reuse=reuse, stop_w_soln=stop_w_soln, cls_rank_iterator=cls_rank_iterator,
                                          cls_rank_iterator_kwargs=cls_rank_iterator_kwargs, z=z, test_size=test_size)
    return engine
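
A hedged sketch; besides the hypothetical paths, the `conf_tholds`, `z`, `reuse` and `stop_w_soln` values are placeholders whose valid ranges are defined by `classify.ExpClassifierEngine`:

engine = _create_exp_classifier_engine(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                                       pdp_file="~/pdp/PowerCons_pdp.pk",      # hypothetical PDP file
                                       tw_width=40, tw_step=1, k=9,
                                       conf_tholds=[.75, .9, .95],  # placeholder thresholds
                                       z=-1, test_size=0.1,         # placeholder values
                                       reuse=True, stop_w_soln=False)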
Example #5
def _create_exp_intrpt_engine(dataset,
                              pdp_file,
                              tw_width,
                              tw_step,
                              k,
                              conf_tholds,
                              z,
                              test_size,
                              similarity=None,
                              cls_rank_iterator=rank.TopDownIterator,
                              cls_rank_iterator_kwargs=None,  # None -> {} below; avoids a mutable default argument
                              gen_profile=None):
    """ Creates an interruption experiment engine.

    Returns:
        intrpt.ExpIntrptEngine: The configured experiment engine.

    """
    # read the dataset -> cb
    cb = ts.gen_cb(dataset=dataset,
                   tw_width=tw_width,
                   tw_step=tw_step,
                   gen_profile=gen_profile)
    # create an experiment engine
    if similarity is None:
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
    if cls_rank_iterator_kwargs is None:
        cls_rank_iterator_kwargs = {}  # normalize the None default to an empty dict
    engine = intrpt.ExpIntrptEngine(
        pdp_file=pdp_file,
        cb=cb,
        k=k,
        similarity=similarity,
        conf_tholds=conf_tholds,
        cls_rank_iterator=cls_rank_iterator,
        cls_rank_iterator_kwargs=cls_rank_iterator_kwargs,
        z=z,
        test_size=test_size)
    return engine
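
Analogously for the interruption engine (hypothetical paths, placeholder values):

engine = _create_exp_intrpt_engine(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                                   pdp_file="~/pdp/PowerCons_pdp.pk",      # hypothetical PDP file
                                   tw_width=40, tw_step=1, k=9,
                                   conf_tholds=[.75, .9], z=-1,            # placeholder values
                                   test_size=0.1)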
Example #6
def main(argv=None):
    start_time = time.time()
    # add and parse arguments
    args = _parse_args(argv)

    LBL_DATASET = "Dataset"
    LBL_FWIDTH = "Width"
    LBL_FSTEP = "Step"

    bins = args.bins
    bin_width = 1. / bins
    decimals_bin_width = len(str(bin_width).split('.')[1])
    if decimals_bin_width > 2:
        decimals_bin_width = 2
    cols_bin = [
        "{0:.{1}f}".format(i, decimals_bin_width)
        for i in list(np.arange(0, 1. + bin_width, bin_width)[1:])
    ]
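    # Worked example: with args.bins == 4, bin_width == 0.25 and
    # str(0.25).split('.')[1] == "25" -> decimals_bin_width == 2, so
    # cols_bin == ["0.25", "0.50", "0.75", "1.00"] (upper bin edges as labels).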

    # Create output dataframe
    df_output = pd.DataFrame(columns=[LBL_DATASET, LBL_FWIDTH, LBL_FSTEP] +
                             cols_bin)

    # Get datasets
    if args.fpath:
        fpath = os.path.expanduser(args.fpath)
        files_ = common.listdir_non_hidden(fpath)
        datasets = [os.path.join(fpath, f) for f in files_]
    else:
        datasets = [os.path.expanduser(e) for e in args.datasets]
    test_size = args.testsize if 0. < args.testsize < 1. else int(
        args.testsize)
    update = args.update
    dec_digits = args.dec
    float_formatter = lambda x: "{0:.{1}f}".format(
        x, dec_digits) if isinstance(x, (int, float)) else x

    # Set logger
    save_fn = "similarity_distr_(x{n})_w_{w}_s_{s}_t_{t}{u}".format(
        n=len(datasets),
        w=str(args.width),
        s=str(args.step),
        t=str(test_size),
        u="_u_{}".format(update) if update is not None else "")
    save_fpath = os.path.join(common.APP.FOLDER.FIGURE,
                              "{}.tex".format(save_fn))
    log_file = common.gen_log_file(save_fpath)
    logger = common.initialize_logger(console_level="INFO",
                                      output_dir=common.APP.FOLDER.LOG,
                                      log_file=log_file,
                                      file_level="INFO")
    logger.info(
        "Case base similarity distribution script launched with args: {}".
        format(str(vars(args))))

    # Start computing distributions for CBs
    for dataset in datasets:
        dataset_name = os.path.expanduser(dataset)
        dataset_name = os.path.splitext(os.path.basename(dataset_name))[0]
        logger.info("Dataset: {}".format(dataset_name))
        # get the similarity for the dataset
        similarity = ts.euclidean_similarity_ts_dataset(dataset)
        for tw_width in args.width:
            logger.info(".. TW width: {}".format(tw_width))
            for tw_step in args.step:
                logger.info(".. TW step: {}".format(tw_step))
                # read the dataset -> cb
                cb = ts.gen_cb(dataset=dataset,
                               tw_width=tw_width,
                               tw_step=tw_step)
                distr_hist = sim_hist(cb, similarity, bins, test_size, update)
                # logger.info(".... Distribution:\n{}".format(
                #     pd.DataFrame([distr_hist * 100], columns=cols_bin).to_string(index=False,
                #                                                                  float_format=float_formatter)))
                dict_distr = {
                    LBL_DATASET: dataset_name,
                    LBL_FWIDTH: run_common.time_window_width_str(tw_width),
                    LBL_FSTEP: tw_step
                }
                dict_distr.update(dict(zip(cols_bin, distr_hist * 100.)))
                # Add the distribution to the output dataframe
                # (DataFrame.append was removed in pandas 2.0; use concat instead)
                df_output = pd.concat([df_output, pd.DataFrame([dict_distr])],
                                      ignore_index=True)

    # Export the df to LaTeX
    if len(df_output) > 0:
        #  Create a multiindex for a sorted (and prettier) output
        df_output = df_output.set_index([LBL_DATASET, LBL_FWIDTH, LBL_FSTEP])
        df_output = df_output.sort_index()
        df_output.to_latex(buf=save_fpath,
                           float_format=float_formatter,
                           escape=True,
                           multirow=True,
                           index=True)
        logger.info(
            "Similarity distribution saved as LaTeX table into '{}'.".format(
                save_fpath))
    else:
        logger.info("No similarity distribution data could be calculated.")
    logger.info("Script finished in {}.".format(
        datetime.timedelta(seconds=(time.time() - start_time))))
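
main() can also be driven programmatically with an argv list. The flag names below are guesses inferred from the attributes read off `args` (datasets, bins, width, step, testsize) and should be checked against `_parse_args`:

main(argv=["--datasets", "~/ts_datasets/PowerCons.csv",  # hypothetical path
           "--bins", "10",
           "--width", "40",  # args.width is iterated over, so it likely accepts several values
           "--step", "1",
           "--testsize", "0.1"])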
Example #7
def cb_sequences(dataset, file_format="pdf", width=0, step=1, seqs=None, upd_ind=None,
                 full=True, with_title=True, signature=True, **kwargs):
    """Plots given sequences of a given time series CB.

    Args:
        dataset (str): The sequence dataset file to be converted to CB.
        file_format (str): One of the file extensions supported by the active backend.
            Most backends support png, pdf, ps, eps and svg.
            if is None, the plot is displayed and not saved.
        width (int): if > 0, width of the moving time window; otherwise expanding windows approach is applied.
        step (int): number of steps (in terms of data points in TS) taken by the window at each update.
                This can also be seen as the number of data points changed in each update.
        seqs (list or int): if is a `list`, then the items are the indexes of the sequences in the CB to be plotted;
            if is an `int`, `seq_ind` number of randomly selected sequences are plotted
        upd_ind (int): the update (i.e. the upd_ind^th problem profile) of the sequences to be plotted;
            if None, *last* update of the sequences are plotted
        full (bool): if True, plots full sequence in light grey color under the given `update`
        with_title (bool): if True, shows the figure title.
        signature (bool): If True, name of the plotting function is also displayed.
        **kwargs: to passed over to the `matplotlib.pyplot.plot` function
    Returns:
        list : indices of the plotted sequences.

    """
    COLORS_ = plt.rcParams['axes.prop_cycle'].by_key()['color']
    cb = ts.gen_cb(dataset=dataset, tw_width=width, tw_step=step)
    max_updates = max([len(seq.data) for seq in cb.sequences()])  # Data points
    max_profiles = max([seq.n_profiles() for seq in cb.sequences()])  # Cases, max_updates=max_profiles if step=1
    Y_max, Y_min = ts.get_max_min(dataset)
    X = list(range(max_updates))
    xticks_ = list(np.arange(0, max_updates, step=math.ceil(max_updates / 10)))
    if max_updates - 1 not in xticks_:  # check the value itself, not a one-element list
        xticks_.append(max_updates - 1)
    if upd_ind is None:
        upd_ind = max_profiles - 1

    if seqs is None:
        seqs = [0]  # default: plot the first sequence (avoids a shared mutable default)
    if not isinstance(seqs, list):
        seq_ind = random.sample(range(len(cb.sequences())), seqs)
    else:
        seq_ind = seqs
    for ix, ind in enumerate(seq_ind):
        case_features = cb[ind].profile(idx=upd_ind)
        start_, end_ = cb[ind]._get_range_profile(upd_ind, len(cb[ind].data), width, step)
        pad_left = start_                # Nones before the window's start
        pad_right = max_updates - end_   # Nones after the window's end
        Y = [None] * pad_left + list(case_features) + [None] * pad_right  # pad both sides
        if full and upd_ind < max_updates:
            full_seq = cb[ind].data
            plt.plot(X, list(full_seq), label=None, color=COLORS_[2 + ix], alpha=0.2)  # Hack not to repeat gain plot colors
        plt.plot(X, Y, color=COLORS_[2 + ix], label="Seq {}".format(ind), alpha=0.8 if ix > 0 else 1, **kwargs)
    plt.xticks(xticks_)
    plt.xlim(left=min(xticks_))
    plt.ylim(Y_min, Y_max)
    # Show both gridlines
    ax = plt.gca()
    ax.grid(True, linestyle=":", linewidth=.5)
    plt.ylabel("value", fontsize="large")
    plt.xlabel("data point", fontsize="large")
    # plt.margins(x=0)  # ! This removes the None Y values from the plot, which we do NOT want.
    plt.legend(fontsize="medium", ncol=2)
    fn_wo_ext = common.file_name_wo_ext(dataset)
    if with_title:
        title_ = "Case Base Sequences\n"
        title_ = "{}CB: {}\n".format(title_, fn_wo_ext)
        title_ = "{}Time-window width:{}, step:{},  Update:{}".format(title_, run_common.time_window_width_str(width), step, upd_ind)
        plt.title(title_ + "\n")
    if signature:
        plt_common.sign_plot(plt, cb_sequences.__name__)
    plt.tight_layout()
    save_fn = "CB_SEQUENCES_{}_w_{}_s_{}_seqs_{}_u_{}{}".format(fn_wo_ext,
                                                                width,
                                                                step,
                                                                str(seqs),
                                                                upd_ind,
                                                                "_t" if with_title else "")
    if file_format:
        save_fpath = os.path.join(common.APP.FOLDER.FIGURE, "{}.{}".format(save_fn, file_format))
        plt.savefig(save_fpath, dpi=150, bbox_inches="tight")
        print("CB sequences figure saved into '{}'.".format(save_fpath))
    else:
        # Update the title of the plot window
        plt.gcf().canvas.manager.set_window_title(save_fn)  # canvas.set_window_title was deprecated in matplotlib 3.4
        plt.show()
    plt.close()
    return seq_ind
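
A usage sketch with a hypothetical dataset path; extra keyword arguments are forwarded to `matplotlib.pyplot.plot`:

# Plot the last update of sequences 0 and 3 on top of their full series; save as PDF.
plotted = cb_sequences(dataset="~/ts_datasets/PowerCons.csv",  # hypothetical path
                       file_format="pdf", width=40, step=1,
                       seqs=[0, 3], full=True,
                       linewidth=1.5)  # forwarded to plt.plot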