def run_experiment(attributes, disjunct_degree, n, task_groups, tasks_per_group,
                   noise, data_rnd_seed, n_learning_sets, rnd_seed,
                   results_path, base_learners, measures, learners,
                   test=True, unpickle=False, visualize=True,
                   weighting="all_equal", error_margin="std",
                   error_bars=True, separate_figs=True, cfg_logger=True):
    """Execute the binarization experiment with the given parameters and
    store all results below results_path.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        Expected number of attributes/variables per disjunct.
    n : int
        Number of examples to generate for each task.
    task_groups : int
        Number of task groups to generate; tasks in a group share the same
        Boolean function.
    tasks_per_group : int
        Number of tasks (with their data) generated per task group.
    noise : float
        Proportion of each task's examples whose class values are assigned
        at random.
    data_rnd_seed : int
        Seed used to initialize the private Random object of the data
        generator.
    n_learning_sets : int
        Number of different learning sets created per task.
    rnd_seed : int
        Seed passed to the BinarizationExperimentMTLTester object.
    results_path : string
        Directory for the results; created if it does not exist.
    base_learners : OrderedDict
        (name, learner) items, where name is a string and learner is a
        scikit-learn estimator object.
    measures : list
        Names (strings) of the evaluation measures.
    learners : OrderedDict
        (name, learner) items, where learner is a merging learning
        algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Whether to run the tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Whether to look up previously computed testing results and include
        them in the MTL problem.
    visualize : boolean
        Whether to visualize the results of the current tasks (for each
        combination of base learners, measures and learners).
    weighting : string
        Type of weighting used when computing the overall results.
    error_margin : string
        Measure used for error margins of the overall results.
    error_bars : boolean
        Whether to draw error bars in the visualizations.
    separate_figs : boolean
        Whether each base learner gets its own figure instead of one
        combined figure.
    cfg_logger : boolean
        Whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # direct the log of this run into the results directory
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(results_path, "run-{}.log".format(timestamp))
        configure_logger(logger, console_level=logging.INFO,
                         file_name=log_file)
    pickle_fmt = os.path.join(results_path, "bl-{}.pkl")
    # generate the Boolean MTL problem along with complete test sets and
    # pickle the underlying Boolean functions
    funcs_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_path)
    # wrap the tasks' data in an MTL tester
    tester = BinarizationExperimentMTLTester(
        tasks_data, rnd_seed, repeats=1,
        preprepared_test_sets=complete_test_sets)
    if test:
        # evaluate every learner/base-learner combination with the defined
        # measures and pickle the outcome
        tester.test_tasks(learners, base_learners, measures, results_path,
                          save_orange_data=True)
        tester.pickle_test_results(pickle_fmt)
    if unpickle:
        # load earlier results and make sure they were produced from the
        # same data tables and cross-validation indices
        tester.find_pickled_test_results(pickle_fmt)
        if not tester.check_test_results_compatible():
            raise ValueError("Test results for different base learners are not "
                             "compatible.")
    if visualize:
        # plot the results for every combination of base learners, learners
        # and measures, plus the dendrograms of ERM's merging history
        if not tester.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        base_ls = tester.get_base_learners()
        learner_names = tester.get_learners()
        measure_names = tester.get_measures()
        colors = {"ForcedTree": "blue", "Tree": "green", "ERM": "red"}
        tester.visualize_results(base_ls, learner_names, measure_names,
                                 results_path, colors,
                                 error_bars=error_bars,
                                 separate_figs=separate_figs)
        tester.visualize_dendrograms(base_ls, results_path)
        tester.compute_overall_results(base_ls, learner_names, measure_names,
                                       results_path, weighting=weighting,
                                       error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        build_and_crop_tex_files(results_path, r"-tikz.tex$")
        combine_dendrograms_and_trees(base_ls, results_path)
        build_and_crop_tex_files(results_path, r"^comparison.*.tex$",
                                 crop=False)
# ---------------------------------------------------------------------------
# Example 2
def _test_config_44(data_rnd_seed_values,
                    noise_values,
                    results_dir="results/synthetic_data"):
    """Configuration for the changing-amount-of-noise experiment.

    For every seed in data_rnd_seed_values a synthetic Boolean MTL problem
    is generated, and the experiment is repeated for every noise level in
    noise_values.  Results go into the changing_noise sub-directory of
    results_dir.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Amounts of noise for which to repeat the experiment.
    results_dir : str
        Directory that will contain the changing_noise directory with the
        relevant experiment results.

    """
    # fixed parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # fixed parameter of the MTL problem tester
    rnd_seed = 51

    # data_rnd_seed and noise are the dynamic parameters of the problem
    for data_rnd_seed in data_rnd_seed_values:
        # path template with a single remaining slot for the noise level
        path_fmt = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups, tasks_per_group,
                data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # set up the results directory and the logger for this run
                results_path = path_fmt.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                stamp = time.strftime("%Y%m%d_%H%M%S")
                log_file = os.path.join(results_path,
                                        "run-{}.log".format(stamp))
                configure_logger(logger,
                                 console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate Boolean data together with complete test sets
                funcs_path = os.path.join(results_path, "boolean_funcs.pkl")
                tasks_data, complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise,
                        random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_path)
                # run the tests on the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test, unpickle=unpickle,
                           visualize=visualize,
                           preprepared_test_sets=complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one PDF
            combined_pdf = (path_fmt.format(pprint_iter(noise_values)) +
                            "-{}-{{}}.pdf".format(error_measure))
            combine_experiment_results(path_fmt, noise_values, combined_pdf,
                                       n_learning_sets,
                                       error_measure=error_measure,
                                       title="Avg. results for tasks",
                                       xlabel="% of noise")
# ---------------------------------------------------------------------------
# Example 3 (incomplete fragment follows)
 data_rnd_seed = 11
 # parameters of the MTL problem tester
 rnd_seed = 51
 repeats = 3
 test_prop = 0.5
 results_path = os.path.join(path_prefix, "results/synthetic_data/"
                 "boolean_func-a{}d{}n{}g{}tg{}-seed{}-repeats{}".\
                 format(attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, rnd_seed, repeats))
 if not os.path.exists(results_path):
     os.makedirs(results_path)
 log_file = os.path.join(
     results_path,
     "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
 configure_logger(logger,
                  console_level=logging.INFO,
                  file_name=log_file)
 tasks_data = synthetic_data.generate_boolean_data(
     attributes,
     disjunct_degree,
     n,
     task_groups,
     tasks_per_group,
     noise,
     random_seed=data_rnd_seed)
 test_tasks(tasks_data,
            results_path,
            base_learners_bool,
            measures_clas,
            learners,
            "train_test_split",
def run_experiment(attributes,
                   disjunct_degree,
                   n,
                   task_groups,
                   tasks_per_group,
                   noise,
                   data_rnd_seed,
                   n_learning_sets,
                   rnd_seed,
                   results_path,
                   base_learners,
                   measures,
                   learners,
                   test=True,
                   unpickle=False,
                   visualize=True,
                   weighting="all_equal",
                   error_margin="std",
                   error_bars=True,
                   separate_figs=True,
                   cfg_logger=True):
    """Execute the binarization experiment with the given parameters and
    store all results below results_path.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        Expected number of attributes/variables per disjunct.
    n : int
        Number of examples to generate for each task.
    task_groups : int
        Number of task groups to generate; tasks in a group share the same
        Boolean function.
    tasks_per_group : int
        Number of tasks (with their data) generated per task group.
    noise : float
        Proportion of each task's examples whose class values are assigned
        at random.
    data_rnd_seed : int
        Seed used to initialize the private Random object of the data
        generator.
    n_learning_sets : int
        Number of different learning sets created per task.
    rnd_seed : int
        Seed passed to the BinarizationExperimentMTLTester object.
    results_path : string
        Directory for the results; created if it does not exist.
    base_learners : OrderedDict
        (name, learner) items, where name is a string and learner is a
        scikit-learn estimator object.
    measures : list
        Names (strings) of the evaluation measures.
    learners : OrderedDict
        (name, learner) items, where learner is a merging learning
        algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Whether to run the tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Whether to look up previously computed testing results and include
        them in the MTL problem.
    visualize : boolean
        Whether to visualize the results of the current tasks (for each
        combination of base learners, measures and learners).
    weighting : string
        Type of weighting used when computing the overall results.
    error_margin : string
        Measure used for error margins of the overall results.
    error_bars : boolean
        Whether to draw error bars in the visualizations.
    separate_figs : boolean
        Whether each base learner gets its own figure instead of one
        combined figure.
    cfg_logger : boolean
        Whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # direct the log of this run into the results directory
        stamp = time.strftime("%Y%m%d_%H%M%S")
        log_file = os.path.join(results_path, "run-{}.log".format(stamp))
        configure_logger(logger,
                         console_level=logging.INFO,
                         file_name=log_file)
    pickle_fmt = os.path.join(results_path, "bl-{}.pkl")
    # generate the Boolean MTL problem along with complete test sets and
    # pickle the underlying Boolean functions
    funcs_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_path)
    # wrap the tasks' data in an MTL tester
    mtl_tester = BinarizationExperimentMTLTester(
        tasks_data,
        rnd_seed,
        repeats=1,
        preprepared_test_sets=complete_test_sets)
    if test:
        # evaluate every learner/base-learner combination with the defined
        # measures and pickle the outcome
        mtl_tester.test_tasks(learners, base_learners, measures,
                              results_path, save_orange_data=True)
        mtl_tester.pickle_test_results(pickle_fmt)
    if unpickle:
        # load earlier results and make sure they were produced from the
        # same data tables and cross-validation indices
        mtl_tester.find_pickled_test_results(pickle_fmt)
        if not mtl_tester.check_test_results_compatible():
            raise ValueError(
                "Test results for different base learners are not "
                "compatible.")
    if visualize:
        # plot the results for every combination of base learners, learners
        # and measures, plus the dendrograms of ERM's merging history
        if not mtl_tester.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        bl_names = mtl_tester.get_base_learners()
        l_names = mtl_tester.get_learners()
        m_names = mtl_tester.get_measures()
        color_map = {"ForcedTree": "blue", "Tree": "green", "ERM": "red"}
        mtl_tester.visualize_results(bl_names, l_names, m_names,
                                     results_path, color_map,
                                     error_bars=error_bars,
                                     separate_figs=separate_figs)
        mtl_tester.visualize_dendrograms(bl_names, results_path)
        mtl_tester.compute_overall_results(bl_names, l_names, m_names,
                                           results_path,
                                           weighting=weighting,
                                           error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        build_and_crop_tex_files(results_path, r"-tikz.tex$")
        combine_dendrograms_and_trees(bl_names, results_path)
        build_and_crop_tex_files(results_path,
                                 r"^comparison.*.tex$",
                                 crop=False)
# ---------------------------------------------------------------------------
# Example 5
def _test_config_44(data_rnd_seed_values, noise_values,
                    results_dir="results/synthetic_data"):
    """Configuration for the changing-amount-of-noise experiment.

    For every seed in data_rnd_seed_values a synthetic Boolean MTL problem
    is generated, and the experiment is repeated for every noise level in
    noise_values.  Results go into the changing_noise sub-directory of
    results_dir.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Amounts of noise for which to repeat the experiment.
    results_dir : str
        Directory that will contain the changing_noise directory with the
        relevant experiment results.

    """
    # fixed parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # fixed parameter of the MTL problem tester
    rnd_seed = 51

    # data_rnd_seed and noise are the dynamic parameters of the problem
    for data_rnd_seed in data_rnd_seed_values:
        # path template with a single remaining slot for the noise level
        fmt_path = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups,
                tasks_per_group, data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # set up the results directory and the logger for this run
                results_path = fmt_path.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                ts = time.strftime("%Y%m%d_%H%M%S")
                log_file = os.path.join(results_path,
                                        "run-{}.log".format(ts))
                configure_logger(logger, console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate Boolean data together with complete test sets
                funcs_path = os.path.join(results_path,
                                          "boolean_funcs.pkl")
                tasks_data, complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise,
                        random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_path)
                # run the tests on the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test,
                           unpickle=unpickle, visualize=visualize,
                           preprepared_test_sets=complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one PDF
            pdf_name = (fmt_path.format(pprint_iter(noise_values)) +
                        "-{}-{{}}.pdf".format(error_measure))
            combine_experiment_results(fmt_path, noise_values, pdf_name,
                                       n_learning_sets,
                                       error_measure=error_measure,
                                       title="Avg. results for tasks",
                                       xlabel="% of noise")
# ---------------------------------------------------------------------------
# Example 6 (incomplete fragment follows)
     tasks_per_group = 5
     noise = 0.0
     data_rnd_seed = 11
     # parameters of the MTL problem tester
     rnd_seed = 51
     repeats = 3
     test_prop=0.5
     results_path = os.path.join(path_prefix, "results/synthetic_data/"
                     "boolean_func-a{}d{}n{}g{}tg{}-seed{}-repeats{}".\
                     format(attributes, disjunct_degree, n, task_groups,
                            tasks_per_group, rnd_seed, repeats))
     if not os.path.exists(results_path):
         os.makedirs(results_path)
     log_file = os.path.join(results_path,
                     "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
     configure_logger(logger, console_level=logging.INFO,
                      file_name=log_file)
     tasks_data = synthetic_data.generate_boolean_data(attributes,
                     disjunct_degree, n, task_groups, tasks_per_group,
                     noise, random_seed=data_rnd_seed)
     test_tasks(tasks_data, results_path, base_learners_bool,
                measures_clas, learners, "train_test_split",
                rnd_seed=rnd_seed,
                test=test, unpickle=unpickle, visualize=visualize,
                test_prop=test_prop, repeats=repeats, cfg_logger=False,
                separate_figs=True)
 
 if test_config == 21:
     # parameters of the synthetic Boolean MTL problem
     attributes = 8
     disjunct_degree = 4
     n = 100