Example No. 1
def run_master(args):
    NS = hpns.NameServer(run_id=args.run_id,
                         nic_name=args.nic_name,
                         working_directory=args.bohb_root_path)
    ns_host, ns_port = NS.start()

    # Start a background worker for the master node
    if args.optimize_generalist:
        w = AggregateWorker(run_id=args.run_id,
                            host=ns_host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            working_directory=args.bohb_root_path,
                            n_repeat=args.n_repeat,
                            has_repeats_as_budget=args.n_repeat is None,
                            time_budget=args.time_budget,
                            time_budget_approx=args.time_budget_approx,
                            performance_matrix=args.performance_matrix)
    else:
        w = SingleWorker(run_id=args.run_id,
                         host=ns_host,
                         nameserver=ns_host,
                         nameserver_port=ns_port,
                         working_directory=args.bohb_root_path,
                         n_repeat=args.n_repeat,
                         dataset=args.dataset,
                         time_budget=args.time_budget,
                         time_budget_approx=args.time_budget_approx)
    w.run(background=True)

    # Create an optimizer
    result_logger = hpres.json_result_logger(directory=args.bohb_root_path,
                                             overwrite=False)

    if args.previous_run_dir is not None:
        previous_result = hpres.logged_results_to_HBS_result(
            args.previous_run_dir)
    else:
        previous_result = None

    logger = logging.getLogger(__file__)
    logging_level = getattr(logging, args.logger_level)
    logger.setLevel(logging_level)

    optimizer = BOHB(configspace=get_configspace(),
                     run_id=args.run_id,
                     host=ns_host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     min_budget=args.n_repeat_lower_budget,
                     max_budget=args.n_repeat_upper_budget,
                     result_logger=result_logger,
                     logger=logger,
                     previous_result=previous_result)

    res = optimizer.run(n_iterations=args.n_iterations)

    # Shutdown
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()
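For context, worker nodes would typically register with the same name server before the master calls optimizer.run. The following sketch is not part of the original example: it reuses the project-specific SingleWorker with the same keyword arguments as above, and it assumes two extra CLI arguments (nameserver_host and nameserver_port) carrying the address returned by NS.start() on the master node.
def run_worker(args):
    # Hedged sketch: resolve this machine's address from the network interface
    # name (hpns.nic_name_to_host is part of hpbandster.core.nameserver)
    host = hpns.nic_name_to_host(args.nic_name)

    # args.nameserver_host and args.nameserver_port are assumed CLI arguments
    w = SingleWorker(run_id=args.run_id,
                     host=host,
                     nameserver=args.nameserver_host,
                     nameserver_port=args.nameserver_port,
                     working_directory=args.bohb_root_path,
                     n_repeat=args.n_repeat,
                     dataset=args.dataset,
                     time_budget=args.time_budget,
                     time_budget_approx=args.time_budget_approx)
    # Block in the foreground until the master shuts the workers down
    w.run(background=False)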
Example No. 2
    def test_incumbent_trajectory(self):
        """ Load example result and check incumbent_trajectory generation for general errors (whitebox-test)"""
        result = logged_results_to_HBS_result(self.result_path)

        # All budgets
        traj = get_incumbent_trajectory(result,
                                        result.HB_config['budgets'],
                                        mode='racing')
        traj = get_incumbent_trajectory(result,
                                        result.HB_config['budgets'],
                                        mode='minimum')
        traj = get_incumbent_trajectory(result,
                                        result.HB_config['budgets'],
                                        mode='prefer_higher_budget')

        # Single budgets
        traj = get_incumbent_trajectory(result,
                                        [result.HB_config['budgets'][0]],
                                        mode='racing')
        traj = get_incumbent_trajectory(result,
                                        [result.HB_config['budgets'][0]],
                                        mode='minimum')
        traj = get_incumbent_trajectory(result,
                                        [result.HB_config['budgets'][0]],
                                        mode='prefer_higher_budget')
Example No. 3
def build_run_trajectories(results_folder, autonet_config):
    # parse results
    res = logged_results_to_HBS_result(results_folder)
    incumbent_trajectory = res.get_incumbent_trajectory(
        bigger_is_better=False, non_decreasing_budget=False)

    # prepare
    metric_name = autonet_config["train_metric"]
    all_metrics = autonet_config["additional_metrics"] + [metric_name]
    additional_metric_names = ["val_" + m for m in all_metrics]
    additional_metric_names += ["train_" + m for m in all_metrics]
    additional_metric_names += autonet_config["additional_logs"]

    # initialize incumbent trajectories
    incumbent_trajectories = dict()

    # save incumbent trajectories
    incumbent_trajectories[metric_name] = incumbent_trajectory
    incumbent_trajectory["flipped"] = not autonet_config["minimize"]
    for name in additional_metric_names:
        tj = copy(incumbent_trajectory)
        tj["losses"] = [
            run["info"][name]
            for config_id, budget in zip(tj["config_ids"], tj["budgets"])
            for run in res.get_runs_by_id(config_id)
            if run["budget"] == budget and name in run["info"]
        ]
        tj["flipped"] = False
        if tj["losses"]:
            incumbent_trajectories[name] = tj

    return incumbent_trajectories
Example No. 4
    def parse_results(self, pipeline_config):
        try:
            res = logged_results_to_HBS_result(
                pipeline_config["result_logger_dir"])
            id2config = res.get_id2config_mapping()
            incumbent_trajectory = res.get_incumbent_trajectory(
                bigger_is_better=False, non_decreasing_budget=False)
        except Exception as e:
            raise RuntimeError(
                "Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet."
            )

        if (len(incumbent_trajectory['config_ids']) == 0):
            return dict()

        final_config_id = incumbent_trajectory['config_ids'][-1]
        final_budget = incumbent_trajectory['budgets'][-1]
        best_run = [
            r for r in res.get_runs_by_id(final_config_id)
            if r.budget == final_budget
        ][0]
        return {
            'optimized_hyperparameter_config':
            id2config[final_config_id]['config'],
            'budget': final_budget,
            'loss': best_run.loss,
            'info': best_run.info
        }
Example No. 5
def get_best_models_from_log(log_dir):
    if not os.path.isdir(log_dir):
        log_dir = log_dir.replace('nierhoff', 'dingsda')

    result = hpres.logged_results_to_HBS_result(log_dir)

    best_models = []

    for value in result.data.values():
        try:
            loss = value.results[1.0]['loss']
            model_name = value.results[1.0]['info']['model_name']

            if not os.path.isfile(model_name):
                model_name = model_name.replace('nierhoff', 'dingsda')
            best_models.append((loss, model_name))
        except:
            continue

    # before AUC (objective minimized)
    # print("sorting from low to high values (non-AUC)")
    # best_models.sort(key=lambda x: x[0])
    # best_models = best_models[:MODEL_NUM]

    # AUC (objective maximized)
    print("sorting from high to low values (AUC)")
    best_models.sort(key=lambda x: x[0], reverse=True)
    best_models = best_models[:MODEL_NUM]

    return best_models
Example No. 6
def generateLossComparison(out_dir, show=False):
    '''
    Function to generate box plots over different budgets for an entire BOHB run
    :param out_dir: Directory where the plots are to be saved
    :param show: True/False to display the plots (additionally to saving)
    :return: void
    '''
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(out_dir)

    plot_data = {}
    for k in result.data.keys():
        try:
            sample = result.data[k].results
        except (KeyError, AttributeError):  # skip entries without usable results
            continue
        for b in sample:
            if sample[b] is None:
                continue
            budget = int(b)
            if budget not in plot_data:
                plot_data[budget] = [[sample[b]['info']['train_loss']],
                                     [sample[b]['info']['test_loss']]]
            else:
                plot_data[budget][0].append(sample[b]['info']['train_loss'])
                plot_data[budget][1].append(sample[b]['info']['test_loss'])

    max_loss = 0
    for i, k in enumerate(plot_data.keys()):
        max_loss = max(max_loss, np.max(np.array(plot_data[k])))

    fig = plt.figure(figsize=(10, 4), dpi=150)
    plt.suptitle(
        "Loss comparison of Train and Validation over Epochs (Budget)")
    gs = gridspec.GridSpec(1, len(plot_data.keys()))
    for i, k in enumerate(plot_data.keys()):
        ax = plt.subplot(gs[0, i])
        ax.grid(which='major', linestyle=':', axis='y')
        bp = ax.boxplot(plot_data[k],
                        showmeans=True,
                        meanline=True,
                        sym='+',
                        meanprops={'linestyle': '-'},
                        whiskerprops={'linestyle': '--', 'color': 'blue'})
        ax.set_ylim(0, max_loss + 0.1)
        ax.set_xlabel('Budget: ' + str(k))
        if i == 0:
            ax.set_ylabel('Loss')
        setBoxColors(bp)
    hB, = plt.plot([1, 1], 'b-')
    hR, = plt.plot([1, 1], 'r-')
    hG, = plt.plot([1, 1], 'g-')
    plt.figlegend((hB, hR, hG), ('Training', 'Validation', 'Mean'),
                  loc='upper right')
    hB.set_visible(False)
    hR.set_visible(False)
    hG.set_visible(False)
    plt.savefig(out_dir + '/loss_comparison_plot.png', dpi=300)
    if show:
        plt.show()
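A minimal usage sketch for the function above; the path is a placeholder and must point at a directory containing the configs.json and results.json written by a BOHB run (the plot is also saved there).
# Hedged usage sketch; 'path/to/bohb_run' is a placeholder directory
generateLossComparison('path/to/bohb_run', show=False)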
Example No. 7
    def __init__(self, hp_names, result_object=None, result_path=None):
        """
        Visualize hpbandster learning curves in an interactive bokeh-plot.

        Parameters
        ----------
        hp_names: List[str]
            list with hyperparameters-names
        result_object: Result
            hpbandster-result object. must be specified if result_path is not
        result_path: str
            path to hpbandster result-folder. must contain configs.json and results.json. must be specified if result_object is not
        """
        self.logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
        try:
            from hpbandster.core.result import logged_results_to_HBS_result
            from hpbandster.core.result import extract_HBS_learning_curves
        except ImportError as err:
            self.logger.exception(err)
            raise ImportError("You need to install hpbandster (e.g. 'pip install hpbandster') to analyze bohb-results.")

        if (result_path and result_object) or not (result_path or result_object):
            raise ValueError("Specify either result_path or result_object. (currently \"%s\" and \"%s\")" % (result_path, result_object))
        elif result_path:
            result_object = logged_results_to_HBS_result(result_path)

        incumbent_trajectory = result_object.get_incumbent_trajectory()

        self.hp_names = hp_names
        self.result_object = result_object
        self.lcs = result_object.get_learning_curves(lc_extractor=extract_HBS_learning_curves)
Example No. 8
def setup_fanova_analysis(run_name):
    bohb_logs_dir = run_name
    res = hpres.logged_results_to_HBS_result(bohb_logs_dir)

    inc_id = res.get_incumbent_id()

    id2conf = res.get_id2config_mapping()

    inc_trajectory = res.get_incumbent_trajectory()
    print(inc_trajectory)
    print(res.get_runs_by_id(inc_id))

    all_runs = list(
        filter(lambda r: not (r.info is None or r.loss is None),
               res.get_all_runs()))

    budgets = res.HB_config['budgets']

    runs_by_budget = {}

    for b in budgets:
        runs_by_budget[b] = list(filter(lambda r: r.budget == b, all_runs))

    fanova_analysis(budgets=budgets,
                    res=res,
                    runs_by_budget=runs_by_budget,
                    id2conf=id2conf,
                    bohb_logs_dir=bohb_logs_dir)
Example No. 9
def get_trajectories(true_paths,
                     surrogate_paths,
                     methods=['BANANAS'],
                     surrogate='xgb'):
    print(true_paths)
    print(surrogate_paths)
    all_trajectories = {}

    for m in methods:
        dfs = []
        for i, true_path in enumerate(true_paths):
            print(true_path)
            true_results = hpres.logged_results_to_HBS_result(true_path)
            true_inc = extract_incumbents(true_results, surrogate=False)
            error = 100 - true_inc[:, 0]
            times = true_inc[:, 1]
            df = pd.DataFrame({str(i): error}, index=times)
            dfs.append(df)

        df_true = merge_and_fill_trajectories(dfs, default_value=None)

        dfs = []
        for i, surr_path in enumerate(surrogate_paths):
            try:
                print(surr_path)
                surr_results = hpres.logged_results_to_HBS_result(surr_path)
                surr_inc = extract_incumbents(surr_results, surrogate=True)
                error = 100 - surr_inc[:, 0]
                times = surr_inc[:, 1]
                df = pd.DataFrame({str(i): error}, index=times)
                dfs.append(df)
            except Exception as e:
                print('Could not read:', surr_path)

        df_surr = merge_and_fill_trajectories(dfs, default_value=None)

        all_trajectories[m + ' true'] = {
            'time_stamps': np.array(df_true.index),
            'errors': np.array(df_true.T)
        }
        all_trajectories[m + ' surr'] = {
            'time_stamps': np.array(df_surr.index),
            'errors': np.array(df_surr.T)
        }

    return all_trajectories
Example No. 10
def extract_best_config(dataset):
    """
    Gets the bohb results of the given dataset and return the best architecture and configuration.
    :param dataset: string
    :return:
    incumbent_model: string, one of ['ESN', 'CNN_1D', 'LSTM', 'FCN']
    incumbent_config: configuration object
    """
    models = ['ESN', 'CNN_1D', 'LSTM', 'FCN']
    result_dir = 'logs_sample_dataset'

    # BOHB results for each model&dataset combination
    results = [
        os.path.join(result_dir, name) for name in os.listdir(result_dir)
    ]

    results_current_dataset = [r for r in results if dataset in r]

    # to store the losses of each and compare the results later
    incumbent_configs = []
    incumbent_losses = []

    for model in models:
        result_folder = [r for r in results_current_dataset if model in r]
        result = result_folder[0]

        # load the example run from the log files
        result = hpres.logged_results_to_HBS_result(result)

        # get the 'dict' that translates config ids to the actual configurations
        id2conf = result.get_id2config_mapping()

        # get incumbent id
        inc_id = result.get_incumbent_id()

        # get result of the incumbent
        inc_runs = result.get_runs_by_id(inc_id)
        inc_run = inc_runs[-1]

        inc_config = id2conf[inc_id]['config']  # best config
        inc_loss = inc_run.loss  # loss of the best config

        incumbent_configs.append(inc_config)
        incumbent_losses.append(inc_loss)

    min_loss, idx = min(
        (val, idx) for (idx, val) in enumerate(incumbent_losses))
    incumbent_model = models[idx]
    incumbent_config = incumbent_configs[idx]
    print(
        "################################### Dataset {} #######################################"
        .format(dataset))
    print("Best performing model: {}".format(incumbent_model))
    print("Configuration: {}".format(incumbent_config))
    print("Score: {}".format(-min_loss))
    print("\n")

    return incumbent_model, incumbent_config
Example No. 11
def get_data():
    list_data = []
    for log_dir in LOG_DIRS:
        result = hpres.logged_results_to_HBS_result(log_dir)
        all_runs = result.get_all_runs()
        id2conf = result.get_id2config_mapping()

        # calculate avg. runtime
        ts = []
        for i, run in enumerate(all_runs):
            t_s = run['time_stamps']['started']
            t_f = run['time_stamps']['finished']
            ts.append(t_f - t_s)

            if i >= MAX_VALS:
                break

        print(log_dir)
        print('mean [s]: ' + str(statistics.mean(ts)))
        print('std [s]: ' + str(statistics.stdev(ts)))

        # copy data to list
        data = []

        for run in all_runs:
            avg_rewards = ast.literal_eval(run['info']['score_list'])
            #print(avg_rewards)

            config_id = run['config_id']

            # handle timeout cases (impute missing values)
            if avg_rewards[0] < -1e5 and avg_rewards[1] > -1e5:
                avg_rewards[0] = avg_rewards[1]
            for k in range(1, len(avg_rewards)):
                if avg_rewards[k] < -1e5 and avg_rewards[k - 1] > -1e5:
                    avg_rewards[k] = avg_rewards[k - 1]

            data.append(avg_rewards)
        list_data.append(data)

    # copy from list to numpy array
    proc_data = []

    n = len(list_data[0][0])
    for data in list_data:
        np_data = np.zeros([MAX_VALS, n])

        for i in range(len(np_data)):
            np_data[i] = np.array(data[i])

        mean = np.mean(np_data, axis=0)
        std = np.std(np_data, axis=0)

        proc_data.append((mean, std))

    return proc_data, list_data
Example No. 12
    def parse_results(self, result_logger_dir):
        res = logged_results_to_HBS_result(result_logger_dir)
        id2config = res.get_id2config_mapping()
        incumbent_trajectory = res.get_incumbent_trajectory(
            bigger_is_better=False, non_decreasing_budget=False)

        if (len(incumbent_trajectory['config_ids']) == 0):
            return dict()

        final_config_id = incumbent_trajectory['config_ids'][-1]
        return incumbent_trajectory['losses'][-1], id2config[final_config_id][
            'config'], incumbent_trajectory['budgets'][-1]
Example No. 13
def get_trajectories_per_method(methods,
                                suffix='',
                                surrogate=False,
                                append_instead_of_combining=False):
    print(methods)
    all_trajectories = {}

    # each entry in methods maps to (paths, surrogate, append_instead_of_combining),
    # overriding the function-level defaults of the same names
    for m, (paths, surrogate, append_instead_of_combining) in methods.items():
        dfs = []

        # append configs to one long trajectory
        if append_instead_of_combining:
            hp_results = []
            for i, path in enumerate(paths):
                print(path)
                true_results = hpres.logged_results_to_HBS_result(path)
                hp_results.append(true_results)
            true_inc = extract_incumbents(hp_results, surrogate=surrogate)
            error = 1 - true_inc[:, 0] / 100
            times = true_inc[:, 1]
            df = pd.DataFrame({str(0): error}, index=times)
            dfs.append(df)

        # average over trajectories
        else:
            for i, path in enumerate(paths):
                print(path)
                true_results = hpres.logged_results_to_HBS_result(path)
                true_inc = extract_incumbents(true_results,
                                              surrogate=surrogate)
                error = 1 - true_inc[:, 0] / 100
                times = true_inc[:, 1]
                df = pd.DataFrame({str(i): error}, index=times)
                dfs.append(df)

        df_true = merge_and_fill_trajectories(dfs, default_value=None)

        all_trajectories[m + suffix] = {
            'time_stamps': np.array(df_true.index),
            'errors': np.array(df_true.T)
        }

    return all_trajectories
Example No. 14
def analyze_bohb(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    plot_parallel_scatter(result,
                          with_mirrored_sampling=False,
                          with_nes_step_size=False)
    #plot_parallel_scatter(result, with_mirrored_sampling=False, with_nes_step_size=True)
    plot_parallel_scatter(result,
                          with_mirrored_sampling=True,
                          with_nes_step_size=False)
Example No. 15
    def fit(self,
            pipeline_config,
            final_metric_score,
            optimized_hyperparameter_config,
            budget,
            refit=None):
        if refit or pipeline_config["ensemble_size"] == 0 or pipeline_config[
                "task_id"] not in [-1, 1]:
            return {
                "final_metric_score": final_metric_score,
                "optimized_hyperparameter_config":
                optimized_hyperparameter_config,
                "budget": budget
            }

        filename = os.path.join(pipeline_config["result_logger_dir"],
                                'predictions_for_ensemble.npy')
        train_metric = self.pipeline[MetricSelector.get_name()].metrics[
            pipeline_config["train_metric"]]
        y_transform = self.pipeline[
            OneHotEncoding.get_name()].complete_y_tranformation
        result = logged_results_to_HBS_result(
            pipeline_config["result_logger_dir"])

        all_predictions, labels, model_identifiers, _ = read_ensemble_prediction_file(
            filename=filename, y_transform=y_transform)
        ensemble_selection, ensemble_configs = build_ensemble(
            result=result,
            train_metric=train_metric,
            minimize=pipeline_config["minimize"],
            ensemble_size=pipeline_config["ensemble_size"],
            all_predictions=all_predictions,
            labels=labels,
            model_identifiers=model_identifiers,
            only_consider_n_best=pipeline_config[
                "ensemble_only_consider_n_best"],
            sorted_initialization_n_best=pipeline_config[
                "ensemble_sorted_initialization_n_best"])

        return {
            "final_metric_score":
            final_metric_score,
            "optimized_hyperparameter_config":
            optimized_hyperparameter_config,
            "budget":
            budget,
            "ensemble":
            ensemble_selection,
            "ensemble_final_metric_score":
            ensemble_selection.get_validation_performance(),
            "ensemble_configs":
            ensemble_configs
        }
Example No. 16
def load_config(dir):
    '''
    Given a directory where BOHB results have been logged, loads the incumbent configuration
    The directory needs to have 'configs.json' and 'results.json' as BOHB outputs
    :param dir: Directory where BOHB results exist
    :return: JSON containing incumbent configuration
    '''
    result = hpres.logged_results_to_HBS_result(dir)
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    inc_config = id2conf[inc_id]['config']
    return inc_config
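A minimal usage sketch with a placeholder path:
# Hedged usage sketch; the directory must contain the configs.json and
# results.json produced by BOHB's json result logger
incumbent_config = load_config('path/to/bohb_run')
print(incumbent_config)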
Example No. 17
    def _get_incumbent(self, i):
        result = self.results[i]
        config_space = self.config_spaces[i]

        if isinstance(result, str):
            result = logged_results_to_HBS_result(result)
        id2config = result.get_id2config_mapping()
        trajectory = result.get_incumbent_trajectory(
            bigger_is_better=self.bigger_is_better,
            non_decreasing_budget=self.bigger_is_better)

        incumbent = id2config[trajectory["config_ids"][-1]]["config"]
        return Configuration(config_space, incumbent)
Example No. 18
    def add_result(self, result, config_space, origin):
        try:
            if isinstance(result, str):
                result = logged_results_to_HBS_result(result)
            result.get_incumbent_trajectory()
        except:
            print("Did not add empty result")
            return False

        self.results.append(result)
        self.config_spaces.append(config_space)
        self.origins.append(origin)
        return True
Example No. 19
def visualizeBOHB(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    # get all executed runs
    all_runs = result.get_all_runs()

    # get the 'dict' that translates config ids to the actual configurations
    id2conf = result.get_id2config_mapping()

    # Here is how you get the incumbent (best configuration)
    inc_id = result.get_incumbent_id()

    # let's grab the run on the highest budget
    inc_runs = result.get_runs_by_id(inc_id)
    inc_run = inc_runs[-1]

    # We have access to all information: the config, the loss observed during
    # optimization, and all the additional information
    inc_valid_score = inc_run.loss
    inc_config = id2conf[inc_id]['config']

    print(inc_config)

    print('Best found configuration:')
    print(inc_config)
    #print('It achieved accuracies of %f (validation) and %f (test).' % (-inc_valid_score, inc_test_score))

    # Let's plot the observed losses grouped by budget,
    hpvis.losses_over_time(all_runs)

    # the number of concurrent runs,
    hpvis.concurrent_runs_over_time(all_runs)

    # and the number of finished runs.
    hpvis.finished_runs_over_time(all_runs)

    # This one visualizes the spearman rank correlation coefficients of the losses
    # between different budgets.
    hpvis.correlation_across_budgets(result)

    # For model based optimizers, one might wonder how much the model actually helped.
    # The next plot compares the performance of configs picked by the model vs. random ones
    hpvis.performance_histogram_model_vs_random(all_runs, id2conf)

    plot_accuracy_over_budget(result)

    plot_parallel_scatter(result)

    plt.show()
Example No. 20
    def test_model_warmstarting(self):
        result1_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    "test_result1")
        result2_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    "test_result2")
        empty_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  "empty_result")
        result1 = logged_results_to_HBS_result(result1_path)
        result2 = logged_results_to_HBS_result(result2_path)

        cs = ConfigSpace.ConfigurationSpace()
        cs.add_hyperparameters([
            ConfigSpace.hyperparameters.UniformFloatHyperparameter("A",
                                                                   lower=0,
                                                                   upper=10),
            ConfigSpace.hyperparameters.UniformFloatHyperparameter("B",
                                                                   lower=0,
                                                                   upper=10)
        ])

        # train model
        builder = WarmstartedModelBuilder()
        r = builder.train_kde(result1, cs)
        self.assertEqual(len(r[0]), 1)
        self.assertEqual(len(r[1]), 1)
        self.assertEqual(r[0][0].data.shape, (3, 2))
        self.assertEqual(r[1][0].data.shape, (6, 2))

        # build
        builder.add_result(empty_path, cs, "empty")
        builder.add_result(result1, cs, "result1")
        builder.add_result(result2, cs, "result2")
        r = builder.build()
        self.assertEqual(len(r._good_kdes), 2)
        self.assertEqual(len(r._bad_kdes), 2)
        self.assertEqual(len(r._kde_config_spaces), 2)
Example No. 21
def save_config(source, dest, name):
    '''
    Reads the incumbent from a BOHB output directory and writes it as a JSON in the specified directory
    :param source: Directory from where to read the incumbent
    :param dest: Directory to save the file
    :param name: Name given to the JSON being saved
    :return: void
    '''
    result = hpres.logged_results_to_HBS_result(source)
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    inc_config = id2conf[inc_id]['config']
    with open(dest + name + '.json', 'w') as f:
        f.write(json.dumps(inc_config))
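A minimal usage sketch with placeholder paths; since dest and name are concatenated directly, dest should end with a path separator.
# Hedged usage sketch; both paths are placeholders
save_config('path/to/bohb_run', 'configs/', 'incumbent')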
Example No. 22
    def parse_results(self, result_logger_dir):
        try:
            res = logged_results_to_HBS_result(result_logger_dir)
            id2config = res.get_id2config_mapping()
            incumbent_trajectory = res.get_incumbent_trajectory(
                bigger_is_better=False, non_decreasing_budget=False)
        except Exception as e:
            raise RuntimeError(
                "Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet."
            )

        if (len(incumbent_trajectory['config_ids']) == 0):
            return dict()

        final_config_id = incumbent_trajectory['config_ids'][-1]
        return incumbent_trajectory['losses'][-1], id2config[final_config_id][
            'config'], incumbent_trajectory['budgets'][-1]
Example No. 23
def build_run_trajectories(results_folder, autonet_config):
    # parse results
    try:
        res = logged_results_to_HBS_result(results_folder)
        incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False, non_decreasing_budget=False)
    except:
        print("No incumbent trajectory found")
        return dict()

    # prepare
    metric_name = autonet_config["train_metric"]
    all_metrics = autonet_config["additional_metrics"] + [metric_name]
    additional_metric_names = ["val_" + m for m in all_metrics]
    additional_metric_names += ["train_" + m for m in all_metrics]
    additional_metric_names += autonet_config["additional_logs"]

    # initialize incumbent trajectories
    incumbent_trajectories = dict()
    
    # save incumbent trajectories
    incumbent_trajectories[metric_name] = incumbent_trajectory
    incumbent_trajectory["flipped"] = not autonet_config["minimize"]
    for name in additional_metric_names:
        tj = copy(incumbent_trajectory)
        log_available = [name in run["info"] for config_id, budget in zip(tj["config_ids"], tj["budgets"])
                                             for run in res.get_runs_by_id(config_id)
                                             if run["budget"] == budget]
        tj["losses"] = [run["info"][name] for config_id, budget in zip(tj["config_ids"], tj["budgets"])
                                          for run in res.get_runs_by_id(config_id)
                                          if run["budget"] == budget and name in run["info"]]
        for key, value_list in tj.items():
            if key in ["losses", "flipped"]:
                continue
            tj[key] = [value for i, value in enumerate(value_list) if log_available[i]]
        tj["flipped"] = False
        if tj["losses"]:
            incumbent_trajectories[name] = tj
    
    # assume first random config has been evaluated already at time 0
    for name, trajectory in incumbent_trajectories.items():
        for key, value_list in trajectory.items():
            if not isinstance(value_list, (list, tuple)):
                continue
            trajectory[key] = [value_list[0] if key != "times_finished" else 0] + value_list

    return incumbent_trajectories
Example No. 24
    def convert(self, folders, output_dir=None):
        """Convert hpbandster-results into smac-format, aggregating parallel runs along the budgets, so it is treated as
        one run with the same budgets. Throws ValueError when budgets of individual runs dont match.

        WIP: make the hpbandster conversion not aggregate parallel runs

        Parameters
        ----------
        folders: List[str]
            list of parallel hpbandster-runs (folder paths!)
        output_dir: str
            path to CAVE's output-directory

        Returns
        -------
        folder2result: {str : hpbandster.core.result}
            map parallel-run-folder-paths to hpbandster-result in original format
        folder2budgets: {str : {str or int or float : str}}
            map folder to budget to the paths of the converted data
        """
        try:
            from hpbandster.core.result import Result as HPBResult
            from hpbandster.core.result import logged_results_to_HBS_result
        except ImportError as e:
            raise ImportError(
                "To analyze BOHB-data, please install hpbandster (e.g. `pip install hpbandster`)"
            )

        # Original hpbandster-formatted result-object
        folder2result = OrderedDict([(f, logged_results_to_HBS_result(f))
                                     for f in folders])

        # Get a list with alternative interpretations of the configspace-file (if it's a .pcs-file, for .json-files it's
        # only one element)
        cs_interpretations = self.load_configspace(folders[0])

        # Using temporary files for the intermediate smac-result-like format
        if not output_dir:
            self.logger.debug("New outputdir")
            output_dir = tempfile.mkdtemp()

        # Actual conversion
        folder2budgets = self.hpbandster2smac(folder2result,
                                              cs_interpretations, output_dir)

        return folder2result, folder2budgets
Example No. 25
    def build(self):
        good_kdes = list()
        bad_kdes = list()
        for i, (result, config_space, origin) in enumerate(
                zip(self.results, self.config_spaces, self.origins)):
            print(i)
            if isinstance(result, str):
                try:
                    result = logged_results_to_HBS_result(result)
                except:
                    continue
            good, bad, budgets = self.train_kde(result, config_space)
            good_kdes.append(dict(zip(budgets, good)))
            bad_kdes.append(dict(zip(budgets, bad)))
            self.kde_config_spaces.append(config_space)
        return WarmstartedModel(good_kdes, bad_kdes, self.kde_config_spaces,
                                self.origins)
Example No. 26
    def fit(self, result_dir, fit_duration, final_score, autonet, task_id):
        if (task_id not in [-1, 1]):
            time.sleep(60)
            return dict()

        logging.getLogger('benchmark').info("Create and save summary")

        autonet_config = autonet.autonet_config
        res = logged_results_to_HBS_result(result_dir)
        id2config = res.get_id2config_mapping()
        incumbent_trajectory = res.get_incumbent_trajectory(
            bigger_is_better=False, non_decreasing_budget=False)
        final_config_id = incumbent_trajectory['config_ids'][-1]
        final_budget = incumbent_trajectory['budgets'][-1]
        incumbent_config = id2config[final_config_id]['config']

        final_info = [
            run["info"] for run in res.get_runs_by_id(final_config_id)
            if run["budget"] == final_budget
        ][0]

        summary = dict()
        summary["final_loss"] = final_score if autonet_config[
            "minimize"] else -final_score
        summary["incumbent_config"] = incumbent_config
        summary["duration"] = fit_duration
        for name in autonet_config['additional_metrics'] + [
                autonet_config["train_metric"]
        ]:
            try:
                summary["final_" + name] = final_info["val_" + name]
            except:
                summary["final_" + name] = final_info["train_" + name]

        for name in autonet_config['additional_logs']:
            try:
                summary["final_" + name] = final_info[name]
            except:
                pass

        # write as json
        with open(os.path.join(result_dir, "summary.json"), "w") as f:
            json.dump(summary, f)

        return dict()
Example No. 27
def incumbent_to_config(experiment_path, configs_path, output_dir):
    # Read the incumbent
    result = hpres.logged_results_to_HBS_result(str(experiment_path))
    id2conf = result.get_id2config_mapping()
    inc_id = result.get_incumbent_id()
    incumbent_config = id2conf[inc_id]['config']

    # Read the default config
    default_config_path = configs_path / "default.yaml"
    with default_config_path.open() as in_stream:
        default_config = yaml.safe_load(in_stream)

    # Compute and write incumbent config in the format of default_config
    incumbent_config = construct_model_config(incumbent_config, default_config)

    out_config_path = output_dir / "{}.yaml".format(experiment_path.name)
    with out_config_path.open("w") as out_stream:
        yaml.dump(incumbent_config, out_stream)
Example No. 28
def analyze_bohb(log_dir):
    # load the example run from the log files
    result = hpres.logged_results_to_HBS_result(log_dir)

    result = transform_result(result, min_success_reward=MIN_SUCCESS_REWARD)
    #result = remove_outliers(result)

    plot_parallel_scatter(result,
                          with_mirrored_sampling=False,
                          with_nes_step_size=False)
    plot_parallel_scatter(result,
                          with_mirrored_sampling=False,
                          with_nes_step_size=True)
    plot_parallel_scatter(result,
                          with_mirrored_sampling=True,
                          with_nes_step_size=False)
    plot_parallel_scatter(result,
                          with_mirrored_sampling=True,
                          with_nes_step_size=True)
Example No. 29
    def convert(self, folders, output_dir=None):
        """Convert hpbandster-results into smac-format, aggregating parallel runs along the budgets, so it is treated as
        one run with the same budgets. Throws ValueError when budgets of individual runs dont match.

        Parameters
        ----------
        folders: List[str]
            list of runs to consider
        output_dir: str
            path to CAVE's output-directory

        Returns
        -------
        result: hpbandster.core.result
            BOHB-result in original format
        paths: List[str]
            paths to converted data
        budgets: List[int]
            budgets, corresponding to paths
        """
        try:
            from hpbandster.core.result import Result as HPBResult
            from hpbandster.core.result import logged_results_to_HBS_result
        except ImportError as e:
            raise ImportError(
                "To analyze BOHB-data, please install hpbandster (e.g. `pip install hpbandster`)"
            )

        folder2result = OrderedDict([(f, logged_results_to_HBS_result(f))
                                     for f in folders])

        # backup_cs is a list with alternative interpretations of the configspace-file (if it's a .pcs-file)
        cs, backup_cs = self.load_configspace(folders[0])

        # Using temporary files for the intermediate smac-result-like format
        if not output_dir:
            self.logger.debug("New outputdir")
            output_dir = tempfile.mkdtemp()
        budgets, paths = zip(*self.hpbandster2smac(
            folder2result, cs, backup_cs, output_dir).items())

        return list(folder2result.values()), paths, budgets
Example No. 30
    def fit(self, pipeline_config, run_result_dir, train_metric, trajectories):
        ensemble_log_file = os.path.join(run_result_dir, "ensemble_log.json")
        test_log_file = os.path.join(run_result_dir, "test_result.json")
        if not pipeline_config["enable_ensemble"] or train_metric is None or \
            (not os.path.exists(ensemble_log_file) and not os.path.exists(test_log_file)):
            return {"trajectories": trajectories, "train_metric": train_metric}

        try:
            started = logged_results_to_HBS_result(run_result_dir).HB_config["time_ref"]
        except:
            return {"trajectories": trajectories, "train_metric": train_metric}
        
        ensemble_trajectories = dict()
        test_trajectories = dict()
        if os.path.exists(ensemble_log_file):
            ensemble_trajectories = get_ensemble_trajectories(ensemble_log_file, started)
        if os.path.exists(test_log_file):
            test_trajectories = get_ensemble_trajectories(test_log_file, started, prefix="", only_test=True)
        
        return {"trajectories": dict(trajectories, **ensemble_trajectories, **test_trajectories), "train_metric": train_metric}