Example #1
def HVI_from_files(real_pareto_file, parameters_file):
    """
    Compute the hypervolume indicator (HVI) of a target Pareto front, using the real Pareto front as reference.
    :param real_pareto_file: file containing the real Pareto front
    :param parameters_file: file containing the experiment scenario. Also used to find the target Pareto file.
    :return: the HVI of the target Pareto front
    """
    config = validate_json(parameters_file)
    param_space = space.Space(config)

    application_name = config["application_name"]
    test_pareto_file = config["output_pareto_file"]
    run_directory = config["run_directory"]
    if test_pareto_file == "output_pareto.csv":
        test_pareto_file = application_name + "_" + test_pareto_file
    test_pareto_file = deal_with_relative_and_absolute_path(
        run_directory, test_pareto_file
    )

    optimization_metrics = param_space.get_optimization_parameters()
    selection_keys = optimization_metrics + param_space.get_timestamp_parameter()
    feasible_flag = True if (param_space.get_feasible_parameter() != [None]) else False
    exhaustive_branin_pareto, _ = param_space.load_data_file(
        real_pareto_file, selection_keys_list=selection_keys, only_valid=feasible_flag
    )
    test_pareto, _ = param_space.load_data_file(
        test_pareto_file, selection_keys_list=selection_keys, only_valid=feasible_flag
    )
    concatenated_all_data_array = concatenate_data_dictionaries(
        exhaustive_branin_pareto, test_pareto, selection_keys_list=selection_keys
    )

    standard_deviations, max_point = compute_std_and_max_point(
        concatenated_all_data_array, optimization_metrics
    )

    exhaustive_branin_pareto = normalize_with_std(
        exhaustive_branin_pareto, standard_deviations, optimization_metrics
    )
    test_pareto = normalize_with_std(
        test_pareto, standard_deviations, optimization_metrics
    )

    exhaustive_branin_pareto = [
        exhaustive_branin_pareto[objective] for objective in optimization_metrics
    ]
    exhaustive_branin_pareto = list(zip(*exhaustive_branin_pareto))

    test_pareto = [test_pareto[objective] for objective in optimization_metrics]
    test_pareto = list(zip(*test_pareto))

    hv_exhaustive = H(exhaustive_branin_pareto, max_point)
    hv_test = H(test_pareto, max_point)
    hvi = hv_exhaustive - hv_test

    return hvi
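
A minimal usage sketch for the function above. The file names below are hypothetical placeholders, and HVI_from_files is assumed to be importable together with the HyperMapper helpers it relies on.

# Hypothetical usage: compute the HVI of a finished run against a known Pareto front.
# Both file names are placeholders, not taken from the original source.
hvi_value = HVI_from_files(
    real_pareto_file="branin_real_pareto.csv",
    parameters_file="example_scenarios/synthetic/branin_scenario.json",
)
print("HVI of the target Pareto front: %f" % hvi_value)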
Example #2
def plot_hvi(parameters_file, output_hvi_file_name, list_of_dirs):
    """
    Plot the hypervolume indicator (HVI) results of the design space exploration.
    In this plot we compare the HVI of HyperMapper's DSE against the HVI of a competing approach.
    On the x axis we plot time in seconds and on the y axis the HVI.
    Computing the HVI requires a real Pareto front, or at least the best Pareto front found by concatenating the results of
    HyperMapper and the competing approach.

    Input of this script:
    1) a file that is the real Pareto front or the best Pareto front found
       (supposing that we are comparing several approaches, the best Pareto front is the result of all these approaches combined).
    2) a file containing all the samples of the exploration (not only the Pareto front).
       From this file we can compute the Pareto front at time t and then the HVI at time t.
    """
    try:
        import statsmodels.stats.api as sms
    except ImportError:
        # TODO: Long-term: move this import to the top.
        raise ImportError(
            "Failed to import statsmodels. Statsmodels is required for plot_hvi."
        )
    xlabel = "Time (sec)"
    ylabel = "HyperVolume Indicator (HVI)"
    number_of_bins = 20

    filename, file_extension = os.path.splitext(parameters_file)
    if file_extension != ".json":
        print(
            "Error: invalid file name. \nThe input file has to be a .json file not a %s"
            % file_extension
        )
        exit(1)
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    DefaultValidatingDraft4Validator(schema).validate(config)

    if "application_name" in config:
        application_name = config["application_name"]
    else:
        application_name = ""

    print("########## plot_hvi.py #########################")
    print("### Parameters file is %s" % parameters_file)
    print("### Application name is %s" % application_name)
    print("### The input directories data are %s" % str(list_of_dirs))
    print("################################################")

    param_space = space.Space(config)
    optimization_metrics = param_space.get_optimization_parameters()

    ###################################################################################################################
    ########### Compute the hypervolume of all the input files concatenated as a reference for the HVI metric.
    ###################################################################################################################
    input_files = {}

    # y_data_mean is a dict indexed by directory; for each directory it holds the mean of each
    # point x over the file repetitions in that directory; lower and upper are for the confidence interval.
    y_data_mean = defaultdict(list)
    y_data_median = defaultdict(list)
    y_data_min = defaultdict(list)
    y_data_max = defaultdict(list)
    y_data_lower = defaultdict(list)
    y_data_upper = defaultdict(list)
    bin_array_X = {}
    number_or_runs_in_bins = {}

    for dir in list_of_dirs:
        input_files[dir] = [f for f in listdir(dir) if isfile(join(dir, f))]

    for dir in list_of_dirs:
        files_to_remove = []
        for file in input_files[dir]:
            filename, file_extension = os.path.splitext(file)
            if file_extension != ".csv":
                print(
                    "Warning: file %s is not a csv file, it will not be considered in the HVI plot. "
                    % file
                )
                files_to_remove.append(file)
        # Don't merge this loop into the previous one: removing elements from the list while iterating over it would skip entries.
        for file in files_to_remove:
            input_files[dir].remove(file)

    for dir in list_of_dirs:
        if len(input_files[dir]) == 0:
            print(
                "Warning: directory %s is empty, it will not be considered in the HVI plot."
                % dir
            )
            del input_files[dir]

    if len(input_files) == 0:
        print("Error: there no input files to compute the HVI.")

    print("The files used as a input are: ")
    for i, dir in enumerate(input_files.keys()):
        print(
            "Directory "
            + str(i)
            + ": "
            + dir
            + ", # of files: "
            + str(len(input_files[dir]))
            + ", list of files: "
            + str(input_files[dir])
        )

    all_data_files = []
    for dir in input_files.keys():
        for file in input_files[dir]:
            all_data_files += [dir + "/" + file]

    selection_keys = (
        param_space.get_output_parameters() + param_space.get_timestamp_parameter()
    )
    feasible_flag = True if (param_space.get_feasible_parameter() != [None]) else False
    concatenated_all_data_array = param_space.load_data_files(
        all_data_files, selection_keys_list=selection_keys, only_valid=feasible_flag
    )

    if len(next(iter(concatenated_all_data_array.values()))) == 0:
        return return_empty_images(
            application_name,
            input_files,
            number_of_bins,
            output_hvi_file_name,
            xlabel,
            ylabel,
        )

    bounds = {}
    max_point = []
    standard_deviation_optimization_metrics = []
    max_min_difference = []
    # Get bounds of objective space
    for metric in optimization_metrics:
        X = np.array(concatenated_all_data_array[metric])

        standard_deviation = np.std(X, axis=0)
        standard_deviation_optimization_metrics.append(standard_deviation)
        X /= standard_deviation

        concatenated_all_data_array[metric] = X
        bounds[metric] = (
            min(concatenated_all_data_array[metric]),
            max(concatenated_all_data_array[metric]),
        )
        max_point.append(bounds[metric][1])
        max_min_difference.append(bounds[metric][1] - bounds[metric][0])
        print(
            "(min, max) = (%f, %f) for the metric %s. This is to compute the hypervolume."
            % (bounds[metric][0], bounds[metric][1], metric)
        )

    total_volume = prod(max_min_difference)
    list_of_objectives = [
        concatenated_all_data_array[objective]
        for objective in param_space.get_optimization_parameters()
    ]
    reformatted_all_data = list(zip(*list_of_objectives))

    # Get dominated hypervolume for Pareto of all data observed
    hv_all_data = H(reformatted_all_data, max_point)
    print("The hypervolume of all the files concatenated: %d" % hv_all_data)

    ###################################################################################################################
    ########### Compute the HVI for each directory.
    ###################################################################################################################
    hvi = {}
    for dir in input_files:
        print("Compute HVI for %s" % dir)
        convert_in_seconds = 1000.0
        hvi[dir], bin_array_X[dir], number_or_runs_in_bins[dir] = compute_hvi(
            standard_deviation_optimization_metrics,
            input_files[dir],
            dir,
            total_volume,
            max_point,
            hv_all_data,
            param_space,
            convert_in_seconds,
            number_of_bins,
        )

        # Round the floating point numbers to 1 decimal for clarity of visualization.
        bin_array_X[dir] = [round(float(i), 1) for i in bin_array_X[dir]]
        for file in hvi[dir]:
            for bin in hvi[dir][file]:
                hvi[dir][file][bin] = round(float(hvi[dir][file][bin]), 1)

    ###################################################################################################################
    ########### Plot all the HVIs (using box plots bin_array_X and hvi)
    ###################################################################################################################

    for dir in input_files:
        hvi_list_of_lists = []
        each_bin = defaultdict(list)
        for file in hvi[dir]:
            for bin in hvi[dir][file]:
                each_bin[bin].append(hvi[dir][file][bin])
        for bin in hvi[dir][file]:
            hvi_list_of_lists.append(
                each_bin[bin]
            )  # This is a list of bins and for each bin there is a list of hvi values for each file in that directory.

        # Print boxplot (one figure per directory).
        boxplot(
            bin_array_X[dir],
            hvi_list_of_lists,
            application_name,
            number_of_bins,
            xlabel,
            ylabel,
            str(dir + "/" + os.path.basename(dir) + "_boxplot" + ".pdf"),
        )

        # Print lineplot (only one figure comparing all the directories).
        for hvi_list in hvi_list_of_lists:
            hvi_list_array = np.array(hvi_list)
            y_data_mean[dir].append(hvi_list_array.mean())
            y_data_median[dir].append(np.median(hvi_list_array))
            y_data_min[dir].append(np.min(hvi_list_array))
            y_data_max[dir].append(np.max(hvi_list_array))
            low, up = sms.DescrStatsW(hvi_list_array).tconfint_mean()
            y_data_lower[dir].append(low)
            y_data_upper[dir].append(up)

        for bin_number, bin_value in enumerate(y_data_lower[dir]):
            if not math.isnan(bin_value) and bin_value < 0:
                y_data_lower[dir][bin_number] = 0
        for bin_number, bin_value in enumerate(y_data_upper[dir]):
            if not math.isnan(bin_value) and bin_value < 0:
                y_data_upper[dir][bin_number] = 0

        print_stats_on_a_txt(
            dir,
            str(dir + "/" + os.path.basename(dir) + "_stats" + ".txt"),
            bin_array_X,
            number_or_runs_in_bins,
            y_data_mean,
            y_data_median,
            y_data_min,
            y_data_max,
            y_data_lower,
            y_data_upper,
        )

    # Call the function to create plot
    lineplotCI(
        input_files,
        application_name,
        x_data=bin_array_X,
        y_data=y_data_mean,
        low_CI=y_data_lower,
        upper_CI=y_data_upper,
        xlabel=xlabel,
        ylabel=ylabel,
        title="Line plot with 95% confidence intervals",
        output_filename=output_hvi_file_name,
    )
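
A sketch of how plot_hvi might be invoked, assuming two directories that each hold the csv outputs of repeated runs of the two approaches being compared; all paths and names below are illustrative only.

# Hypothetical call: compare HyperMapper's DSE against a competing approach.
# The parameters file, output file, and directory names are placeholders.
plot_hvi(
    parameters_file="example_scenarios/synthetic/branin_scenario.json",
    output_hvi_file_name="branin_hvi_comparison.pdf",
    list_of_dirs=["results_hypermapper", "results_competitor"],
)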
Example #3
def main(config, black_box_function=None, output_file="", profiling=None):
    """
    Run design-space exploration using evolution.
    :param config: dictionary containing all the configuration parameters of this design-space exploration.
    :param black_box_function: the function HyperMapper seeks to optimize.
    :param output_file: a name for the file used to save the DSE results.
    :return:
    """
    param_space = space.Space(config)

    run_directory = config["run_directory"]
    application_name = config["application_name"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    if number_of_objectives != 1:
        print(
            "the evolutionary optimization does not support multi-objective optimization. Exiting."
        )
        sys.exit()

    fitness_measure = optimization_metrics[0]
    population_size = config["evolution_population_size"]
    generations = config["evolution_generations"]
    mutation_rate = config["mutation_rate"]
    if mutation_rate > len(param_space.get_input_parameters()):
        print("mutation rate cannot be higher than the number of parameters. Exiting.")
        sys.exit()
    if mutation_rate < 1:
        print("mutation rate must be at least 1 for evolution to work. Exiting.")
        sys.exit()
    crossover = config["evolution_crossover"]
    regularize = config["regularize_evolution"]
    batch_size = config["batch_size"]
    if batch_size > population_size:
        print("population_size must be bigger than batch_size. Exiting.")
        sys.exit()
    elif batch_size < 2 and not crossover:
        print("batch_size cannot be smaller than 2. Exiting.")
        sys.exit()
    elif batch_size < 3 and crossover:
        print("batch_size must be at least 3 when using crossover. Exiting.")
        sys.exit()

    log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"])
    sys.stdout.change_log_file(log_file)
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    if output_file == "":
        output_data_file = config["output_data_file"]
        if output_data_file == "output_samples.csv":
            output_data_file = application_name + "_" + output_data_file
    else:
        output_data_file = output_file

    absolute_configuration_index = 0
    fast_addressing_of_data_array = {}
    evolution_fast_addressing_of_data_array = {}
    evolution_data_array = defaultdict(list)

    beginning_of_time = param_space.current_milli_time()

    optimization_function_parameters = dict()
    optimization_function_parameters["hypermapper_mode"] = hypermapper_mode
    optimization_function_parameters["param_space"] = param_space
    optimization_function_parameters["beginning_of_time"] = beginning_of_time
    optimization_function_parameters["run_directory"] = run_directory
    optimization_function_parameters["black_box_function"] = black_box_function
    optimization_function_parameters["evolution_data_array"] = evolution_data_array
    optimization_function_parameters[
        "fast_addressing_of_data_array"
    ] = evolution_fast_addressing_of_data_array

    print("Starting evolution...")
    evolution_t0 = datetime.datetime.now()
    all_samples = evolution(
        population_size,
        generations,
        mutation_rate,
        crossover,
        regularize,
        batch_size,
        fitness_measure,
        param_space,
        fast_addressing_of_data_array,
        run_objective_function,
        optimization_function_parameters,
        profiling,
    )

    print(
        "Evolution finished after %d function evaluations"
        % (len(evolution_data_array[optimization_metrics[0]]))
    )
    sys.stdout.write_to_logfile(
        (
            "Evolutionary search time %10.4f sec\n"
            % ((datetime.datetime.now() - evolution_t0).total_seconds())
        )
    )

    with open(
        deal_with_relative_and_absolute_path(run_directory, output_data_file), "w"
    ) as f:
        w = csv.writer(f)
        w.writerow(list(evolution_data_array.keys()))
        tmp_list = [
            param_space.convert_types_to_string(j, evolution_data_array)
            for j in list(evolution_data_array.keys())
        ]
        tmp_list = list(zip(*tmp_list))
        for i in range(len(evolution_data_array[optimization_metrics[0]])):
            w.writerow(tmp_list[i])

    print("### End of the evolutionary search")
    return evolution_data_array
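
A sketch of a mono-objective black-box function that could be passed as black_box_function to main() above. The input parameter names (x1, x2) and the objective name "Value" are hypothetical; the only assumption taken from the code above is that a single optimization objective is expected.

# Hypothetical black-box function for the evolutionary search above.
# It receives a dictionary with one value per input parameter and returns the
# value of the single optimization objective (here assumed to be named "Value").
def sphere_function(X):
    x1 = X["x1"]  # hypothetical input parameter
    x2 = X["x2"]  # hypothetical input parameter
    return {"Value": x1 ** 2 + x2 ** 2}

# main(config, black_box_function=sphere_function) would then run the search,
# provided config defines the keys read above (evolution_population_size,
# evolution_generations, mutation_rate, evolution_crossover,
# regularize_evolution, batch_size, ...).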
Example #4
def main(config, black_box_function=None, profiling=None):
    """
    Run design-space exploration using Bayesian optimization.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param black_box_function: the function HyperMapper seeks to optimize.
    :param profiling: profiling object used to record the timing of the optimization phases (optional).
    """
    start_time = datetime.datetime.now()
    run_directory = config["run_directory"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]

    # Start logging
    log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"])
    sys.stdout.change_log_file(log_file)
    sys.stdout.set_verbose_mode(config["verbose_logging"])
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    # Log the json configuration for this optimization
    sys.stdout.write_to_logfile(str(config) + "\n")

    # Create parameter space object and unpack hyperparameters from json
    param_space = space.Space(config)
    application_name = config["application_name"]
    optimization_metrics = config["optimization_objectives"]
    optimization_iterations = config["optimization_iterations"]
    evaluations_per_optimization_iteration = config[
        "evaluations_per_optimization_iteration"
    ]
    output_data_file = get_output_data_file(
        config["output_data_file"], run_directory, application_name
    )
    batch_mode = evaluations_per_optimization_iteration > 1
    number_of_cpus = config["number_of_cpus"]
    print_importances = config["print_parameter_importance"]
    epsilon_greedy_threshold = config["epsilon_greedy_threshold"]
    acquisition_function = config["acquisition_function"]
    weight_sampling = config["weight_sampling"]
    scalarization_method = config["scalarization_method"]
    scalarization_key = config["scalarization_key"]
    doe_type = config["design_of_experiment"]["doe_type"]
    number_of_doe_samples = config["design_of_experiment"]["number_of_samples"]
    model_type = config["models"]["model"]
    optimization_method = config["optimization_method"]
    time_budget = config["time_budget"]
    acquisition_function_optimizer = config["acquisition_function_optimizer"]
    if (
        acquisition_function_optimizer == "cma_es"
        and not param_space.is_space_continuous()
    ):
        print(
            "Warning: CMA_ES can only be used with continuous search spaces (i.e. all parameters must be of type 'real')"
        )
        print("Switching acquisition function optimizer to local search")
        acquisition_function_optimizer = "local_search"

    input_params = param_space.get_input_parameters()
    number_of_objectives = len(optimization_metrics)
    objective_limits = {}
    data_array = {}
    fast_addressing_of_data_array = {}
    objective_bounds = None
    exhaustive_search_data_array = None
    normalize_objectives = False
    debug = False

    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output["enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"
        ]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"
        ]
        feasible_parameter = param_space.get_feasible_parameter()
        number_of_trees = config["models"]["number_of_trees"]

    if weight_sampling == "bounding_box":
        objective_bounds = {}
        user_bounds = config["bounding_box_limits"]
        if len(user_bounds) == 2:
            if user_bounds[0] > user_bounds[1]:
                user_bounds[0], user_bounds[1] = user_bounds[1], user_bounds[0]
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds
                objective_limits[objective] = user_bounds
        elif len(user_bounds) == number_of_objectives * 2:
            idx = 0
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds[idx : idx + 2]
                if objective_bounds[objective][0] > objective_bounds[objective][1]:
                    objective_bounds[objective][0], objective_bounds[objective][1] = (
                        objective_bounds[objective][1],
                        objective_bounds[objective][0],
                    )
                objective_limits[objective] = objective_bounds[objective]
                idx += 2
        else:
            print(
                "Wrong number of bounding boxes, expected 2 or",
                2 * number_of_objectives,
                "got",
                len(user_bounds),
            )
            raise SystemExit
    else:
        for objective in optimization_metrics:
            objective_limits[objective] = [float("inf"), float("-inf")]

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    if hypermapper_mode == "exhaustive":
        exhaustive_file = config["hypermapper_mode"]["exhaustive_search_file"]
        (
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
        ) = param_space.load_data_file(
            exhaustive_file, debug=False, number_of_cpus=number_of_cpus
        )

    # Check if some parameters are correctly defined
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    if (model_type == "gaussian_process") and (acquisition_function == "TS"):
        print(
            "Warning: the TS acquisition function with Gaussian Process models is still under implementation"
        )
        print("Using EI acquisition function instead")
        config["acquisition_function"] = "EI"

    if number_of_cpus > 1:
        print(
            "Warning: HyperMapper supports only sequential execution for now. Running on a single cpu."
        )
        number_of_cpus = 1

    # If priors are present, use prior-guided optimization
    user_priors = False
    for input_param in config["input_parameters"]:
        if config["input_parameters"][input_param]["prior"] != "uniform":
            if number_of_objectives == 1:
                user_priors = True
            else:
                print(
                    "Warning: prior optimization does not work with multiple objectives yet, priors will be uniform"
                )
                config["input_parameters"][input_param]["prior"] = "uniform"

    if user_priors:
        bo_method = prior_guided_optimization
    else:
        bo_method = random_scalarizations
        normalize_objectives = True

    ### Resume previous optimization, if any
    beginning_of_time = param_space.current_milli_time()
    absolute_configuration_index = 0
    doe_t0 = datetime.datetime.now()
    if config["resume_optimization"] == True:
        resume_data_file = config["resume_optimization_data"]

        if not resume_data_file.endswith(".csv"):
            print("Error: resume data file must be a CSV")
            raise SystemExit
        if resume_data_file == "output_samples.csv":
            resume_data_file = application_name + "_" + resume_data_file

        data_array, fast_addressing_of_data_array = param_space.load_data_file(
            resume_data_file, debug=False, number_of_cpus=number_of_cpus
        )
        absolute_configuration_index = len(
            data_array[list(data_array.keys())[0]]
        )  # get the number of points evaluated in the previous run
        beginning_of_time = (
            beginning_of_time - data_array[param_space.get_timestamp_parameter()[0]][-1]
        )  # Set the timestamp back to match the previous run
        print(
            "Resumed optimization, number of samples = %d ......."
            % absolute_configuration_index
        )

    create_output_data_file(
        output_data_file, param_space.get_input_output_and_timestamp_parameters()
    )
    if data_array:  # if it is not empty
        write_data_array(param_space, data_array, output_data_file)
    ### DoE phase
    if absolute_configuration_index < number_of_doe_samples:
        configurations = []
        default_configuration = param_space.get_default_or_random_configuration()
        str_data = param_space.get_unique_hash_string_from_values(default_configuration)
        if str_data not in fast_addressing_of_data_array:
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            configurations.append(default_configuration)
            absolute_configuration_index += 1

        doe_configurations = []
        if absolute_configuration_index < number_of_doe_samples:
            doe_configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array,
                number_of_doe_samples - absolute_configuration_index,
                doe_type,
            )
        configurations += doe_configurations
        print(
            "Design of experiment phase, number of new doe samples = %d ......."
            % len(configurations)
        )

        doe_data_array = param_space.run_configurations(
            hypermapper_mode,
            configurations,
            beginning_of_time,
            output_data_file,
            black_box_function,
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
            run_directory,
            batch_mode=batch_mode,
        )
        data_array = concatenate_data_dictionaries(
            data_array,
            doe_data_array,
            param_space.input_output_and_timestamp_parameter_names,
        )
        absolute_configuration_index = number_of_doe_samples
        iteration_number = 1
    else:
        iteration_number = absolute_configuration_index - number_of_doe_samples + 1

    # If we have feasibility constraints, we must ensure we have at least one feasible and one infeasible sample before starting optimization
    # If this is not true, continue design of experiment until the condition is met
    if enable_feasible_predictor:
        while (
            are_all_elements_equal(data_array[feasible_parameter[0]])
            and optimization_iterations > 0
        ):
            print(
                "Warning: all points are either valid or invalid, random sampling more configurations."
            )
            print("Number of doe samples so far:", absolute_configuration_index)
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array, 1, "random sampling"
            )
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            data_array = concatenate_data_dictionaries(
                new_data_array,
                data_array,
                param_space.input_output_and_timestamp_parameter_names,
            )
            absolute_configuration_index += 1
            optimization_iterations -= 1

    for objective in optimization_metrics:
        lower_bound = min(objective_limits[objective][0], min(data_array[objective]))
        upper_bound = max(objective_limits[objective][1], max(data_array[objective]))
        objective_limits[objective] = [lower_bound, upper_bound]
    print(
        "\nEnd of doe/resume phase, the number of evaluated configurations is: %d\n"
        % absolute_configuration_index
    )
    sys.stdout.write_to_logfile(
        (
            "End of DoE - Time %10.4f sec\n"
            % ((datetime.datetime.now() - doe_t0).total_seconds())
        )
    )
    if doe_type == "grid_search" and optimization_iterations > 0:
        print(
            "Warning: DoE is grid search, setting number of optimization iterations to 0"
        )
        optimization_iterations = 0

    ### Main optimization loop
    bo_t0 = datetime.datetime.now()
    run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
    # run_time / time_budget < 1 if budget > elapsed time or budget == -1
    if time_budget > 0:
        print(
            "starting optimization phase, limited to run for ", time_budget, " minutes"
        )
    elif time_budget == 0:
        print("Time budget cannot be zero. To not limit runtime set time_budget = -1")
        sys.exit()

    configurations = []
    evaluation_budget = optimization_iterations * evaluations_per_optimization_iteration
    iteration_number = 0
    evaluation_count = 0
    while evaluation_count < evaluation_budget and run_time / time_budget < 1:
        if evaluation_count % evaluations_per_optimization_iteration == 0:
            iteration_number += 1
            print("Starting optimization iteration", iteration_number)
            iteration_t0 = datetime.datetime.now()

        model_t0 = datetime.datetime.now()
        regression_models, _, _ = models.generate_mono_output_regression_models(
            data_array,
            param_space,
            input_params,
            optimization_metrics,
            1.00,
            config,
            model_type=model_type,
            number_of_cpus=number_of_cpus,
            print_importances=print_importances,
            normalize_objectives=normalize_objectives,
            objective_limits=objective_limits,
        )

        classification_model = None
        if enable_feasible_predictor:
            classification_model, _, _ = models.generate_classification_model(
                application_name,
                param_space,
                data_array,
                input_params,
                feasible_parameter,
                1.00,
                config,
                debug,
                number_of_cpus=number_of_cpus,
                data_array_exhaustive=exhaustive_search_data_array,
                enable_feasible_predictor_grid_search_on_recall_and_precision=enable_feasible_predictor_grid_search_on_recall_and_precision,
                feasible_predictor_grid_search_validation_file=feasible_predictor_grid_search_validation_file,
                print_importances=print_importances,
            )
        model_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Model fitting time %10.4f sec\n"
                % ((model_t1 - model_t0).total_seconds())
            )
        )
        if weight_sampling == "bounding_box":
            objective_weights = sample_weight_bbox(
                optimization_metrics, objective_bounds, objective_limits, 1
            )[0]
        elif weight_sampling == "flat":
            objective_weights = sample_weight_flat(optimization_metrics, 1)[0]
        else:
            print("Error: unrecognized option:", weight_sampling)
            raise SystemExit

        data_array_scalarization, _ = compute_data_array_scalarization(
            data_array, objective_weights, objective_limits, scalarization_method
        )
        data_array[scalarization_key] = data_array_scalarization.tolist()

        epsilon = random.uniform(0, 1)
        local_search_t0 = datetime.datetime.now()
        if epsilon > epsilon_greedy_threshold:
            best_configuration = bo_method(
                config,
                data_array,
                param_space,
                fast_addressing_of_data_array,
                regression_models,
                iteration_number,
                objective_weights,
                objective_limits,
                classification_model,
                profiling,
                acquisition_function_optimizer,
            )

        else:
            sys.stdout.write_to_logfile(
                str(epsilon)
                + " < "
                + str(epsilon_greedy_threshold)
                + " random sampling a configuration to run\n"
            )
            tmp_fast_addressing_of_data_array = copy.deepcopy(
                fast_addressing_of_data_array
            )
            best_configuration = (
                param_space.random_sample_configurations_without_repetitions(
                    tmp_fast_addressing_of_data_array, 1, use_priors=False
                )[0]
            )
        local_search_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Local search time %10.4f sec\n"
                % ((local_search_t1 - local_search_t0).total_seconds())
            )
        )

        configurations.append(best_configuration)

        # When we have selected "evaluations_per_optimization_iteration" configurations, evaluate the batch
        if evaluation_count % evaluations_per_optimization_iteration == (
            evaluations_per_optimization_iteration - 1
        ):
            black_box_function_t0 = datetime.datetime.now()
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            black_box_function_t1 = datetime.datetime.now()
            sys.stdout.write_to_logfile(
                (
                    "Black box function time %10.4f sec\n"
                    % ((black_box_function_t1 - black_box_function_t0).total_seconds())
                )
            )

            # If running batch BO, we will have some liars in fast_addressing_of_data_array, update them with the true value
            for configuration_idx in range(
                len(new_data_array[list(new_data_array.keys())[0]])
            ):
                configuration = get_single_configuration(
                    new_data_array, configuration_idx
                )
                str_data = param_space.get_unique_hash_string_from_values(configuration)
                if str_data in fast_addressing_of_data_array:
                    absolute_index = fast_addressing_of_data_array[str_data]
                    for header in configuration:
                        data_array[header][absolute_index] = configuration[header]
                else:
                    fast_addressing_of_data_array[
                        str_data
                    ] = absolute_configuration_index
                    absolute_configuration_index += 1
                    for header in configuration:
                        data_array[header].append(configuration[header])

            configurations = []
        else:
            # If we have not selected all points in the batch yet, add the model prediction as a 'liar'
            for header in best_configuration:
                data_array[header].append(best_configuration[header])

            bufferx = [tuple(best_configuration.values())]
            prediction_means, _ = models.compute_model_mean_and_uncertainty(
                bufferx, regression_models, model_type, param_space
            )
            for objective in prediction_means:
                data_array[objective].append(prediction_means[objective][0])

            if classification_model is not None:
                classification_prediction_results = models.model_probabilities(
                    bufferx, classification_model, param_space
                )
                true_value_index = (
                    classification_model[feasible_parameter[0]]
                    .classes_.tolist()
                    .index(True)
                )
                feasibility_indicator = classification_prediction_results[
                    feasible_parameter[0]
                ][:, true_value_index]
                data_array[feasible_output_name].append(
                    True if feasibility_indicator[0] >= 0.5 else False
                )

            data_array[param_space.get_timestamp_parameter()[0]].append(
                absolute_configuration_index
            )
            str_data = param_space.get_unique_hash_string_from_values(
                best_configuration
            )
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            absolute_configuration_index += 1

        for objective in optimization_metrics:
            lower_bound = min(
                objective_limits[objective][0], min(data_array[objective])
            )
            upper_bound = max(
                objective_limits[objective][1], max(data_array[objective])
            )
            objective_limits[objective] = [lower_bound, upper_bound]

        evaluation_count += 1
        run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
        iteration_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Total iteration time %10.4f sec\n"
                % ((iteration_t1 - iteration_t0).total_seconds())
            )
        )

        if profiling is not None:
            profiling.add("Model fitting time", (model_t1 - model_t0).total_seconds())
            # local search profiling is done inside of local search
            profiling.add(
                "Black box function time",
                (black_box_function_t1 - black_box_function_t0).total_seconds(),
            )

    sys.stdout.write_to_logfile(
        (
            "End of BO phase - Time %10.4f sec\n"
            % ((datetime.datetime.now() - bo_t0).total_seconds())
        )
    )

    print("End of Bayesian Optimization")

    print_posterior_best = config["print_posterior_best"]
    if print_posterior_best:
        if number_of_objectives > 1:
            print(
                "Warning: print_posterior_best is set to true, but application is not mono-objective."
            )
            print(
                "Can only compute best according to posterior for mono-objective applications. Ignoring."
            )
        elif enable_feasible_predictor:
            print(
                "Warning: print_posterior_best is set to true, but application has feasibility constraints."
            )
            print(
                "Cannot compute best according to posterior for applications with feasibility constraints. Ignoring."
            )
        else:
            # Update model with latest data
            regression_models, _, _ = models.generate_mono_output_regression_models(
                data_array,
                param_space,
                input_params,
                optimization_metrics,
                1.00,
                config,
                model_type=model_type,
                number_of_cpus=number_of_cpus,
                print_importances=print_importances,
                normalize_objectives=normalize_objectives,
                objective_limits=objective_limits,
            )

            best_point = models.minimize_posterior_mean(
                regression_models,
                config,
                param_space,
                data_array,
                objective_limits,
                normalize_objectives,
                profiling,
            )
            keys = ""
            best_point_string = ""
            for key in best_point:
                keys += f"{key},"
                best_point_string += f"{best_point[key]},"
            keys = keys[:-1]
            best_point_string = best_point_string[:-1]

            sys.stdout.write_protocol("Minimum of the posterior mean:\n")
            sys.stdout.write_protocol(f"{keys}\n")
            sys.stdout.write_protocol(f"{best_point_string}\n\n")

    sys.stdout.write_to_logfile(
        (
            "Total script time %10.2f sec\n"
            % ((datetime.datetime.now() - start_time).total_seconds())
        )
    )

    return data_array
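
The optimization loop above interleaves model-guided selection with occasional random samples via an epsilon-greedy rule. Below is a minimal, self-contained sketch of that rule; optimize_acquisition and sample_random_configuration are stand-in callables, not HyperMapper APIs.

import random

# Epsilon-greedy choice as used in the loop above: with probability
# epsilon_greedy_threshold a random configuration is evaluated instead of the
# one suggested by optimizing the acquisition function.
def choose_next_configuration(
    epsilon_greedy_threshold, optimize_acquisition, sample_random_configuration
):
    epsilon = random.uniform(0, 1)
    if epsilon > epsilon_greedy_threshold:
        return optimize_acquisition()
    return sample_random_configuration()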
Example #5
def main(config, black_box_function=None, profiling=None):
    """
    Run design-space exploration using random scalarizations.
    :param config: dictionary containing all the configuration parameters of this design-space exploration.
    :param black_box_function: the function HyperMapper seeks to optimize.
    :param profiling: profiling object used to record the timing of the search (optional).
    :return:
    """
    param_space = space.Space(config)

    run_directory = config["run_directory"]
    application_name = config["application_name"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    noise = config["noise"]
    output_data_file = get_output_data_file(config["output_data_file"],
                                            run_directory, application_name)
    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    # local search will not produce reasonable output if run in parallel - it is therefore disabled
    number_of_cpus = 1
    local_search_random_points = config["local_search_random_points"]
    local_search_evaluation_limit = config["local_search_evaluation_limit"]
    if local_search_evaluation_limit == -1:
        local_search_evaluation_limit = float("inf")
    scalarization_key = config["scalarization_key"]
    scalarization_method = config["scalarization_method"]
    scalarization_weights = config["local_search_scalarization_weights"]
    if len(scalarization_weights) < len(optimization_metrics):
        print(
            "Error: not enough scalarization weights. Received",
            len(scalarization_weights),
            "expected",
            len(optimization_metrics),
        )
        raise SystemExit
    if sum(scalarization_weights) != 1:
        sys.stdout.write_to_logfile("Weights must sum to 1. Normalizing weights.\n")
        weight_sum = sum(scalarization_weights)
        for idx in range(len(scalarization_weights)):
            scalarization_weights[idx] = scalarization_weights[idx] / weight_sum
        sys.stdout.write_to_logfile("New weights: " + str(scalarization_weights) + "\n")
    objective_weights = {}
    objective_limits = {}
    for idx, objective in enumerate(optimization_metrics):
        objective_weights[objective] = scalarization_weights[idx]
        objective_limits[objective] = [float("inf"), float("-inf")]

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    if hypermapper_mode == "exhaustive":
        exhaustive_file = config["hypermapper_mode"]["exhaustive_search_file"]
        print("Exhaustive mode, loading data from %s ..." % exhaustive_file)
        (
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
        ) = param_space.load_data_file(exhaustive_file,
                                       debug=False,
                                       number_of_cpus=number_of_cpus)

    enable_feasible_predictor = False
    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output[
            "enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"]
        feasible_parameter = param_space.get_feasible_parameter()

    local_search_starting_points = config["local_search_starting_points"]

    debug = False

    log_file = deal_with_relative_and_absolute_path(run_directory,
                                                    config["log_file"])
    sys.stdout.change_log_file(log_file)
    sys.stdout.set_verbose_mode(config["verbose_logging"])
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    absolute_configuration_index = 0
    fast_addressing_of_data_array = {}
    local_search_fast_addressing_of_data_array = {}
    local_search_data_array = defaultdict(list)

    beginning_of_time = param_space.current_milli_time()

    optimization_function_parameters = {}
    optimization_function_parameters["hypermapper_mode"] = hypermapper_mode
    optimization_function_parameters["param_space"] = param_space
    optimization_function_parameters["beginning_of_time"] = beginning_of_time
    optimization_function_parameters["run_directory"] = run_directory
    optimization_function_parameters["output_data_file"] = output_data_file
    optimization_function_parameters[
        "exhaustive_search_data_array"] = exhaustive_search_data_array
    optimization_function_parameters[
        "exhaustive_search_fast_addressing_of_data_array"] = exhaustive_search_fast_addressing_of_data_array
    optimization_function_parameters["black_box_function"] = black_box_function
    optimization_function_parameters["number_of_cpus"] = number_of_cpus
    optimization_function_parameters[
        "local_search_data_array"] = local_search_data_array
    optimization_function_parameters[
        "fast_addressing_of_data_array"] = local_search_fast_addressing_of_data_array
    optimization_function_parameters[
        "evaluation_limit"] = local_search_evaluation_limit
    optimization_function_parameters[
        "scalarization_weights"] = objective_weights
    optimization_function_parameters["objective_limits"] = objective_limits
    optimization_function_parameters[
        "scalarization_method"] = scalarization_method
    optimization_function_parameters[
        "enable_feasible_predictor"] = enable_feasible_predictor

    create_output_data_file(
        output_data_file,
        param_space.get_input_output_and_timestamp_parameters())

    print("Starting local search...")
    local_search_t0 = datetime.datetime.now()
    all_samples, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        enable_feasible_predictor,
        run_objective_function,
        optimization_function_parameters,
        scalarization_key,
        number_of_cpus,
        profiling=profiling,
        noise=noise,
    )

    print("Local search finished after %d function evaluations" %
          (len(local_search_data_array[optimization_metrics[0]])))

    print("### End of the local search.")
    return local_search_data_array
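
For instance, the weight normalization performed in the example above turns user weights that do not sum to 1 into a proper convex combination. A tiny standalone illustration with made-up numbers:

# Standalone illustration of the scalarization-weight normalization above.
weights = [2.0, 2.0]              # hypothetical user-provided weights
total = sum(weights)
weights = [w / total for w in weights]
print(weights)                    # [0.5, 0.5]: the weights now sum to 1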
Example #6
def compute(
    parameters_file="example_scenarios/spatial/BlackScholes_scenario.json",
    input_data_file=None,
    output_pareto_file=None,
):
    """
    Compute the Pareto front from the csv data file and write it to the file specified in the json output_pareto_file field.
    :param parameters_file: the json file that specifies all the HyperMapper input parameters.
    :return: the Pareto csv file is written to disk.
    """
    try:
        hypermapper_pwd = os.environ["PWD"]
        hypermapper_home = os.environ["HYPERMAPPER_HOME"]
        os.chdir(hypermapper_home)
    except KeyError:
        hypermapper_pwd = "."

    print("######## compute_pareto.py #####################")
    print("### Parameters file is %s" % parameters_file)
    sys.stdout.flush()

    filename, file_extension = os.path.splitext(parameters_file)
    if file_extension != ".json":
        print(
            "Error: invalid file name. \nThe input file has to be a .json file not a %s"
            % file_extension)
        exit(1)
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    DefaultValidatingDraft4Validator(schema).validate(config)

    application_name = config["application_name"]
    max_number_of_predictions = config["max_number_of_predictions"]
    optimization_metrics = config["optimization_objectives"]
    number_of_cpus = config["number_of_cpus"]
    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = hypermapper_pwd
        config["run_directory"] = run_directory
    if input_data_file is None:
        input_data_file = config["output_data_file"]
        if input_data_file == "output_samples.csv":
            input_data_file = application_name + "_" + input_data_file
    input_data_file = deal_with_relative_and_absolute_path(
        run_directory, input_data_file)
    if output_pareto_file is None:
        output_pareto_file = config["output_pareto_file"]
        if output_pareto_file == "output_pareto.csv":
            output_pareto_file = application_name + "_" + output_pareto_file
    output_pareto_file = deal_with_relative_and_absolute_path(
        run_directory, output_pareto_file)

    param_space = space.Space(config)
    print("### The input data file is %s" % input_data_file)
    print("### The output Pareto file is %s" % output_pareto_file)
    print("################################################")
    debug = False

    print("Computing the Pareto...")
    start_time = datetime.datetime.now()
    # Compute Pareto and save it to output_pareto_file
    with open(input_data_file, "r") as f_csv_file_data_array:
        count_number_of_points_in_Pareto = compute_pareto(
            param_space, input_data_file, output_pareto_file, debug,
            number_of_cpus)
    end_time = datetime.datetime.now()
    print(
        ("Total time of computation is (read and Pareto computation): " + str(
            (end_time - start_time).total_seconds()) + " seconds"))
    print(("The total size of the Pareto (RS + AL) is: %d" %
           count_number_of_points_in_Pareto))
    sys.stdout.flush()
    print("End of the compute_pareto.py script!\n")
Example #7
def plot_regret(
    configuration_file,
    data_dirs,
    labels=None,
    minimum=0,
    outfile=None,
    title=None,
    plot_log=False,
    unlog_y_axis=False,
    budget=None,
    out_dir=None,
    ncol=4,
    x_label=None,
    y_label=None,
    show_doe=True,
    expert_configuration=None,
):

    # Read json configuration file
    if not configuration_file.endswith(".json"):
        _, file_extension = splitext(configuration_file)
        print(
            "Error: invalid file name. \nThe input file has to be a .json file not a %s"
            % file_extension)
        raise SystemExit
    with open(configuration_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    try:
        DefaultValidatingDraft4Validator(schema).validate(config)
    except exceptions.ValidationError as ve:
        print("Failed to validate json:")
        print(ve)
        raise SystemExit

    param_space = space.Space(config)
    output_metric = param_space.get_optimization_parameters()[0]  # only works for mono-objective
    doe_size = config["design_of_experiment"]["number_of_samples"]
    feasibility_flag = param_space.get_feasible_parameter()[0]  # returns a list, we just want the name

    best = 0
    if minimum is not None:
        best = minimum
    application_name = config["application_name"]

    if budget is None:
        budget = float("inf")

    regrets = {}
    log_regrets = {}
    total_evaluations = {}
    max_iters = float("-inf")
    for data_dir_idx, data_dir in enumerate(data_dirs):
        dir_regrets = []
        dir_log_regrets = []
        min_dir_iters = budget
        for file in listdir(data_dir):
            if not file.endswith(".csv"):
                print("Skipping:", file)
                continue
            full_file = join(data_dir, file)
            data_array = load_data(full_file)
            total_iters = min(len(data_array[output_metric]), budget)
            min_dir_iters = min(total_iters, min_dir_iters)
            max_iters = max(max_iters, total_iters)
            evaluations = list(range(total_iters))
            simple_regret = []
            log_regret = []
            incumbent = float("inf")
            for idx in evaluations:
                if feasibility_flag is not None:
                    if data_array[feasibility_flag][idx] == True:
                        incumbent = min(incumbent,
                                        data_array[output_metric][idx])
                else:
                    incumbent = min(incumbent, data_array[output_metric][idx])
                regret = incumbent - best
                simple_regret.append(regret)
                log_regret.append(np.log(regret))
            dir_regrets.append(np.array(simple_regret))
            dir_log_regrets.append(np.array(log_regret))

        for idx in range(len(dir_regrets)):
            dir_regrets[idx] = dir_regrets[idx][:min_dir_iters]
            dir_log_regrets[idx] = dir_log_regrets[idx][:min_dir_iters]

        regrets[data_dir] = np.array(dir_regrets)
        log_regrets[data_dir] = np.array(dir_log_regrets)
        total_evaluations[data_dir] = list(range(min_dir_iters))

    mpl.rcParams.update({"font.size": 40})
    plt.rcParams["figure.figsize"] = [16, 12]
    linewidth = 6
    fig, ax = plt.subplots()
    colors = [
        "blue",
        "green",
        "red",
        "magenta",
        "yellow",
        "purple",
        "orange",
        "cyan",
        "gray",
    ]
    legend_elements = []
    if expert_configuration is not None:
        if plot_log:
            expert_configuration = np.log(expert_configuration)
        expert_data = [expert_configuration] * max_iters
        plt.plot(
            list(range(max_iters)),
            expert_data,
            color="black",
            linewidth=linewidth,
            linestyle="solid",
        )

    for key_idx, key in enumerate(regrets.keys()):
        std = np.std(regrets[key], axis=0, ddof=1)
        log_std = np.std(log_regrets[key], axis=0, ddof=1)
        simple_means = np.mean(regrets[key], axis=0)
        # Average the log regrets directly so the mean and the std used for the
        # shaded band are computed on the same (log-transformed) data.
        log_means = np.mean(log_regrets[key], axis=0)
        lower_bound = []
        upper_bound = []
        plot_means = simple_means
        plot_stds = std
        if plot_log:
            plot_means = log_means
            plot_stds = log_std

        for idx in range(plot_stds.shape[0]):
            lower_bound.append(plot_means[idx] - plot_stds[idx])
            upper_bound.append(plot_means[idx] + plot_stds[idx])

        next_color = colors[key_idx % len(colors)]
        plt.plot(total_evaluations[key],
                 plot_means,
                 color=next_color,
                 linewidth=linewidth)
        plt.fill_between(
            total_evaluations[key],
            lower_bound,
            upper_bound,
            color=next_color,
            alpha=0.2,
        )

        if labels is None:
            label = key
        else:
            label = labels[key_idx]

        legend_elements.append(
            Line2D([0], [0],
                   color=next_color,
                   label=label,
                   linewidth=linewidth))

    if expert_configuration is not None:
        legend_elements.append(
            Line2D(
                [0],
                [0],
                color="black",
                linewidth=linewidth,
                linestyle="solid",
                label="Expert Configuration",
            ))

    if plot_log and unlog_y_axis:
        locs, plt_labels = plt.yticks()
        plt_labels = [np.exp(float(item)) for item in locs]
        plt_labels = ["{0:,.2f}\n".format(item) for item in plt_labels]
        plt.yticks(locs, plt_labels)

    if show_doe:
        legend_elements.append(
            Line2D(
                [0],
                [0],
                color="black",
                linewidth=linewidth,
                linestyle="dashed",
                label="Initialization",
            ))
        plt.axvline(x=doe_size,
                    color="black",
                    linewidth=linewidth,
                    linestyle="dashed")

    rows = np.ceil(len(legend_elements) / ncol)
    height = 1 + (0.03) * rows
    plt.legend(
        handles=legend_elements,
        loc="center",
        bbox_to_anchor=(0.5, height),
        fancybox=True,
        shadow=True,
        ncol=ncol,
        bbox_transform=plt.gcf().transFigure,
    )

    if x_label is None:
        x_label = "Number of Evaluations"
    if y_label is None:
        if plot_log:
            y_label = "Log Regret"
        else:
            y_label = "Regret"
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if title is None:
        title = config["application_name"]
    plt.title(title, y=1)

    plt.xlim(left=1)

    if out_dir != "":
        if not out_dir.endswith("/"):
            out_dir += "/"
        os.makedirs(out_dir, exist_ok=True)
    if outfile is None:
        outfile = out_dir + application_name + "_regret.pdf"
    plt.savefig(outfile, bbox_inches="tight", dpi=300)
    plt.gcf().clear()

    return legend_elements
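For reference, here is a minimal standalone sketch of the incumbent/simple-regret bookkeeping used in the loop above, assuming a plain list of objective values and a known optimum; the names (simple_regret_trace, objective_values, best) are illustrative and not part of HyperMapper's API.

import numpy as np

def simple_regret_trace(objective_values, best=0.0):
    """Running simple regret: best objective value seen so far minus the known optimum."""
    incumbent = float("inf")
    regrets = []
    for value in objective_values:
        incumbent = min(incumbent, value)  # the incumbent only ever improves
        regrets.append(incumbent - best)
    return np.array(regrets)

# Example: the regret trace is non-increasing as better samples are found.
print(simple_regret_trace([5.0, 3.0, 4.0, 1.5], best=1.0))  # [4.  2.  2.  0.5]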
Exemplo n.º 8
0
def plot(parameters_file, list_of_pairs_of_files=None, image_output_file=None):
    """
    Plot the results of the previously run design space exploration.
    """
    try:
        hypermapper_pwd = os.environ["PWD"]
        hypermapper_home = os.environ["HYPERMAPPER_HOME"]
        os.chdir(hypermapper_home)
    except KeyError:
        hypermapper_home = "."
        hypermapper_pwd = "."
    debug = False  # verbosity flag passed to load_data_file below
    show_samples = False

    filename, file_extension = os.path.splitext(parameters_file)
    if file_extension != ".json":
        print(
            "Error: invalid file name.\nThe input file must be a .json file, not %s"
            % file_extension
        )
        exit(1)
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    DefaultValidatingDraft4Validator(schema).validate(config)

    application_name = config["application_name"]
    optimization_metrics = config["optimization_objectives"]
    feasible_output = config["feasible_output"]
    feasible_output_name = feasible_output["name"]
    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = hypermapper_pwd
        config["run_directory"] = run_directory

    xlog = config["output_image"]["image_xlog"]
    ylog = config["output_image"]["image_ylog"]

    if "optimization_objectives_labels_image_pdf" in config["output_image"]:
        optimization_objectives_labels_image_pdf = config["output_image"][
            "optimization_objectives_labels_image_pdf"
        ]
    else:
        optimization_objectives_labels_image_pdf = optimization_metrics

    # Only consider the files from the json config if no input files were given.
    # (Using None as the default avoids mutating a shared default list across calls.)
    if not list_of_pairs_of_files:
        list_of_pairs_of_files = []
        output_pareto_file = config["output_pareto_file"]
        if output_pareto_file == "output_pareto.csv":
            output_pareto_file = application_name + "_" + output_pareto_file
        output_data_file = config["output_data_file"]
        if output_data_file == "output_samples.csv":
            output_data_file = application_name + "_" + output_data_file
        list_of_pairs_of_files.append(
            (
                deal_with_relative_and_absolute_path(run_directory, output_pareto_file),
                deal_with_relative_and_absolute_path(run_directory, output_data_file),
            )
        )
    else:
        for idx, (output_pareto_file, output_data_file) in enumerate(
            list_of_pairs_of_files
        ):
            list_of_pairs_of_files[idx] = (
                deal_with_relative_and_absolute_path(run_directory, output_pareto_file),
                deal_with_relative_and_absolute_path(run_directory, output_data_file),
            )

    if image_output_file is not None:
        output_image_pdf_file = image_output_file
        output_image_pdf_file = deal_with_relative_and_absolute_path(
            run_directory, output_image_pdf_file
        )
        filename = os.path.basename(output_image_pdf_file)
        path = os.path.dirname(output_image_pdf_file)
        if path == "":
            output_image_pdf_file_with_all_samples = "all_" + filename
        else:
            output_image_pdf_file_with_all_samples = path + "/" + "all_" + filename
    else:
        tmp_file_name = config["output_image"]["output_image_pdf_file"]
        if tmp_file_name == "output_pareto.pdf":
            tmp_file_name = application_name + "_" + tmp_file_name
        output_image_pdf_file = deal_with_relative_and_absolute_path(
            run_directory, tmp_file_name
        )
        filename = os.path.basename(output_image_pdf_file)
        path = os.path.dirname(output_image_pdf_file)
        if path == "":
            output_image_pdf_file_with_all_samples = "all_" + filename
        else:
            output_image_pdf_file_with_all_samples = path + "/" + "all_" + filename

    str_files = ""
    for e in list_of_pairs_of_files:
        str_files += str(e[0] + " " + e[1] + " ")

    print("######### plot_pareto.py ##########################")
    print("### Parameters file is %s" % parameters_file)
    print("### The Pareto and DSE data files are: %s" % str_files)
    print("### The first output pdf image is %s" % output_image_pdf_file)
    print(
        "### The second output pdf image is %s" % output_image_pdf_file_with_all_samples
    )
    print("################################################")

    param_space = space.Space(config)

    xelem = optimization_metrics[0]
    yelem = optimization_metrics[1]
    handler_map_for_legend = {}
    xlabel = optimization_objectives_labels_image_pdf[0]
    ylabel = optimization_objectives_labels_image_pdf[1]

    x_max = float("-inf")
    x_min = float("inf")
    y_max = float("-inf")
    y_min = float("inf")

    print_legend = True
    fig = plt.figure()
    ax1 = plt.subplot(1, 1, 1)

    if xlog:
        ax1.set_xscale("log")
    if ylog:
        ax1.set_yscale("log")

    objective_1_max = objective_2_max = 1
    objective_1_is_percentage = objective_2_is_percentage = False
    if "objective_1_max" in config["output_image"]:
        objective_1_max = config["output_image"]["objective_1_max"]
        objective_1_is_percentage = True
    if "objective_2_max" in config["output_image"]:
        objective_2_max = config["output_image"]["objective_2_max"]
        objective_2_is_percentage = True

    input_data_array = {}
    fast_addressing_of_data_array = {}
    non_valid_optimization_obj_1 = defaultdict(list)
    non_valid_optimization_obj_2 = defaultdict(list)

    for file_pair in list_of_pairs_of_files:  # file_pair is a tuple: (Pareto file, DSE file)
        next_color = get_next_color()

        #############################################################################
        ###### Load data from files and do preprocessing on the data before plotting.
        #############################################################################
        for file in file_pair:
            print("Loading data from %s ..." % file)
            (
                input_data_array[file],
                fast_addressing_of_data_array[file],
            ) = param_space.load_data_file(file, debug)
            if input_data_array[file] is None:
                print("Error: no data found in input data file: %s.\n" % file)
                exit(1)
            if (xelem not in input_data_array[file]) or (
                yelem not in input_data_array[file]
            ):
                print(
                    "Error: the optimization variables have not been found in input data file %s. \n"
                    % file
                )
                exit(1)
            print(("Parameters are " + str(list(input_data_array[file].keys())) + "\n"))
            input_data_array[file][xelem] = [
                float(input_data_array[file][xelem][i]) / objective_1_max
                for i in range(len(input_data_array[file][xelem]))
            ]
            input_data_array[file][yelem] = [
                float(input_data_array[file][yelem][i]) / objective_2_max
                for i in range(len(input_data_array[file][yelem]))
            ]

            if objective_1_is_percentage:
                input_data_array[file][xelem] = [
                    input_data_array[file][xelem][i] * 100
                    for i in range(len(input_data_array[file][xelem]))
                ]
            if objective_2_is_percentage:
                input_data_array[file][yelem] = [
                    input_data_array[file][yelem][i] * 100
                    for i in range(len(input_data_array[file][yelem]))
                ]

            x_max, x_min, y_max, y_min = compute_min_max_samples(
                input_data_array[file], x_max, x_min, xelem, y_max, y_min, yelem
            )

            input_data_array_size = len(
                input_data_array[file][list(input_data_array[file].keys())[0]]
            )
            print("Size of the data file %s is %d" % (file, input_data_array_size))

        file_pareto = file_pair[0]  # This is the Pareto file
        file_search = file_pair[1]  # This is the DSE file

        ######################################################################################################
        ###### Compute invalid samples to be plotted in a different color (and remove them from the data arrays).
        ######################################################################################################
        if show_samples:
            # Walk the DSE samples, moving infeasible points into the "non valid"
            # arrays and deleting them in place; i only advances when nothing is deleted.
            i = 0
            for ind in range(len(input_data_array[file_search][yelem])):
                if input_data_array[file_search][feasible_output_name][i] == False:
                    non_valid_optimization_obj_2[file_search].append(
                        input_data_array[file_search][yelem][i]
                    )
                    non_valid_optimization_obj_1[file_search].append(
                        input_data_array[file_search][xelem][i]
                    )
                    for key in list(input_data_array[file_search].keys()):
                        del input_data_array[file_search][key][i]
                else:
                    i += 1

            label_is = get_last_dir_and_file_names(file_pareto)
            (all_samples,) = plt.plot(
                input_data_array[file_search][xelem],
                input_data_array[file_search][yelem],
                color=next_color,
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
                label=label_is,
            )
            plt.plot(
                input_data_array[file_pareto][xelem],
                input_data_array[file_pareto][yelem],
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
            )
            handler_map_for_legend[all_samples] = HandlerLine2D(numpoints=1)

        ################################################################################################################
        ##### Create a "staircase" Pareto plot: sort the points by the x axis, then duplicate each point so the front
        ##### is drawn as horizontal/vertical steps (see the standalone sketch after this example).
        ################################################################################################################
        straight_pareto_x = list()
        straight_pareto_y = list()
        if len(input_data_array[file_pareto][xelem]) != 0:
            data_array_pareto_x, data_array_pareto_y = (
                list(t)
                for t in zip(
                    *sorted(
                        zip(
                            input_data_array[file_pareto][xelem],
                            input_data_array[file_pareto][yelem],
                        )
                    )
                )
            )
            for j in range(len(data_array_pareto_x)):
                straight_pareto_x.append(data_array_pareto_x[j])
                straight_pareto_x.append(data_array_pareto_x[j])
                straight_pareto_y.append(data_array_pareto_y[j])
                straight_pareto_y.append(data_array_pareto_y[j])
            straight_pareto_x.append(x_max)  # Just insert the max on the x axis
            straight_pareto_y.insert(0, y_max)  # Just insert the max on the y axis

        label_is = "Pareto - " + get_last_dir_and_file_names(file_pareto)

        (pareto_front,) = plt.plot(
            straight_pareto_x,
            straight_pareto_y,
            label=label_is,
            linewidth=1,
            color=next_color,
        )
        handler_map_for_legend[pareto_front] = HandlerLine2D(numpoints=1)

        label_is = "Invalid Samples - " + get_last_dir_and_file_names(file_search)
        if show_samples:
            (non_valid,) = plt.plot(
                non_valid_optimization_obj_1[file_search],
                non_valid_optimization_obj_2[file_search],
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
                label=label_is,
            )
            handler_map_for_legend[non_valid] = HandlerLine2D(numpoints=1)

    plt.ylabel(ylabel, fontsize=16)
    plt.xlabel(xlabel, fontsize=16)
    # Set the fontsize of the tick labels on the x and y axes.
    ax1.tick_params(axis="x", labelsize=14)
    ax1.tick_params(axis="y", labelsize=14)

    # Add the legend with some customizations
    if print_legend:
        lgd = ax1.legend(
            handler_map=handler_map_for_legend,
            loc="best",
            bbox_to_anchor=(1, 1),
            fancybox=True,
            shadow=True,
            ncol=1,
            prop={"size": 14},
        )  # Display legend.

    font = {"size": 16}
    matplotlib.rc("font", **font)

    fig.savefig(output_image_pdf_file_with_all_samples, dpi=120, bbox_inches="tight")

    if objective_1_is_percentage:
        plt.xlim(0, 100)
    if objective_2_is_percentage:
        plt.ylim(0, 100)

    fig.savefig(output_image_pdf_file, dpi=120, bbox_inches="tight")
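As a footnote to the "staircase" Pareto comment in the example above, here is a minimal standalone sketch of that construction, assuming minimization in both objectives as in the plotting code; the function and argument names are illustrative and not part of HyperMapper's API.

def staircase_pareto(xs, ys, x_max, y_max):
    """Turn Pareto points into step-plot coordinates (minimization in both axes)."""
    sorted_x, sorted_y = zip(*sorted(zip(xs, ys)))  # order by the first objective
    step_x, step_y = [], []
    for x, y in zip(sorted_x, sorted_y):
        step_x += [x, x]  # duplicate each point to draw horizontal/vertical steps
        step_y += [y, y]
    step_x.append(x_max)     # extend the last step to the right edge of the plot
    step_y.insert(0, y_max)  # extend the first step to the top edge of the plot
    return step_x, step_y

# Example:
# staircase_pareto([1, 3, 2], [5, 1, 2], x_max=4, y_max=6)
# -> ([1, 1, 2, 2, 3, 3, 4], [6, 5, 5, 2, 2, 1, 1])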