示例#1
0
def _generate_slice_subplot(
    trials: List[FrozenTrial],
    param: str,
    ax: "Axes",
    cmap: "Colormap",
    padding_ratio: float,
    obj_values: List[Union[int, float]],
) -> "PathCollection":
    """Scatter the objective values against one parameter on ``ax``.

    Args:
        trials: Trials to plot; those without ``param`` are left out.
        param: Name of the parameter shown on the x axis.
        ax: Axes that receives the labels, limits and scatter plot.
        cmap: Colormap used to colour the points by trial number.
        padding_ratio: Forwarded to ``_calc_lim_with_padding`` to size the
            margin around the x values.
        obj_values: Objective values, paired positionally with ``trials``.

    Returns:
        The collection returned by ``ax.scatter``.
    """
    # Keep only the (trial, objective) pairs whose trial sampled ``param``.
    sampled = [(t, v) for t, v in zip(trials, obj_values) if param in t.params]
    x_values = [t.params[param] for t, _ in sampled]
    y_values = [v for _, v in sampled]
    trial_numbers = [t.number for t, _ in sampled]

    ax.set(xlabel=param, ylabel="Objective Value")

    # Log scale takes precedence over categorical handling.
    scale = None
    if _is_log_scale(trials, param):
        scale = "log"
        ax.set_xscale("log")
    elif _is_categorical(trials, param):
        scale = "categorical"
        # Categorical choices are plotted by their string representation.
        x_values = list(map(str, x_values))

    x_limits = _calc_lim_with_padding(x_values, padding_ratio, scale)
    ax.set_xlim(x_limits[0], x_limits[1])

    scatter = ax.scatter(
        x_values,
        y_values,
        c=trial_numbers,
        cmap=cmap,
        edgecolors="grey",
    )
    ax.label_outer()
    return scatter
示例#2
0
文件: _contour.py 项目: optuna/optuna
def _generate_contour_subplot(
    trials: List[FrozenTrial],
    x_param: str,
    y_param: str,
    ax: "Axes",
    cmap: "Colormap",
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
) -> "ContourSet":
    """Draw the contour plot of ``x_param`` versus ``y_param`` onto ``ax``.

    Args:
        trials: Trials whose parameters and target values are plotted.
        x_param: Name of the parameter on the x axis.
        y_param: Name of the parameter on the y axis.
        ax: Axes that receives labels, limits, contours and data points.
        cmap: Colormap for the filled contour; its reversed form is used.
        contour_point_num: Number of grid points per axis, forwarded to
            ``_calculate_griddata``.
        target: Optional function extracting the plotted value from a trial,
            forwarded to ``_calculate_griddata``.

    Returns:
        The filled contour set when contours were drawn. ``ax`` itself is
        returned when either parameter has fewer than two unique values, and
        ``None`` when there was nothing to contour (no grid data, or
        ``x_param == y_param``).
    """
    x_indices = sorted(set(_get_param_values(trials, x_param)))
    y_indices = sorted(set(_get_param_values(trials, y_param)))
    if len(x_indices) < 2:
        _logger.warning("Param {} unique value length is less than 2.".format(x_param))
        return ax
    if len(y_indices) < 2:
        _logger.warning("Param {} unique value length is less than 2.".format(y_param))
        return ax

    (
        xi,
        yi,
        zi,
        x_values,
        y_values,
        x_values_range,
        y_values_range,
        x_cat_param_pos,
        x_cat_param_label,
        y_cat_param_pos,
        y_cat_param_label,
        x_values_dummy_count,
        y_values_dummy_count,
    ) = _calculate_griddata(
        trials, x_param, x_indices, y_param, y_indices, contour_point_num, target
    )
    cs = None
    ax.set(xlabel=x_param, ylabel=y_param)
    # _calculate_griddata returns empty ranges when no trial contains both
    # parameters; indexing them unconditionally would raise IndexError.
    if len(x_values_range) == 2:
        ax.set_xlim(x_values_range[0], x_values_range[1])
    if len(y_values_range) == 2:
        ax.set_ylim(y_values_range[0], y_values_range[1])
    if len(zi) > 0:
        if _is_log_scale(trials, x_param):
            ax.set_xscale("log")
        if _is_log_scale(trials, y_param):
            ax.set_yscale("log")
        if x_param != y_param:
            # Contour the gridded data.
            ax.contour(xi, yi, zi, 15, linewidths=0.5, colors="k")
            cs = ax.contourf(xi, yi, zi, 15, cmap=cmap.reversed())
            # Plot data points. Each dummy step in _calculate_griddata
            # multiplies the list length by (dummy count + 1) and keeps the
            # real points at the front, so the factors of both axes must be
            # divided out; dividing by only one of them would scatter
            # replicated dummy points when both axes were padded.
            replication = (x_values_dummy_count + 1) * (y_values_dummy_count + 1)
            org_len = len(x_values) // replication
            ax.scatter(
                x_values[:org_len],
                y_values[:org_len],
                marker="o",
                c="black",
                s=20,
                edgecolors="grey",
                linewidth=2.0,
            )
    if x_cat_param_pos:
        ax.set_xticks(x_cat_param_pos)
        ax.set_xticklabels(x_cat_param_label)
    if y_cat_param_pos:
        ax.set_yticks(y_cat_param_pos)
        ax.set_yticklabels(y_cat_param_label)
    ax.label_outer()
    return cs
示例#3
0
文件: _contour.py 项目: optuna/optuna
def _calculate_griddata(
    trials: List[FrozenTrial],
    x_param: str,
    x_indices: List[Union[str, int, float]],
    y_param: str,
    y_indices: List[Union[str, int, float]],
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[int],
    List[str],
    List[int],
    List[str],
    int,
    int,
]:
    """Prepare grid coordinates, interpolated values and point data for a contour plot.

    Args:
        trials: Trials to read parameter and target values from.
        x_param: Name of the x-axis parameter.
        x_indices: Candidate x values; those absent from the collected x values
            serve as dummies when only one unique x value was collected.
        y_param: Name of the y-axis parameter.
        y_indices: Candidate y values, used like ``x_indices``.
        contour_point_num: Number of grid coordinates per axis.
        target: Optional function returning the plotted value of a trial;
            ``trial.value`` is used when it is ``None``.

    Returns:
        A 13-tuple ``(xi, yi, zi, x_values, y_values, x_range, y_range,
        x_cat_pos, x_cat_labels, y_cat_pos, y_cat_labels, x_dummy_count,
        y_dummy_count)``. When no trial contains both parameters, the arrays
        and ranges are empty and both counts are 0.

    Raises:
        ValueError: If a target value is neither an ``int`` nor a ``float``.
    """
    # Extract values for the x, y and z axes from each trial. The *_range_values
    # lists cover every trial having the respective parameter, while the
    # *_values lists only cover trials having both parameters.
    x_values = []
    y_values = []
    z_values = []
    x_range_values = []
    y_range_values = []
    for trial in trials:
        trial_params = trial.params
        has_x = x_param in trial_params
        has_y = y_param in trial_params
        if has_x:
            x_range_values.append(trial_params[x_param])
        if has_y:
            y_range_values.append(trial_params[y_param])
        if not (has_x and has_y):
            continue

        x_values.append(trial_params[x_param])
        y_values.append(trial_params[y_param])

        value = trial.value if target is None else target(trial)
        if isinstance(value, int):
            value = float(value)
        elif not isinstance(value, float):
            raise ValueError(
                "Trial{} has COMPLETE state, but its target value is non-numeric.".format(
                    trial.number
                )
            )
        z_values.append(value)

    # Nothing to grid when no trial provided both parameters.
    if not x_values or not y_values:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            x_values,
            y_values,
            [],
            [],
            [],
            [],
            [],
            [],
            0,
            0,
        )

    # When a parameter has a single unique value, append dummy values taken
    # from the candidate list and replicate the other two lists so all three
    # stay the same length. The real points remain at the front.
    x_values_dummy = []
    y_values_dummy = []
    if len(set(x_values)) == 1:
        x_values_dummy = [x for x in x_indices if x not in x_values]
        x_dummy_total = len(x_values_dummy)
        x_values.extend(x_values_dummy * len(x_values))
        y_values.extend(y_values * x_dummy_total)
        z_values.extend(z_values * x_dummy_total)
    if len(set(y_values)) == 1:
        y_values_dummy = [y for y in y_indices if y not in y_values]
        y_dummy_total = len(y_values_dummy)
        y_values.extend(y_values_dummy * len(y_values))
        x_values.extend(x_values * y_dummy_total)
        z_values.extend(z_values * y_dummy_total)

    # Replace the values of non-numerical parameters using a label encoder
    # fitted on the full range of each parameter.
    cat_param_labels_x: List[str] = []
    cat_param_pos_x: List[int] = []
    cat_param_labels_y: List[str] = []
    cat_param_pos_y: List[int] = []
    if not _is_numerical(trials, x_param):
        x_encoder = _LabelEncoder()
        x_range_values = x_encoder.fit_transform([str(v) for v in x_range_values])
        x_values = x_encoder.transform([str(v) for v in x_values])
        cat_param_labels_x = x_encoder.get_labels()
        cat_param_pos_x = x_encoder.get_indices()
    if not _is_numerical(trials, y_param):
        y_encoder = _LabelEncoder()
        y_range_values = y_encoder.fit_transform([str(v) for v in y_range_values])
        y_values = y_encoder.transform([str(v) for v in y_values])
        cat_param_labels_y = y_encoder.get_labels()
        cat_param_pos_y = y_encoder.get_indices()

    def _padded_axis(param, lower, upper):
        # Widen [lower, upper] by AXES_PADDING_RATIO and return the widened
        # bounds with evenly spaced coordinates, on a log scale when the
        # parameter is log-scaled and on a linear scale otherwise.
        if _is_log_scale(trials, param):
            log_lower = np.log10(lower)
            log_upper = np.log10(upper)
            padding = (log_upper - log_lower) * AXES_PADDING_RATIO
            lower = np.power(10, log_lower - padding)
            upper = np.power(10, log_upper + padding)
            grid = np.logspace(np.log10(lower), np.log10(upper), contour_point_num)
        else:
            padding = (upper - lower) * AXES_PADDING_RATIO
            lower = lower - padding
            upper = upper + padding
            grid = np.linspace(lower, upper, contour_point_num)
        return lower, upper, grid

    # Axis extents come from every trial that has the respective parameter.
    x_values_min = min(x_range_values)
    x_values_max = max(x_range_values)
    y_values_min = min(y_range_values)
    y_values_max = max(y_range_values)

    x_values_min, x_values_max, xi = _padded_axis(x_param, x_values_min, x_values_max)
    y_values_min, y_values_max, yi = _padded_axis(y_param, y_values_min, y_values_max)

    # Create an irregularly spaced map of trial values and interpolate it
    # with Plotly's interpolation formulation. A parameter plotted against
    # itself gets no interpolated surface.
    zi: np.ndarray = np.array([])
    if x_param != y_param:
        zmap = _create_zmap(x_values, y_values, z_values, xi, yi)
        zi = _interpolate_zmap(zmap, contour_point_num)

    return (
        xi,
        yi,
        zi,
        x_values,
        y_values,
        [x_values_min, x_values_max],
        [y_values_min, y_values_max],
        cat_param_pos_x,
        cat_param_labels_x,
        cat_param_pos_y,
        cat_param_labels_y,
        len(x_values_dummy),
        len(y_values_dummy),
    )
示例#4
0
def _calculate_griddata(
    trials: List[FrozenTrial],
    x_param: str,
    x_indices: List[Union[str, int, float]],
    y_param: str,
    y_indices: List[Union[str, int, float]],
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[int],
    List[str],
    List[int],
    List[str],
    int,
    int,
]:
    """Prepare cubic-interpolated grid data and point data for a contour plot.

    Args:
        trials: Trials to read parameter and target values from.
        x_param: Name of the x-axis parameter.
        x_indices: Candidate x values; those absent from the collected x values
            serve as dummies when only one unique x value was collected.
        y_param: Name of the y-axis parameter.
        y_indices: Candidate y values, used like ``x_indices``.
        contour_point_num: Number of grid coordinates per axis.
        target: Optional function returning the plotted value of a trial;
            ``trial.value`` is used when it is ``None``.
        target_name: Accepted but not used by this function.

    Returns:
        A 13-tuple ``(xi, yi, zi, x_values, y_values, x_range, y_range,
        x_cat_pos, x_cat_labels, y_cat_pos, y_cat_labels, x_dummy_count,
        y_dummy_count)``. When no trial contains both parameters, the arrays
        and ranges are empty and both counts are 0. When ``x_param`` equals
        ``y_param`` the three arrays are empty.

    Raises:
        ValueError: If a target value is neither an ``int`` nor a ``float``.
    """
    # Extract values for the x, y and z axes from each trial that has both
    # parameters.
    x_values = []
    y_values = []
    z_values = []
    for trial in trials:
        trial_params = trial.params
        if not (x_param in trial_params and y_param in trial_params):
            continue
        x_values.append(trial_params[x_param])
        y_values.append(trial_params[y_param])

        value = trial.value if target is None else target(trial)
        if isinstance(value, int):
            value = float(value)
        elif not isinstance(value, float):
            raise ValueError(
                "Trial{} has COMPLETE state, but its target value is non-numeric."
                .format(trial.number))
        z_values.append(value)

    # Nothing to grid when no trial provided both parameters.
    if not x_values or not y_values:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            x_values,
            y_values,
            [],
            [],
            [],
            [],
            [],
            [],
            0,
            0,
        )

    # When a parameter has a single unique value, append dummy values taken
    # from the candidate list and replicate the other two lists so all three
    # stay the same length. The real points remain at the front.
    x_values_dummy = []
    y_values_dummy = []
    if len(set(x_values)) == 1:
        x_values_dummy = [x for x in x_indices if x not in x_values]
        x_dummy_total = len(x_values_dummy)
        x_values.extend(x_values_dummy * len(x_values))
        y_values.extend(y_values * x_dummy_total)
        z_values.extend(z_values * x_dummy_total)
    if len(set(y_values)) == 1:
        y_values_dummy = [y for y in y_indices if y not in y_values]
        y_dummy_total = len(y_values_dummy)
        y_values.extend(y_values_dummy * len(y_values))
        x_values.extend(x_values * y_dummy_total)
        z_values.extend(z_values * y_dummy_total)

    # Convert categorical values to int, going through their string form.
    cat_param_labels_x = []  # type: List[str]
    cat_param_pos_x = []  # type: List[int]
    cat_param_labels_y = []  # type: List[str]
    cat_param_pos_y = []  # type: List[int]
    if _is_categorical(trials, x_param):
        x_values, cat_param_labels_x, cat_param_pos_x = _convert_categorical2int(
            list(map(str, x_values)))
    if _is_categorical(trials, y_param):
        y_values, cat_param_labels_y, cat_param_pos_y = _convert_categorical2int(
            list(map(str, y_values)))

    # Axis extents of the (possibly converted) point data.
    x_values_min, x_values_max = min(x_values), max(x_values)
    y_values_min, y_values_max = min(y_values), max(y_values)

    def _axis_points(param, lower, upper):
        # Evenly spaced coordinates between the bounds, on a log scale when
        # the parameter is log-scaled and on a linear scale otherwise.
        if _is_log_scale(trials, param):
            return np.logspace(np.log10(lower), np.log10(upper),
                               contour_point_num)
        return np.linspace(lower, upper, contour_point_num)

    # A parameter plotted against itself gets no grid at all.
    xi = np.array([])
    yi = np.array([])
    zi = np.array([])
    if x_param != y_param:
        xi = _axis_points(x_param, x_values_min, x_values_max)
        yi = _axis_points(y_param, y_values_min, y_values_max)

        # Interpolate z-axis data on a grid with cubic interpolator.
        # TODO(ytknzw): Implement Plotly-like interpolation algorithm.
        sample_points = np.column_stack((x_values, y_values))
        zi = griddata(
            sample_points,
            z_values,
            (xi[None, :], yi[:, None]),
            method="cubic",
        )

    return (
        xi,
        yi,
        zi,
        x_values,
        y_values,
        [x_values_min, x_values_max],
        [y_values_min, y_values_max],
        cat_param_pos_x,
        cat_param_labels_x,
        cat_param_pos_y,
        cat_param_labels_y,
        len(x_values_dummy),
        len(y_values_dummy),
    )
示例#5
0
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of a study's completed trials.

    Args:
        study: Study whose completed trials are plotted.
        params: Parameter names to show; every parameter found in the
            completed trials is shown when ``None``.
        target: Function returning the plotted value of a trial; the trial's
            ``value`` is used when ``None``.
        target_name: Label of the first axis and of the colour bar.

    Returns:
        The axes holding the plot. When there are no completed trials, or all
        of them are skipped, a warning is logged and the axes are returned
        with only the title and spine settings applied.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            completed trial.
    """
    # A custom target always uses the reversed colormap; the default target
    # reverses it only for minimisation studies.
    reversescale = True
    if target is None:
        reversescale = study.direction == StudyDirection.MINIMIZE

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    for side in ("top", "bottom"):
        ax.spines[side].set_visible(False)

    # Prepare data for plotting.
    trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
    if not trials:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for requested in params:
            if requested not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        requested))
        all_params = set(params)
    sorted_params = sorted(all_params)

    # Trials reported by _get_skipped_trial_numbers are excluded from the plot.
    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    plotted_trials = [
        t for t in trials if t.number not in skipped_trial_numbers
    ]

    obj_org = [target(t) for t in plotted_trials]
    if not obj_org:
        _logger.warning(
            "Your study has only completed trials with missing parameters.")
        return ax

    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per plotted trial; it starts at the objective value and
    # gains one y coordinate per parameter below.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    numeric_cat_params_indices: List[int] = []

    for param_index, p_name in enumerate(sorted_params):
        values = [t.params[p_name] for t in plotted_trials]

        if _is_categorical(trials, p_name):
            # Each unseen choice gets the next free integer on first lookup.
            vocab: DefaultDict[str, int] = defaultdict(lambda: len(vocab))

            if _is_numerical(trials, p_name):
                # Touch the choices in sorted order first so the integer
                # codes follow the numeric order of the choices.
                for choice in sorted(values):
                    vocab[choice]
                numeric_cat_params_indices.append(param_index)

            values = [vocab[v] for v in values]

            cat_param_names.append(p_name)
            ordered_vocab = sorted(vocab.items(), key=lambda item: item[1])
            cat_param_values.append([label for label, _ in ordered_vocab])
            cat_param_ticks.append([code for _, code in ordered_vocab])

        values_for_lc = (
            [np.log10(v) for v in values]
            if _is_log_scale(trials, p_name) else values
        )

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        # Map the parameter onto the objective's range; a parameter without
        # any spread is placed at the middle of that range.
        if p_w == 0.0:
            scaled = [obj_w / 2 + obj_min] * len(values)
        else:
            scaled = [
                (v - p_min) / p_w * obj_w + obj_min for v in values_for_lc
            ]
        for polyline, y in zip(dims_obj_base, scaled):
            polyline.append(y)

        var_names.append(
            p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    if numeric_cat_params_indices:
        # np.lexsort consumes the sort keys from back to front, so the keys
        # are handed over in reversed order.
        sort_keys = [
            param_values[index] for index in numeric_cat_params_indices
        ]
        sorted_idx = np.lexsort(sort_keys[::-1])
        # Since the values are mapped to other categories by the index,
        # the index will be swapped according to the sorted index of numeric params.
        param_values = [list(np.array(v)[sorted_idx]) for v in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_params = len(sorted_params)
    ax.set_xlim(0, n_params)
    ax.set_ylim(obj_min, obj_max)
    x_positions = range(n_params + 1)
    segments = [np.column_stack([x_positions, ys]) for ys in dims_obj_base]
    lc = LineCollection(segments, cmap=cmap)
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(x_positions, var_names, rotation=330)

    # One twin y axis per parameter, its right spine placed at the parameter's
    # x position.
    for i, (p_name, p_values) in enumerate(zip(sorted_params, param_values)):
        ax2 = ax.twinx()
        ax2.set_ylim(min(p_values), max(p_values))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        ax2.plot([1] * len(p_values), p_values, visible=False)
        ax2.spines["right"].set_position(("axes", (i + 1) / n_params))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            ax2.set_yticks(cat_param_ticks[idx])
            ax2.set_yticklabels(cat_param_values[idx])

    ax.add_collection(lc)

    return ax
示例#6
0
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of a study's completed trials.

    Args:
        study: Study whose completed trials are plotted.
        params: Parameter names to show; every parameter found in the
            completed trials is shown when ``None``.
        target: Function returning the plotted value of a trial; the trial's
            ``value`` is used when ``None``.
        target_name: Label of the first axis and of the colour bar.

    Returns:
        The axes holding the plot. When there are no completed trials a
        warning is logged and the axes are returned with only the title and
        spine settings applied.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any
            completed trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [
        trial for trial in study.trials if trial.state == TrialState.COMPLETE
    ]

    if len(trials) == 0:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params.keys()}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(
                        input_p_name))
        all_params = set(params)
    sorted_params = sorted(all_params)

    obj_org = [target(t) for t in trials]
    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min
    # One polyline per trial; it starts at the objective value and gains one
    # y coordinate per parameter below.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]
    for p_name in sorted_params:
        # Trials lacking the parameter contribute NaN.
        values = [
            t.params[p_name] if p_name in t.params else np.nan for t in trials
        ]

        # Pick the values that are normalised onto the objective's range:
        # log10 for log-scaled parameters, integer codes for categorical
        # ones, and the raw values otherwise.
        if _is_log_scale(trials, p_name):
            values_for_lc = [math.log10(v) for v in values]
        elif _is_categorical(trials, p_name):
            # Each unseen choice gets the next free integer on first lookup.
            vocab = defaultdict(
                lambda: len(vocab))  # type: DefaultDict[str, int]
            values = [vocab[v] for v in values]
            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])
            values_for_lc = values
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0:
            # A parameter with a single unique value has no spread to
            # normalise by; dividing would raise ZeroDivisionError, so its
            # points are placed at the middle of the objective's range.
            center = obj_w / 2 + obj_min
            for polyline in dims_obj_base:
                polyline.append(center)
        else:
            for polyline, v in zip(dims_obj_base, values_for_lc):
                polyline.append((v - p_min) / p_w * obj_w + obj_min)

        var_names.append(
            p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    ax.set_xlim(0, len(sorted_params))
    ax.set_ylim(obj_min, obj_max)
    x_positions = range(len(sorted_params) + 1)
    segments = [np.column_stack([x_positions, y]) for y in dims_obj_base]
    lc = LineCollection(segments, cmap=cmap)
    # Colour each line by its own target value. No extra entry is appended:
    # a spurious 0 would make the array longer than the number of lines and
    # could widen the colour range beyond the actual target values.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(x_positions, var_names, rotation=330)

    # One twin y axis per parameter, its right spine placed at the parameter's
    # x position.
    for i, p_name in enumerate(sorted_params):
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.get_xaxis().set_visible(False)
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        ax2.spines["right"].set_position(
            ("axes", (i + 1) / len(sorted_params)))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax