def _generate_slice_subplot(
    trials: List[FrozenTrial],
    param: str,
    ax: "Axes",
    cmap: "Colormap",
    padding_ratio: float,
    obj_values: List[Union[int, float]],
) -> "PathCollection":
    """Scatter one parameter against the objective values on ``ax``.

    Args:
        trials: Trials to plot; paired element-wise with ``obj_values``.
        param: Name of the parameter shown on the x axis.
        ax: Axes to draw on.
        cmap: Colormap used to colour points by trial number.
        padding_ratio: Forwarded to ``_calc_lim_with_padding`` when computing the x limits.
        obj_values: Value plotted on the y axis for each entry of ``trials``.

    Returns:
        The collection returned by ``ax.scatter``.
    """
    # Keep only the trials that actually define ``param``.
    points = [
        (trial.params[param], value, trial.number)
        for trial, value in zip(trials, obj_values)
        if param in trial.params
    ]
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    numbers = [point[2] for point in points]

    ax.set(xlabel=param, ylabel="Objective Value")

    scale = None
    if _is_log_scale(trials, param):
        ax.set_xscale("log")
        scale = "log"
    elif _is_categorical(trials, param):
        # Categorical choices are plotted by their string representation.
        xs = [str(x) for x in xs]
        scale = "categorical"

    x_limits = _calc_lim_with_padding(xs, padding_ratio, scale)
    ax.set_xlim(x_limits[0], x_limits[1])

    scatter = ax.scatter(xs, ys, c=numbers, cmap=cmap, edgecolors="grey")
    ax.label_outer()
    return scatter
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of the completed trials of ``study``.

    Args:
        study: Study whose trials are plotted.
        params: Parameter names to show. When ``None``, every parameter found in the
            completed trials is used.
        target: Function mapping a trial to the plotted value. When ``None``, the
            trial's ``value`` is used.
        target_name: Label of the first axis and of the colorbar.

    Returns:
        The axes the plot was drawn on. They are returned without any lines when there
        is no completed trial, or when every completed trial is skipped.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any completed trial.
    """
    reversescale = True
    if target is None:
        # ``study.direction`` is only consulted when no custom target is supplied.
        reversescale = study.direction == StudyDirection.MINIMIZE

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [trial for trial in study.trials if trial.state == TrialState.COMPLETE]
    if not trials:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {name for trial in trials for name in trial.params}
    if params is not None:
        for requested in params:
            if requested not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(requested)
                )
        all_params = set(params)
    sorted_params = sorted(all_params)

    skipped_trial_numbers = _get_skipped_trial_numbers(trials, sorted_params)
    kept_trials = [t for t in trials if t.number not in skipped_trial_numbers]

    obj_org = [target(t) for t in kept_trials]
    if not obj_org:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return ax

    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min

    # One polyline per kept trial; each starts at its objective value.
    polylines = [[objective] for objective in obj_org]

    # Parameter name -> (tick positions, tick labels) for categorical parameters.
    categorical_axes = {}
    param_values = []
    var_names = [target_name]
    numeric_categorical_positions: List[int] = []

    for position, name in enumerate(sorted_params):
        column = [t.params[name] for t in kept_trials]

        if _is_categorical(trials, name):
            # Each previously unseen category receives the next integer code.
            codes = defaultdict(lambda: len(codes))  # type: DefaultDict[str, int]

            if _is_numerical(trials, name):
                # Touch the categories in sorted order so codes follow numeric order.
                for category in sorted(column):
                    _ = codes[category]
                numeric_categorical_positions.append(position)

            column = [codes[category] for category in column]

            ordered = sorted(codes.items(), key=lambda item: item[1])
            categorical_axes[name] = (
                [code for _, code in ordered],
                [label for label, _ in ordered],
            )

        line_values = [np.log10(v) for v in column] if _is_log_scale(trials, name) else column

        p_min = min(line_values)
        p_max = max(line_values)
        p_w = p_max - p_min

        if p_w == 0.0:
            # A constant parameter is drawn at the middle of the objective range.
            center = obj_w / 2 + obj_min
            for line in polylines:
                line.append(center)
        else:
            # Rescale the parameter range onto the objective range.
            for line, v in zip(polylines, line_values):
                line.append((v - p_min) / p_w * obj_w + obj_min)

        var_names.append(name if len(name) < 20 else "{}...".format(name[:17]))
        param_values.append(column)

    if numeric_categorical_positions:
        # np.lexsort consumes the sort keys from back to front,
        # so the keys are supplied in reversed order.
        sort_keys = [param_values[index] for index in reversed(numeric_categorical_positions)]
        sorted_idx = np.lexsort(sort_keys)
        # Since the values are mapped to other categories by the index,
        # every column is reordered by the sorted index of the numeric params.
        param_values = [list(np.array(column)[sorted_idx]) for column in param_values]

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_params = len(sorted_params)
    axis_positions = range(n_params + 1)
    ax.set_xlim(0, n_params)
    ax.set_ylim(obj_min, obj_max)
    segments = [np.column_stack([axis_positions, line]) for line in polylines]
    lc = LineCollection(segments, cmap=cmap)
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(axis_positions, var_names, rotation=330)

    for i, (name, column) in enumerate(zip(sorted_params, param_values)):
        ax2 = ax.twinx()
        ax2.set_ylim(min(column), max(column))
        if _is_log_scale(trials, name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.xaxis.set_visible(False)
        ax2.plot([1] * len(column), column, visible=False)
        ax2.spines["right"].set_position(("axes", (i + 1) / n_params))
        if name in categorical_axes:
            tick_pos, tick_labels = categorical_axes[name]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax
def _calculate_griddata(
    trials: List[FrozenTrial],
    x_param: str,
    x_indices: List[Union[str, int, float]],
    y_param: str,
    y_indices: List[Union[str, int, float]],
    contour_point_num: int,
    target: Optional[Callable[[FrozenTrial], float]],
    target_name: str,
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[Union[int, float]],
    List[int],
    List[str],
    List[int],
    List[str],
    int,
    int,
]:
    """Gather the points of one parameter pair and interpolate them on a grid.

    Trials that lack ``x_param`` or ``y_param`` are ignored.

    Args:
        trials: Trials providing the parameter and target values.
        x_param: Name of the parameter on the x axis.
        x_indices: Candidate x values used as dummies when x has one unique value.
        y_param: Name of the parameter on the y axis.
        y_indices: Candidate y values used as dummies when y has one unique value.
        contour_point_num: Number of grid coordinates generated per axis.
        target: Function mapping a trial to its z value; ``trial.value`` when ``None``.
        target_name: Not used by this function.

    Returns:
        A 13-tuple ``(xi, yi, zi, x_values, y_values, [x_min, x_max], [y_min, y_max],
        x_positions, x_labels, y_positions, y_labels, n_x_dummies, n_y_dummies)``.
        The positions and labels are those produced by ``_convert_categorical2int``
        and stay empty for non-categorical parameters. ``xi``, ``yi`` and ``zi`` are
        empty arrays when ``x_param == y_param`` or when no trial has both parameters.

    Raises:
        ValueError: If a target value is neither an ``int`` nor a ``float``.
    """
    # Extract values for x, y, z axes from each trial.
    xs = []
    ys = []
    zs = []
    for trial in trials:
        if not (x_param in trial.params and y_param in trial.params):
            continue
        xs.append(trial.params[x_param])
        ys.append(trial.params[y_param])

        value = trial.value if target is None else target(trial)
        if not isinstance(value, (int, float)):
            raise ValueError(
                "Trial{} has COMPLETE state, but its target value is non-numeric.".format(
                    trial.number
                )
            )
        # Integers are promoted to float; floats are stored unchanged.
        zs.append(float(value) if isinstance(value, int) else value)

    # Return empty values when x or y has no value.
    if not xs or not ys:
        return (
            np.array([]),
            np.array([]),
            np.array([]),
            xs,
            ys,
            [],
            [],
            [],
            [],
            [],
            [],
            0,
            0,
        )

    # Add dummy values for grid data calculation when a parameter has one unique value.
    x_dummies = []
    y_dummies = []
    if len(set(xs)) == 1:
        x_dummies = [candidate for candidate in x_indices if candidate not in xs]
        copies = len(x_dummies) + 1
        xs = xs + x_dummies * len(xs)
        # The other two axes are repeated so that all three lists keep equal length.
        ys = ys * copies
        zs = zs * copies
    if len(set(ys)) == 1:
        y_dummies = [candidate for candidate in y_indices if candidate not in ys]
        copies = len(y_dummies) + 1
        ys = ys + y_dummies * len(ys)
        xs = xs * copies
        zs = zs * copies

    # Convert categorical values to int.
    x_labels = []  # type: List[str]
    x_positions = []  # type: List[int]
    y_labels = []  # type: List[str]
    y_positions = []  # type: List[int]
    if _is_categorical(trials, x_param):
        xs, x_labels, x_positions = _convert_categorical2int([str(x) for x in xs])
    if _is_categorical(trials, y_param):
        ys, y_labels, y_positions = _convert_categorical2int([str(y) for y in ys])

    # Calculate min and max of x and y.
    x_low = min(xs)
    x_high = max(xs)
    y_low = min(ys)
    y_high = max(ys)

    def _axis_grid(name: str, low: Union[int, float], high: Union[int, float]) -> np.ndarray:
        # 1-D array of evenly spaced coordinates on a linear or log scale.
        if _is_log_scale(trials, name):
            return np.logspace(np.log10(low), np.log10(high), contour_point_num)
        return np.linspace(low, high, contour_point_num)

    # Calculate grid data points.
    if x_param == y_param:
        xi = np.array([])
        yi = np.array([])
        zi = np.array([])
    else:
        xi = _axis_grid(x_param, x_low, x_high)
        yi = _axis_grid(y_param, y_low, y_high)

        # Interpolate z-axis data on a grid with cubic interpolator.
        # TODO(ytknzw): Implement Plotly-like interpolation algorithm.
        zi = griddata(
            np.column_stack((xs, ys)),
            zs,
            (xi[None, :], yi[:, None]),
            method="cubic",
        )

    return (
        xi,
        yi,
        zi,
        xs,
        ys,
        [x_low, x_high],
        [y_low, y_high],
        x_positions,
        x_labels,
        y_positions,
        y_labels,
        len(x_dummies),
        len(y_dummies),
    )
def _get_parallel_coordinate_plot(
    study: Study,
    params: Optional[List[str]] = None,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "Axes":
    """Draw a parallel coordinate plot of the completed trials of ``study``.

    Only completed trials that define every plotted parameter are drawn.

    Args:
        study: Study whose trials are plotted.
        params: Parameter names to show. When ``None``, every parameter found in the
            completed trials is used.
        target: Function mapping a trial to the plotted value. When ``None``, the
            trial's ``value`` is used.
        target_name: Label of the first axis and of the colorbar.

    Returns:
        The axes the plot was drawn on. They are returned without any lines when there
        is no completed trial, or when no completed trial defines all plotted
        parameters.

    Raises:
        ValueError: If a name in ``params`` is not a parameter of any completed trial.
    """
    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target
        reversescale = study.direction == StudyDirection.MINIMIZE
    else:
        reversescale = True

    # Set up the graph style.
    fig, ax = plt.subplots()
    cmap = plt.get_cmap("Blues_r" if reversescale else "Blues")
    ax.set_title("Parallel Coordinate Plot")
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)

    # Prepare data for plotting.
    trials = [trial for trial in study.trials if trial.state == TrialState.COMPLETE]

    if not trials:
        _logger.warning("Your study does not have any completed trials.")
        return ax

    all_params = {p_name for t in trials for p_name in t.params}
    if params is not None:
        for input_p_name in params:
            if input_p_name not in all_params:
                raise ValueError(
                    "Parameter {} does not exist in your study.".format(input_p_name)
                )
        all_params = set(params)
    sorted_params = sorted(all_params)

    # A trial lacking one of the plotted parameters has no position on that axis.
    # Substituting NaN would poison min/max/log10 and create a bogus "nan" category,
    # so such trials are left out of the plot entirely.
    plotted_trials = [t for t in trials if all(p_name in t.params for p_name in sorted_params)]
    if not plotted_trials:
        _logger.warning("Your study has only completed trials with missing parameters.")
        return ax

    obj_org = [target(t) for t in plotted_trials]
    obj_min = min(obj_org)
    obj_max = max(obj_org)
    obj_w = obj_max - obj_min

    # One polyline per plotted trial; each starts at its objective value.
    dims_obj_base = [[o] for o in obj_org]

    cat_param_names = []
    cat_param_values = []
    cat_param_ticks = []
    param_values = []
    var_names = [target_name]

    for p_name in sorted_params:
        values = [t.params[p_name] for t in plotted_trials]

        if _is_log_scale(trials, p_name):
            # Lines are positioned in log space; the raw values are kept for the axis.
            values_for_lc = [math.log10(v) for v in values]
        elif _is_categorical(trials, p_name):
            # Each previously unseen category receives the next integer code.
            vocab = defaultdict(lambda: len(vocab))  # type: DefaultDict[str, int]
            values = [vocab[v] for v in values]

            cat_param_names.append(p_name)
            vocab_item_sorted = sorted(vocab.items(), key=lambda x: x[1])
            cat_param_values.append([v[0] for v in vocab_item_sorted])
            cat_param_ticks.append([v[1] for v in vocab_item_sorted])
            values_for_lc = values
        else:
            values_for_lc = values

        p_min = min(values_for_lc)
        p_max = max(values_for_lc)
        p_w = p_max - p_min

        if p_w == 0:
            # A constant parameter cannot be rescaled (division by zero); draw it at
            # the middle of the objective range instead.
            center = obj_w / 2 + obj_min
            for line in dims_obj_base:
                line.append(center)
        else:
            # Rescale the parameter range onto the objective range.
            for line, v in zip(dims_obj_base, values_for_lc):
                line.append((v - p_min) / p_w * obj_w + obj_min)

        var_names.append(p_name if len(p_name) < 20 else "{}...".format(p_name[:17]))
        param_values.append(values)

    # Draw multiple line plots and axes.
    # Ref: https://stackoverflow.com/a/50029441
    n_params = len(sorted_params)
    axis_positions = range(n_params + 1)
    ax.set_xlim(0, n_params)
    ax.set_ylim(obj_min, obj_max)
    segments = [np.column_stack([axis_positions, line]) for line in dims_obj_base]
    lc = LineCollection(segments, cmap=cmap)
    # Exactly one colour value per line: an extra entry would shift the colour
    # normalisation range away from the real objective values.
    lc.set_array(np.asarray(obj_org))
    axcb = fig.colorbar(lc, pad=0.1)
    axcb.set_label(target_name)
    plt.xticks(axis_positions, var_names, rotation=330)

    for i, p_name in enumerate(sorted_params):
        ax2 = ax.twinx()
        ax2.set_ylim(min(param_values[i]), max(param_values[i]))
        if _is_log_scale(trials, p_name):
            ax2.set_yscale("log")
        ax2.spines["top"].set_visible(False)
        ax2.spines["bottom"].set_visible(False)
        ax2.get_xaxis().set_visible(False)
        ax2.plot([1] * len(param_values[i]), param_values[i], visible=False)
        ax2.spines["right"].set_position(("axes", (i + 1) / n_params))
        if p_name in cat_param_names:
            idx = cat_param_names.index(p_name)
            tick_pos = cat_param_ticks[idx]
            tick_labels = cat_param_values[idx]
            ax2.set_yticks(tick_pos)
            ax2.set_yticklabels(tick_labels)

    ax.add_collection(lc)

    return ax