Пример #1
0
    def _init_flag_views(self):
        '''
            Build one CDSView per known flag value for every tab and store
            them in self.env.flag_views. The result looks like this:

            self.env.flag_views = {
                'TAB_NAME': {
                    0: view_object_0,
                    1: view_object_1,
                }
            }

            NOTE: self.env.all_flags, self.env.source and
                  self.env.f_handler.tab_list are read here, so they have to
                  be created beforehand. An earlier note also listed
                  self.env.tabs_flags_plots as a prerequisite, although this
                  method does not read it.
        '''
        lg.info('-- INIT FLAG VIEWS')

        # TODO: tabs with the same flag should share the views

        for tab in self.env.f_handler.tab_list:
            source = self.env.source

            # Group the row indices of this tab's flag column by flag value
            rows_by_flag = {}
            for row, raw_value in enumerate(source.data[tab + FLAG_END]):
                rows_by_flag.setdefault(int(raw_value), []).append(row)

            # Flag values that never occur in the column get an empty filter
            self.env.flag_views[tab] = {
                flag_value: CDSView(
                    source=source,
                    filters=[IndexFilter(rows_by_flag.get(flag_value, []))],
                )
                for flag_value in list(self.env.all_flags.keys())
            }
Пример #2
0
    def _create_views(self, source, used_configs):
        """Create views in order of plotting, so more interesting views are
        plotted on top. Views are created in this nesting order (later ones
        end up on top):
        type:   Candidate < Incumbent < Final Incumbent < Default
          origin: Unknown < Random < Acquisition Function
            zorder (ascending by integer value)
              run (iteration order of self.configs_in_run)
        Individual views are necessary, since bokeh can only plot one
        marker-type (circle, triangle, ...) per 'scatter'-call

        Parameters
        ----------
        source: ColumnDataSource
            containing relevant information for plotting; the columns 'type', 'origin' and 'zorder' are used
            for filtering
        used_configs: List[Configuration]
            configs that are contained in this source. necessary to plot glyphs for the independent runs so they can be
            toggled. not all configs are in every source because of efficiency: no need to have 0-runs configs

        Returns
        -------
        views: List[CDSView]
            views in order of plotting
        views_by_run: Dict[ConfiguratorRun -> List[int]]
            maps each run to a list of indices of the related glyphs in the returned 'views'-list
        markers: List[string]
            markers (to the view with the same index)
        """

        def _get_marker(t, o):
            """ returns marker according to type t and origin o """
            if t == "Default":
                shape = 'triangle'
            elif t == 'Final Incumbent':
                shape = 'inverted_triangle'
            else:
                shape = 'square' if t == "Incumbent" else 'circle'
                shape += '_x' if o.startswith("Acquisition Function") else ''
            return shape

        # Both of these depend only on the inputs, so they are computed once
        # instead of once per created view.
        zorders = sorted(set(source.data['zorder']), key=int)
        # One boolean per entry of used_configs: does the run contain that config?
        in_run = {run: [c in configs for c in used_configs]
                  for run, configs in self.configs_in_run.items()}

        views, markers = [], []
        views_by_run = {run: [] for run in self.configs_in_run}
        for t in ['Candidate', 'Incumbent', 'Final Incumbent', 'Default']:
            for o in ['Unknown', 'Random', 'Acquisition Function']:
                marker = _get_marker(t, o)
                for z in zorders:
                    for run, booleans in in_run.items():
                        view = CDSView(source=source, filters=[
                                GroupFilter(column_name='type', group=t),
                                GroupFilter(column_name='origin', group=o),
                                GroupFilter(column_name='zorder', group=z),
                                BooleanFilter(booleans)])
                        views_by_run[run].append(len(views))  # index of the view appended next
                        views.append(view)  # all views
                        markers.append(marker)
        self.logger.debug("%d different glyph renderers, %d different zorder-values",
                          len(views), len(zorders))
        return (views, views_by_run, markers)
Пример #3
0
    def _plot_create_views(self, source):
        """Create one view per (type, origin, zorder)-combination, in order of
        plotting, so that views created later are plotted on top.
        Creation order (outermost to innermost):
        type:   Candidate < Incumbent < Final Incumbent < Default
          origin: Unknown < Random < Acquisition Function
            zorder (ascending by integer value)
        Individual views are necessary, since bokeh can only plot one
        marker-type (circle, triangle, ...) per 'scatter'-call

        Parameters
        ----------
        source: ColumnDataSource
            containing relevant information for plotting; the columns 'type',
            'origin' and 'zorder' are used for filtering

        Returns
        -------
        views: List[CDSView]
            views in order of plotting
        markers: List[string]
            markers (to the view with the same index)
        """
        fixed_shapes = {'Default': 'triangle',
                        'Final Incumbent': 'inverted_triangle'}

        def _marker_for(conf_type, origin):
            """ returns marker according to configuration type and origin """
            if conf_type in fixed_shapes:
                return fixed_shapes[conf_type]
            base = 'square' if conf_type == "Incumbent" else 'circle'
            if origin.startswith("Acquisition Function"):
                return base + '_x'
            return base

        type_order = ['Candidate', 'Incumbent', 'Final Incumbent', 'Default']
        origin_order = ['Unknown', 'Random', 'Acquisition Function']
        zorders = sorted(set(source.data['zorder']), key=int)

        # Every combination, already in plotting order
        combos = [(conf_type, origin, zorder)
                  for conf_type in type_order
                  for origin in origin_order
                  for zorder in zorders]

        views = [
            CDSView(source=source,
                    filters=[
                        GroupFilter(column_name='type', group=conf_type),
                        GroupFilter(column_name='origin', group=origin),
                        GroupFilter(column_name='zorder', group=zorder)
                    ])
            for conf_type, origin, zorder in combos
        ]
        markers = [_marker_for(conf_type, origin)
                   for conf_type, origin, _ in combos]

        self.logger.debug(
            "%d different glyph renderers, %d different zorder-values",
            len(views), len(set(source.data['zorder'])))
        return (views, markers)
Пример #4
0
def generate_view(source, axis, positive):
    """Return a CDSView on `source` filtered by the sign of one column.

    The view uses a CustomJSFilter that keeps every row whose value in column
    `axis` is `>= 0` when `positive` is truthy, and `<= 0` otherwise (rows
    with value 0 pass in both cases).

    NOTE: `axis` is interpolated into the JavaScript snippet as-is, so it
    must not contain a single quote.
    """
    comparison = '>=' if positive else '<='
    js_code = f"""
        var indices = [];

        for (var i = 0; i < source.get_length(); i++){{
            if (source.data['{axis}'][i] {comparison} 0){{
                indices.push(true);
            }} else {{
                indices.push(false);
            }}
        }}
        return indices;
    """
    sign_filter = CustomJSFilter(code=js_code)
    return CDSView(source=source, filters=[sign_filter])
Пример #5
0
    def plot(self):
        """
        Plot the estimated cost over time, using all trajectory entries.

        If self.bohb_results is set, one line for all budgets plus one line per
        budget is drawn; otherwise the average plus every individual run. Only
        lines named 'average' or 'all budgets' start out visible. The figure is
        exported via export_bokeh to <self.output_dir>/<self.output_fn> and
        that path is appended to self.plots.

        Returns
        -------
        layout
            bokeh row holding the figure and the checkbox / select-all /
            select-none widgets

        Raises
        ------
        ValueError
            if any mean, upper or lower value of a line is not finite
        """
        run_history, runs, output_fn, validator = (self.rh, self.runs,
                                                   self.output_fn,
                                                   self.validator)

        # Collect the curves to draw (their attributes must be zippable)
        if self.bohb_results:
            lines = [self._get_bohb_line(validator, runs, run_history)]
            for budget in self.bohb_results[0].HB_config['budgets']:
                lines.append(
                    self._get_bohb_line(validator, runs, run_history, budget))
        else:
            lines = [self._get_avg(validator, runs, run_history)]
            lines.extend(self._get_all_runs(validator, runs, run_history))

        # Flatten all curves into one column-oriented table
        data = {'name': [], 'time': [], 'mean': [], 'upper': [], 'lower': []}
        hp_names = self.scenario.cs.get_hyperparameter_names()
        for hp in hp_names:
            data[hp] = []
        for line in lines:
            entries = zip(line.time, line.mean, line.upper, line.lower,
                          line.config)
            for time_stamp, mean, upper, lower, config in entries:
                if not all(np.isfinite(v) for v in (mean, upper, lower)):
                    self.logger.debug("Why is there a NaN? (%s)", str(line))
                    raise ValueError(
                        "There is a NaN value in your data, this should be filtered out. "
                        "Please report this to github.com/automl/CAVE/issues and provide the "
                        "debug/debug.log and the output of `pip freeze`, if you can."
                    )
                data['name'].append(line.name)
                data['time'].append(time_stamp)
                data['mean'].append(mean)
                data['upper'].append(upper)
                data['lower'].append(lower)
                for hp in hp_names:
                    if config and hp in config:
                        data[hp].append(config[hp])
                    else:
                        data[hp].append('inactive')
        source = ColumnDataSource(data=data)

        # Create the figure
        x_range = Range1d(min(source.data['time']), max(source.data['time']))
        run_obj = self.scenario.run_obj
        y_label = 'estimated {}'.format('cost' if run_obj == 'quality' else run_obj)
        fig = figure(plot_width=700,
                     plot_height=500,
                     tools=['save', 'pan', 'box_zoom', 'wheel_zoom', 'reset'],
                     x_range=x_range,
                     x_axis_type='log',
                     y_axis_type='log' if run_obj == 'runtime' else 'linear',
                     x_axis_label='time (sec)',
                     y_axis_label=y_label,
                     title="Cost over time")

        # One renderer group per line; the groups are nested lists because
        # the checkbox-code expects them that way
        initially_visible = ['average', 'all budgets']
        renderers = []
        legend_it = []
        for line, color in zip(lines, itertools.cycle(Dark2_5)):
            name = line.name
            show = line.name in initially_visible

            # Select this line's rows from the shared source by name
            view = CDSView(
                source=source,
                filters=[GroupFilter(column_name='name', group=str(name))])
            group = [
                fig.line('time',
                         'mean',
                         source=source,
                         view=view,
                         line_color=color,
                         visible=show)
            ]
            renderers.append(group)

            # The legend entry shares the group list, so an uncertainty band
            # added below becomes part of the legend entry as well
            legend_it.append((name, group))

            if name in initially_visible or 'budget' in name:
                # Uncertainty band: the outline runs forward along the lower
                # bound and back along the upper bound
                band_x = np.append(line.time, line.time[::-1])
                band_y = np.append(line.lower, line.upper[::-1])
                group.append(
                    fig.patch(band_x,
                              band_y,
                              color='#7570B3',
                              fill_alpha=0.2,
                              visible=show))

        # Tooltips (same for every renderer)
        # TODO optional: keep hovertips at a fixed position and activate
        # different tooltips (e.g. incl. hyperparameters) per renderer
        tooltips = [("estimated performance", "@mean"), ("at-time", "@time")]
        all_renderers = [glyph for group in renderers for glyph in group]
        fig.add_tools(HoverTool(renderers=all_renderers, tooltips=tooltips))

        # Checkbox widgets toggling the renderer groups
        labels = [entry[0] for entry in legend_it]
        checkbox, select_all, select_none = get_checkbox(renderers, labels)
        checkbox.active = [0]

        # Tilt tick labels and configure axis labels
        fig.xaxis.major_label_orientation = 3 / 4

        fig.xaxis.axis_label_text_font_size = "15pt"
        fig.yaxis.axis_label_text_font_size = "15pt"
        fig.xaxis.major_label_text_font_size = "12pt"
        fig.yaxis.major_label_text_font_size = "12pt"
        fig.title.text_font_size = "15pt"

        legend = Legend(items=legend_it,
                        location='bottom_left',
                        label_text_font_size="8pt")
        legend.click_policy = "hide"
        fig.add_layout(legend, 'right')

        # Arrange figure and widgets
        checkbox_box = widgetbox(checkbox, width=100)
        select_all_box = widgetbox(select_all, width=50)
        select_none_box = widgetbox(select_none, width=50)
        layout = row(fig,
                     column(checkbox_box, row(select_all_box,
                                              select_none_box)))

        # Export the figure and remember where it was written
        output_path = os.path.join(self.output_dir, output_fn)
        export_bokeh(fig, output_path, self.logger)
        self.plots.append(output_path)

        return layout
Пример #6
0
    def _plot(self,
              result_object,
              learning_curves,
              hyperparameter_names,
              reset_times=False):
        """Create an interactive bokeh plot of learning curves (loss over time).

        Every curve is drawn as a line plus one scatter point per measurement,
        grouped by HB-iteration (first element of the config-id) so the groups
        can be toggled via checkboxes. A select-widget switches the coloring
        between final performance and iteration.

        Parameters
        ----------
        result_object:
            object providing ``get_id2config_mapping()``; it is also passed
            to ``self.get_longest_run``
        learning_curves: dict
            maps a config-id to an iterable of learning curves, each curve
            being an iterable of (time, loss)-pairs. Entries whose loss is
            None or not finite are dropped.
        hyperparameter_names: iterable of str
            hyperparameters to be added as columns (and tooltips)
        reset_times: bool
            if True, every curve is shifted so that it starts at time 0

        Returns
        -------
        layout
            bokeh row with the figure and the checkbox/select widgets

        Raises
        ------
        ValueError
            if times and losses of a curve differ in length or if there is no
            curve left to plot
        """
        # Extract information from learning-curve-dict
        times, losses, config_ids = [], [], []
        for conf_id, curves in learning_curves.items():
            for lc in curves:
                if len(lc) == 0:
                    continue
                # Check for None first, np.isfinite(None) raises a TypeError
                tmp = list(
                    zip(*[(time, loss) for time, loss in lc
                          if loss is not None and np.isfinite(loss)]))
                if len(tmp) == 0:
                    self.logger.debug(
                        "Probably filtered NaNs or None's.., skipping %s, data %s",
                        str(conf_id), str(lc))
                    continue
                times.append(tmp[0])
                losses.append(tmp[1])
                config_ids.append(conf_id)

        if reset_times:
            times = [np.array(ts) - ts[0] for ts in times]

        # Prepare ColumnDataSource
        data = OrderedDict([
            ('config_id', []),
            ('config_info', []),
            ('times', []),
            ('losses', []),
            ('duration', []),
            ('HB_iteration', []),
            ('colors', []),
            ('colors_performance', []),
            ('colors_iteration', []),
        ])
        for hp in hyperparameter_names:
            data[hp] = []

        # Populate
        id2conf = result_object.get_id2config_mapping()
        for counter, c_id in enumerate(config_ids):
            if len(times[counter]) != len(losses[counter]):
                raise ValueError(
                    "Number of times and losses differ for config {}".format(c_id))
            longest_run = self.get_longest_run(c_id, result_object)
            if not longest_run:
                # Without a run there is no duration, skip this config
                continue
            data['config_id'].append(str(c_id))
            try:
                conf_info = '\n'.join([
                    str(k) + "=" + str(v)
                    for k, v in sorted(id2conf[c_id]['config_info'].items())
                ])
            except KeyError:
                conf_info = 'Not Available'
            data['config_info'].append(conf_info)
            data['times'].append(times[counter])
            data['losses'].append(losses[counter])
            data['duration'].append(
                longest_run['time_stamps']['finished'] -
                longest_run['time_stamps']['started'])
            data['HB_iteration'].append(str(c_id[0]))
            for hp in hyperparameter_names:
                try:
                    data[hp].append(id2conf[c_id]['config'][hp])
                except KeyError:
                    data[hp].append("None")
            # Initial coloring is by performance (= last loss of the curve)
            data['colors'].append(losses[counter][-1])
            data['colors_performance'].append(losses[counter][-1])
            data['colors_iteration'].append(c_id[0])

        if not data['config_id']:
            # The min/max-computations below would fail with a less helpful message
            raise ValueError("No learning curves available for plotting.")

        # Tooltips
        tooltips = [(key, '@' + key) for key in data.keys() if key not in [
            'times', 'duration', 'colors', 'colors_performance',
            'colors_iteration'
        ]]
        tooltips.insert(4, ('duration (sec)', '@duration'))
        tooltips.insert(5, ('Configuration', ' '))
        hover = HoverTool(tooltips=tooltips)

        # Create sources
        source_multiline = ColumnDataSource(data=data)
        # Special source for scattering points, since times and losses for multi_line are nested lists
        scatter_data = {key: [] for key in data.keys()}
        for idx in range(len(data['config_id'])):
            for t, l in zip(data['times'][idx], data['losses'][idx]):
                scatter_data['times'].append(t)
                scatter_data['losses'].append(l)
                for key in list(data.keys()):
                    if key in ['times', 'losses']:
                        continue
                    scatter_data[key].append(data[key][idx])
        source_scatter = ColumnDataSource(data=scatter_data)

        # Color
        final_losses = [loss[-1] for loss in data['losses']]
        min_perf, max_perf = min(final_losses), max(final_losses)
        iterations = [int(i) for i in data['HB_iteration']]
        min_iter, max_iter = min(iterations), max(iterations)
        color_mapper = LinearColorMapper(palette=Spectral11,
                                         low=min_perf,
                                         high=max_perf)

        # Create plot (logscale only possible if all losses are positive)
        y_axis_type = "log" if len(
            [a for a in scatter_data['losses'] if a <= 0]) == 0 else 'linear'

        x_min, x_max = min(scatter_data['times']), max(scatter_data['times'])
        x_pad = (x_max - x_min) / 10
        x_min -= x_pad
        x_max += x_pad
        y_min, y_max = min(scatter_data['losses']), max(scatter_data['losses'])
        y_pad = (y_max - y_min) / 10
        y_min -= (
            y_min / 10
        ) if y_axis_type == 'log' else y_pad  # because this must not be below 0 if it's a logscale
        y_max += y_pad * 10 if y_axis_type == 'log' else y_pad
        p = figure(
            plot_height=500,
            plot_width=600,
            y_axis_type=y_axis_type,
            tools=[hover, 'save', 'pan', 'wheel_zoom', 'box_zoom', 'reset'],
            x_axis_label='Time',
            y_axis_label='Cost',
            x_range=Range1d(x_min, x_max, bounds='auto'),
            y_range=Range1d(y_min, y_max, bounds='auto'),
        )

        # Plot per HB_iteration, each config individually
        HB_iterations = sorted(set(data['HB_iteration']))
        max_label_len = max([len(iteration) for iteration in HB_iterations])
        HB_handles, HB_labels = [], []
        self.logger.debug(
            "Assuming config_info to be either \"model_based_pick=True\" or \"model_based_pick=False\""
        )
        for it in HB_iterations:
            line_handles = []
            view = CDSView(source=source_multiline,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it))
                           ])
            line_handles.append(
                p.multi_line(
                    xs='times',
                    ys='losses',
                    source=source_multiline,
                    view=view,
                    color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    alpha=0.5,
                    line_width=5,
                ))
            # Separate model-based and random configurations (different markers)
            view = CDSView(source=source_scatter,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it)),
                               GroupFilter(column_name='config_info',
                                           group="model_based_pick=True")
                           ])
            line_handles.append(
                p.circle_x(
                    x='times',
                    y='losses',
                    source=source_scatter,
                    view=view,
                    fill_color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    fill_alpha=0.5,
                    line_color='colors',
                    size=20,
                ))
            view = CDSView(source=source_scatter,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it)),
                               GroupFilter(column_name='config_info',
                                           group="model_based_pick=False")
                           ])
            line_handles.append(
                p.circle(
                    x='times',
                    y='losses',
                    source=source_scatter,
                    view=view,
                    fill_color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    fill_alpha=0.5,
                    line_color='colors',
                    size=20,
                ))
            HB_handles.append(line_handles)
            # Zero-padded labels, so sorting the labels sorts the iterations
            HB_labels.append('warmstart data' if it in
                             [-1, '-1'] else '{number:0{width}d}'.
                             format(width=max_label_len, number=int(it)))

        # Sort all lists according to label
        HB_iterations, HB_handles, HB_labels = zip(*sorted(
            zip(HB_iterations, HB_handles, HB_labels), key=lambda tup: tup[2]))
        HB_iterations, HB_handles, HB_labels = list(HB_iterations), list(
            HB_handles), list(HB_labels)
        self.logger.debug("HB_iterations to labels: %s",
                          str(list(zip(HB_iterations, HB_labels))))

        chckboxes, select_all, select_none = get_checkbox(
            HB_handles, HB_labels, max_checkbox_length=10)

        # Both sources have to emit a change; only the names passed via
        # `args` are available inside the JS-code.
        callback_color = CustomJS(args=dict(source_multiline=source_multiline,
                                            source_scatter=source_scatter,
                                            cm=color_mapper),
                                  code="""
            var data_multiline = source_multiline.data;
            var data_scatter = source_scatter.data;
            var min_perf = {0};
            var max_perf = {1};
            var min_iter = {2};
            var max_iter = {3};
            if (cb_obj.value == 'performance') {{
                data_multiline['colors'] = data_multiline['colors_performance'];
                data_scatter['colors'] = data_scatter['colors_performance'];
                cm.low = min_perf;
                cm.high = max_perf;
            }} else {{
                data_multiline['colors'] = data_multiline['colors_iteration'];
                data_scatter['colors'] = data_scatter['colors_iteration'];
                cm.low = min_iter;
                cm.high = max_iter;
            }}
            source_multiline.change.emit();
            source_scatter.change.emit();
            """.format(min_perf, max_perf, min_iter, max_iter))

        select_color = Select(title="Select colors",
                              value="performance",
                              options=["performance", "iteration"],
                              callback=callback_color)

        # Put it all together in a layout (width of checkbox-field sizes with number of elements)
        chkbox_width = 650 if len(HB_labels) > 100 else 500 if len(
            HB_labels) > 70 else 400
        layout = row(
            p,
            column(
                *[
                    widgetbox(chkbox,
                              max_width=chkbox_width,
                              width_policy="min") for chkbox in chckboxes
                ],
                row(widgetbox(select_all, width=50),
                    widgetbox(select_none, width=50)),
                widgetbox(select_color, width=200)))
        return layout