Пример #1
0
    def _create_views(self, source, used_configs):
        """Create views in order of plotting, so more interesting views are
        plotted on top (views created later are appended later). Order of
        creation, least interesting first:
        type:   candidate < incumbent < final-incumbent < default
          origin: unknown < random < acquisition function
            zorder (ascending by integer value)
        Individual views are necessary, since bokeh can only plot one
        marker-type (circle, triangle, ...) per 'scatter'-call

        Parameters
        ----------
        source: ColumnDataSource
            containing relevant information for plotting
        used_configs: List[Configuration]
            configs that are contained in this source. necessary to plot glyphs for the independent runs so they can be
            toggled. not all configs are in every source because of efficiency: no need to have 0-runs configs

        Returns
        -------
        views: List[CDSView]
            views in order of plotting
        views_by_run: Dict[ConfiguratorRun -> List[int]]
            maps each run to a list of indices of the related glyphs in the returned 'views'-list
        markers: List[string]
            markers (to the view with the same index)
        """

        def _get_marker(t, o):
            """ returns marker according to type t and origin o """
            if t == "Default":
                shape = 'triangle'
            elif t == 'Final Incumbent':
                shape = 'inverted_triangle'
            else:
                shape = 'square' if t == "Incumbent" else 'circle'
                shape += '_x' if o.startswith("Acquisition Function") else ''
            return shape

        # Both of these are independent of type/origin, so compute them once
        # instead of once per generated view.
        zorders = sorted(set(source.data['zorder']), key=int)
        # One boolean mask per run: which rows of the source (aligned with
        # used_configs) belong to that run.
        masks = {run: [c in configs for c in used_configs]
                 for run, configs in self.configs_in_run.items()}

        views, markers = [], []
        views_by_run = {run: [] for run in self.configs_in_run}
        for t in ['Candidate', 'Incumbent', 'Final Incumbent', 'Default']:
            for o in ['Unknown', 'Random', 'Acquisition Function']:
                marker = _get_marker(t, o)
                for z in zorders:
                    for run, mask in masks.items():
                        view = CDSView(source=source, filters=[
                            GroupFilter(column_name='type', group=t),
                            GroupFilter(column_name='origin', group=o),
                            GroupFilter(column_name='zorder', group=z),
                            # copy, so every filter owns its own list as before
                            BooleanFilter(list(mask))])
                        views_by_run[run].append(len(views))  # index of the view appended next
                        views.append(view)  # all views
                        markers.append(marker)
        self.logger.debug("%d different glyph renderers, %d different zorder-values",
                          len(views), len(zorders))
        return (views, views_by_run, markers)
Пример #2
0
    def _plot_create_views(self, source):
        """Build one view (and its marker) per combination of configuration
        type, origin and zorder-value.

        Views are created in order of plotting, so the views that are created
        last end up on top. Order of creation, least interesting first:
        type:   candidate < incumbent < final-incumbent < default
          origin: unknown < random < acquisition function
            zorder (ascending by integer value)
        Individual views are necessary, since bokeh can only plot one
        marker-type (circle, triangle, ...) per 'scatter'-call

        Parameters
        ----------
        source: ColumnDataSource
            containing relevant information for plotting

        Returns
        -------
        views: List[CDSView]
            views in order of plotting
        markers: List[string]
            markers (to the view with the same index)
        """
        def _get_marker(t, o):
            """ returns marker according to type t and origin o """
            if t == "Default":
                return 'triangle'
            if t == 'Final Incumbent':
                return 'inverted_triangle'
            base = 'square' if t == "Incumbent" else 'circle'
            if o.startswith("Acquisition Function"):
                return base + '_x'
            return base

        config_types = ('Candidate', 'Incumbent', 'Final Incumbent', 'Default')
        origins = ('Unknown', 'Random', 'Acquisition Function')
        zorder_levels = sorted(set(source.data['zorder']), key=int)

        # Every (type, origin, zorder) triple, in plotting order
        combos = [(t, o, z)
                  for t in config_types
                  for o in origins
                  for z in zorder_levels]

        filter_columns = ('type', 'origin', 'zorder')
        views = [
            CDSView(source=source,
                    filters=[GroupFilter(column_name=column, group=value)
                             for column, value in zip(filter_columns, combo)])
            for combo in combos
        ]
        markers = [_get_marker(t, o) for t, o, _ in combos]

        self.logger.debug(
            "%d different glyph renderers, %d different zorder-values",
            len(views), len(zorder_levels))
        return (views, markers)
Пример #3
0
    def _create_plot(self,
                     x_range,
                     s_data,
                     x_minor_values,
                     colors,
                     chart_title=None,
                     x_title=None,
                     y_title='Occurrences per 1000 words',
                     x_major_name=None,
                     x2_minor_name=None):
        """Create a grouped bar chart with one dodged bar series per minor value.

        Arguments:
        x_range the range of major X values
        s_data the data source shared by all bar series; its 'per1000' column
               gives the bar heights and its 'Count' column is shown on hover
        x_minor_values the section names used to filter s_data per series
        colors a color palette, indexed by the position of the section
        chart_title the title for the chart
        x_title the X axis title
        y_title the Y axis title
        x_major_name not used here (self._x_major_name is used instead)
        x2_minor_name not used here (self._x_minor_name is used instead)

        Return - the figure
        """
        fig = figure(
            x_range=x_range,
            plot_width=900,
            title=chart_title,
            tools="crosshair,pan,wheel_zoom,box_zoom,reset,save,tap,box_select",
            toolbar_location='above')

        major_col = self._x_major_name
        minor_col = self._x_minor_name

        # Each major category is divided into equally wide slots: one per
        # series plus one empty slot on either side.
        slot_count = len(x_minor_values) + 2
        slot_width = 1 / slot_count
        bar_width = 0.8 / slot_count
        first_offset = -(slot_count / 2) * slot_width + (slot_width / 2)

        legend_entries = []
        for slot, section in enumerate(x_minor_values, start=1):
            # The view restricts the shared source to the rows of this section
            section_view = CDSView(source=s_data,
                                   filters=[GroupFilter(minor_col, section)])
            shift = first_offset + slot * slot_width
            section_bars = fig.vbar(
                x=dodge(major_col, shift, range=fig.x_range),
                top='per1000',
                width=bar_width,
                source=s_data,
                view=section_view,
                fill_color=colors[slot - 1])
            legend_entries.append(
                LegendItem(label=section, renderers=[section_bars]))

            # One hover tool per series. toggleable is False because every
            # toggleable HoverTool gets its own icon in the toolbar, which
            # becomes a mess with large numbers of sections.
            section_tooltips = [
                (major_col, '@' + major_col),
                (minor_col, '{}'.format(section)),
                ("Total Hits", "@{Count}"),
                ("Hits/1000", "@{per1000}"),
            ]
            fig.add_tools(
                HoverTool(tooltips=section_tooltips,
                          renderers=[section_bars],
                          toggleable=False,
                          point_policy='follow_mouse'))

        fig.add_layout(Legend(items=legend_entries, location='center'),
                       'right')

        # Axis titles; major X labels are rotated by 45 degrees
        fig.xaxis.axis_label = x_title
        fig.yaxis.axis_label = y_title
        fig.xaxis.major_label_orientation = math.pi / 4

        return fig
Пример #4
0
    def plot(self):
        """
        Plot the estimated performance over time for all collected lines.

        With BOHB results, one line over all budgets plus one line per budget
        is plotted; otherwise the average plus all individual runs. The figure
        is exported to ``self.output_dir/self.output_fn`` and that path is
        appended to ``self.plots``.

        Returns
        -------
        layout: bokeh layout
            the figure next to a checkbox group with select-all/-none buttons

        Raises
        ------
        ValueError
            if a mean, upper or lower value of any line is not finite
        """
        rh = self.rh
        runs = self.runs
        output_fn = self.output_fn
        validator = self.validator

        # Collect the lines to be plotted (their fields must be zippable)
        if self.bohb_results:
            curves = [self._get_bohb_line(validator, runs, rh)]
            curves += [self._get_bohb_line(validator, runs, rh, budget)
                       for budget in self.bohb_results[0].HB_config['budgets']]
        else:
            curves = [self._get_avg(validator, runs, rh)]
            curves.extend(self._get_all_runs(validator, runs, rh))

        # Flatten all lines into a single column-oriented table
        columns = {key: [] for key in ('name', 'time', 'mean', 'upper', 'lower')}
        hp_names = self.scenario.cs.get_hyperparameter_names()
        columns.update((hp, []) for hp in hp_names)
        for curve in curves:
            points = zip(curve.time, curve.mean, curve.upper, curve.lower,
                         curve.config)
            for timestamp, mean, upper, lower, config in points:
                all_finite = (np.isfinite(mean) and np.isfinite(upper)
                              and np.isfinite(lower))
                if not all_finite:
                    self.logger.debug("Why is there a NaN? (%s)", str(curve))
                    raise ValueError(
                        "There is a NaN value in your data, this should be filtered out. "
                        "Please report this to github.com/automl/CAVE/issues and provide the "
                        "debug/debug.log and the output of `pip freeze`, if you can."
                    )
                columns['name'].append(curve.name)
                columns['time'].append(timestamp)
                columns['mean'].append(mean)
                columns['upper'].append(upper)
                columns['lower'].append(lower)
                for hp in hp_names:
                    if config and hp in config:
                        columns[hp].append(config[hp])
                    else:
                        columns[hp].append('inactive')
        source = ColumnDataSource(data=columns)

        # Create plot
        time_column = source.data['time']
        run_obj = self.scenario.run_obj
        y_label = 'estimated {}'.format(
            run_obj if run_obj != 'quality' else 'cost')
        y_axis_type = 'log' if self.scenario.run_obj == 'runtime' else 'linear'
        fig = figure(plot_width=700,
                     plot_height=500,
                     tools=['save', 'pan', 'box_zoom', 'wheel_zoom', 'reset'],
                     x_range=Range1d(min(time_column), max(time_column)),
                     x_axis_type='log',
                     y_axis_type=y_axis_type,
                     x_axis_label='time (sec)',
                     y_axis_label=y_label,
                     title="Cost over time")

        summary_names = ['average', 'all budgets']
        renderers = []       # one list of glyph renderers per line
        legend_entries = []  # (name, renderer list) pairs, sharing the lists above
        for curve, color in zip(curves, itertools.cycle(Dark2_5)):
            name = curve.name
            is_summary = name in summary_names

            # Each line only sees its own rows of the shared source
            own_rows = CDSView(
                source=source,
                filters=[GroupFilter(column_name='name', group=str(name))])
            group = [
                fig.line('time',
                         'mean',
                         source=source,
                         view=own_rows,
                         line_color=color,
                         visible=is_summary)
            ]
            renderers.append(group)
            # The legend entry refers to the same list object, so a band that
            # is added below is part of the legend entry as well.
            legend_entries.append((name, group))

            if is_summary or 'budget' in name:
                # Uncertainty band as a closed outline: lower values forwards
                # in time, then upper values backwards in time.
                band_x = np.append(curve.time, curve.time[::-1])
                band_y = np.append(curve.lower, curve.upper[::-1])
                group.append(
                    fig.patch(band_x,
                              band_y,
                              color='#7570B3',
                              fill_alpha=0.2,
                              visible=is_summary))

        # Tooltips, shared by all renderers
        # TODO optional: different tooltips (e.g. including hyperparameters)
        # for different renderers
        fig.add_tools(
            HoverTool(
                renderers=list(itertools.chain.from_iterable(renderers)),
                tooltips=[("estimated performance", "@mean"),
                          ("at-time", "@time")],
            ))

        # The checkbox-code expects the renderers as nested lists
        checkbox, select_all, select_none = get_checkbox(
            renderers, [label for label, _ in legend_entries])
        checkbox.active = [0]

        # Tilt tick labels and configure font sizes
        fig.xaxis.major_label_orientation = 3 / 4
        fig.xaxis.axis_label_text_font_size = "15pt"
        fig.yaxis.axis_label_text_font_size = "15pt"
        fig.xaxis.major_label_text_font_size = "12pt"
        fig.yaxis.major_label_text_font_size = "12pt"
        fig.title.text_font_size = "15pt"

        legend = Legend(items=legend_entries,
                        location='bottom_left',
                        label_text_font_size="8pt")
        legend.click_policy = "hide"
        fig.add_layout(legend, 'right')

        # Arrange figure and widgets, then export the figure
        buttons = row(widgetbox(select_all, width=50),
                      widgetbox(select_none, width=50))
        layout = row(fig, column(widgetbox(checkbox, width=100), buttons))

        output_path = os.path.join(self.output_dir, output_fn)
        export_bokeh(fig, output_path, self.logger)
        self.plots.append(output_path)

        return layout
Пример #5
0
    def _plot(self,
              result_object,
              learning_curves,
              hyperparameter_names,
              reset_times=False):
        """Create an interactive bokeh plot of learning curves (cost over time).

        Every curve is drawn as a line plus one marker per point. Curves are
        grouped by the first element of their config-id, so groups can be
        toggled with checkboxes, and a select-widget switches the coloring
        between final performance and iteration.

        Parameters
        ----------
        result_object:
            object providing ``get_id2config_mapping()``; it is also passed to
            ``self.get_longest_run``
        learning_curves: dict
            maps a config-id to an iterable of learning curves, each one being
            a sequence of (time, loss) pairs
        hyperparameter_names: iterable of str
            hyperparameters that get their own data column (shown on hover)
        reset_times: bool
            if True, every curve is shifted so that it starts at time 0

        Returns
        -------
        layout: bokeh layout
            the figure next to the checkbox-, button- and select-widgets

        Raises
        ------
        ValueError
            if no curve with finite losses (and a known longest run) is left
            to plot, or if times and losses of a curve differ in length
        """
        # Extract information from learning-curve-dict
        times, losses, config_ids = [], [], []
        for conf_id, curves in learning_curves.items():
            for lc in curves:
                if len(lc) == 0:
                    continue
                # Check for None first: np.isfinite(None) raises a TypeError
                finite_points = [(time, loss) for time, loss in lc
                                 if loss is not None and np.isfinite(loss)]
                if not finite_points:
                    self.logger.debug(
                        "Probably filtered NaNs or None's.., skipping %s, data %s",
                        str(conf_id), str(lc))
                    continue
                curve_times, curve_losses = zip(*finite_points)
                times.append(curve_times)
                losses.append(curve_losses)
                config_ids.append(conf_id)

        if reset_times:
            times = [np.array(ts) - ts[0] for ts in times]

        # Prepare ColumnDataSource
        data = OrderedDict([
            ('config_id', []),
            ('config_info', []),
            ('times', []),
            ('losses', []),
            ('duration', []),
            ('HB_iteration', []),
            ('colors', []),
            ('colors_performance', []),
            ('colors_iteration', []),
        ])
        for hp in hyperparameter_names:
            data[hp] = []

        # Populate
        id2conf = result_object.get_id2config_mapping()
        for counter, c_id in enumerate(config_ids):
            if len(times[counter]) != len(losses[counter]):
                raise ValueError(
                    "Number of times and losses differ for config %s" % str(c_id))
            longest_run = self.get_longest_run(c_id, result_object)
            if not longest_run:
                # Curves without a known longest run are not plotted at all
                continue
            data['config_id'].append(str(c_id))
            try:
                conf_info = '\n'.join([
                    str(k) + "=" + str(v)
                    for k, v in sorted(id2conf[c_id]['config_info'].items())
                ])
            except KeyError:
                conf_info = 'Not Available'
            data['config_info'].append(conf_info)
            data['times'].append(times[counter])
            data['losses'].append(losses[counter])
            data['duration'].append(longest_run['time_stamps']['finished'] -
                                    longest_run['time_stamps']['started'])
            data['HB_iteration'].append(str(c_id[0]))
            for hp in hyperparameter_names:
                try:
                    data[hp].append(id2conf[c_id]['config'][hp])
                except KeyError:
                    data[hp].append("None")
            # Initial coloring is by performance (loss of the last point)
            data['colors'].append(losses[counter][-1])
            data['colors_performance'].append(losses[counter][-1])
            data['colors_iteration'].append(c_id[0])

        if not data['config_id']:
            # min()/max() below would fail with an unhelpful message otherwise
            raise ValueError("No learning curves with finite losses available for plotting.")

        # Tooltips
        tooltips = [(key, '@' + key) for key in data.keys() if key not in [
            'times', 'duration', 'colors', 'colors_performance',
            'colors_iteration'
        ]]
        tooltips.insert(4, ('duration (sec)', '@duration'))
        tooltips.insert(5, ('Configuration', ' '))
        hover = HoverTool(tooltips=tooltips)

        # Create sources
        source_multiline = ColumnDataSource(data=data)
        # Special source for scattering points, since times and losses for multi_line are nested lists
        scatter_data = {key: [] for key in data.keys()}
        for idx in range(len(data['config_id'])):
            for t, l in zip(data['times'][idx], data['losses'][idx]):
                scatter_data['times'].append(t)
                scatter_data['losses'].append(l)
                for key in data.keys():
                    if key in ['times', 'losses']:
                        continue
                    scatter_data[key].append(data[key][idx])
        source_scatter = ColumnDataSource(data=scatter_data)

        # Color
        final_losses = [loss[-1] for loss in data['losses']]
        iterations = [int(i) for i in data['HB_iteration']]
        min_perf, max_perf = min(final_losses), max(final_losses)
        min_iter, max_iter = min(iterations), max(iterations)
        color_mapper = LinearColorMapper(palette=Spectral11,
                                         low=min_perf,
                                         high=max_perf)

        # Create plot (a log-scale is only possible if all losses are positive)
        has_non_positive = any(a <= 0 for a in scatter_data['losses'])
        y_axis_type = 'linear' if has_non_positive else "log"

        x_min, x_max = min(scatter_data['times']), max(scatter_data['times'])
        x_pad = (x_max - x_min) / 10
        x_min -= x_pad
        x_max += x_pad
        y_min, y_max = min(scatter_data['losses']), max(scatter_data['losses'])
        y_pad = (y_max - y_min) / 10
        # the lower bound must not be below 0 if it's a logscale
        y_min -= (y_min / 10) if y_axis_type == 'log' else y_pad
        y_max += y_pad * 10 if y_axis_type == 'log' else y_pad
        p = figure(
            plot_height=500,
            plot_width=600,
            y_axis_type=y_axis_type,
            tools=[hover, 'save', 'pan', 'wheel_zoom', 'box_zoom', 'reset'],
            x_axis_label='Time',
            y_axis_label='Cost',
            x_range=Range1d(x_min, x_max, bounds='auto'),
            y_range=Range1d(y_min, y_max, bounds='auto'),
        )

        # Plot per HB_iteration, each config individually
        HB_iterations = sorted(set(data['HB_iteration']))
        max_label_len = max([len(iteration) for iteration in HB_iterations])
        HB_handles, HB_labels = [], []
        self.logger.debug(
            "Assuming config_info to be either \"model_based_pick=True\" or \"model_based_pick=False\""
        )
        for it in HB_iterations:
            line_handles = []
            view = CDSView(source=source_multiline,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it))
                           ])
            line_handles.append(
                p.multi_line(
                    xs='times',
                    ys='losses',
                    source=source_multiline,
                    view=view,
                    color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    alpha=0.5,
                    line_width=5,
                ))
            # Separate model-based and random picks (different markers)
            view = CDSView(source=source_scatter,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it)),
                               GroupFilter(column_name='config_info',
                                           group="model_based_pick=True")
                           ])
            line_handles.append(
                p.circle_x(
                    x='times',
                    y='losses',
                    source=source_scatter,
                    view=view,
                    fill_color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    fill_alpha=0.5,
                    line_color='colors',
                    size=20,
                ))
            view = CDSView(source=source_scatter,
                           filters=[
                               GroupFilter(column_name='HB_iteration',
                                           group=str(it)),
                               GroupFilter(column_name='config_info',
                                           group="model_based_pick=False")
                           ])
            line_handles.append(
                p.circle(
                    x='times',
                    y='losses',
                    source=source_scatter,
                    view=view,
                    fill_color={
                        'field': 'colors',
                        'transform': color_mapper
                    },
                    fill_alpha=0.5,
                    line_color='colors',
                    size=20,
                ))
            HB_handles.append(line_handles)
            # Zero-padded labels, so sorting the labels as strings sorts the iterations
            HB_labels.append('warmstart data' if it in
                             [-1, '-1'] else '{number:0{width}d}'.
                             format(width=max_label_len, number=int(it)))

        # Sort all lists according to label
        HB_iterations, HB_handles, HB_labels = zip(*sorted(
            zip(HB_iterations, HB_handles, HB_labels), key=lambda tup: tup[2]))
        HB_iterations, HB_handles, HB_labels = list(HB_iterations), list(
            HB_handles), list(HB_labels)
        self.logger.debug("HB_iterations to labels: %s",
                          str(list(zip(HB_iterations, HB_labels))))

        chckboxes, select_all, select_none = get_checkbox(
            HB_handles, HB_labels, max_checkbox_length=10)

        # Only the names passed via `args` (plus cb_obj) exist inside the
        # callback, so change-events have to be emitted on both sources.
        callback_color = CustomJS(args=dict(source_multiline=source_multiline,
                                            source_scatter=source_scatter,
                                            cm=color_mapper),
                                  code="""
            var data_multiline = source_multiline.data;
            var data_scatter = source_scatter.data;
            var min_perf = {0};
            var max_perf = {1};
            var min_iter = {2};
            var max_iter = {3};
            if (cb_obj.value == 'performance') {{
                data_multiline['colors'] = data_multiline['colors_performance'];
                data_scatter['colors'] = data_scatter['colors_performance'];
                cm.low = min_perf;
                cm.high = max_perf;
            }} else {{
                data_multiline['colors'] = data_multiline['colors_iteration'];
                data_scatter['colors'] = data_scatter['colors_iteration'];
                cm.low = min_iter;
                cm.high = max_iter;
            }}
            source_multiline.change.emit();
            source_scatter.change.emit();
            """.format(min_perf, max_perf, min_iter, max_iter))

        select_color = Select(title="Select colors",
                              value="performance",
                              options=["performance", "iteration"],
                              callback=callback_color)

        # Put it all together in a layout (width of checkbox-field scales with number of elements)
        chkbox_width = 650 if len(HB_labels) > 100 else 500 if len(
            HB_labels) > 70 else 400
        layout = row(
            p,
            column(
                *[
                    widgetbox(chkbox,
                              max_width=chkbox_width,
                              width_policy="min") for chkbox in chckboxes
                ],
                row(widgetbox(select_all, width=50),
                    widgetbox(select_none, width=50)),
                widgetbox(select_color, width=200)))
        return layout