Пример #1
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None  # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None  # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None  # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None  # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None  # type: Optional[SilhouettePlot]

        gui.comboBox(self.controlArea,
                     self,
                     "distance_idx",
                     box="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     orientation=Qt.Horizontal,
                     callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(box,
                                           self,
                                           "cluster_var_idx",
                                           contentsLength=14,
                                           addSpace=4,
                                           callback=self._invalidate_scores)
        gui.checkBox(box,
                     self,
                     "group_by_cluster",
                     "Group by cluster",
                     callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(box,
                    self,
                    "bar_size",
                    minValue=1,
                    maxValue=10,
                    step=1,
                    callback=self._update_bar_size,
                    addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(box,
                                          self,
                                          "annotation_var_idx",
                                          contentsLength=14,
                                          callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box,
                     self,
                     "add_scores",
                     "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(box,
                        self,
                        "auto_commit",
                        "Commit",
                        auto_label="Auto commit",
                        box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            try:
                self._matrix = np.asarray(metric(self.data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.missing_cluster_assignment.clear()

    def _update_labels(self):
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        mask = np.isnan(labels)
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq, _ = np.unique(labels, return_counts=True)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if labels is not None:
            count_missing = np.count_nonzero(mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(not visible
                                           and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                silplot.setScores(self._silhouette,
                                  np.zeros(len(self._silhouette), dtype=int),
                                  [""], np.array([[63, 207, 207]]))

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(self.bar_size < 5
                                           and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape, fill_value=np.nan)
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes, self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(domain, self.data,
                                                np.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Пример #2
0
class OWColor(widget.OWWidget):
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_data = settings.ContextSetting([])
    cont_data = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None
        self.disc_colors = []
        self.cont_colors = []

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        disc_view = self.disc_view = DiscreteTable(self.disc_model)
        disc_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        cont_view = self.cont_view = ContinuousTable(self, self.cont_model)
        cont_view.setColumnWidth(1, 256)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(cont_view)

        box = gui.auto_commit(self.controlArea,
                              self,
                              "auto_apply",
                              "Apply",
                              orientation=Qt.Horizontal,
                              checkbox_label="Apply automatically")
        box.layout().insertSpacing(0, 20)
        box.layout().insertWidget(0, self.report_button)

    def _create_proxies(self, variables):
        part_vars = []
        for var in variables:
            if var.is_discrete or var.is_continuous:
                var = var.make_proxy()
                if var.is_discrete:
                    var.values = var.values[:]
                    self.disc_colors.append(var)
                else:
                    self.cont_colors.append(var)
            part_vars.append(var)
        return part_vars

    @Inputs.data
    def set_data(self, data):
        """Handle data input signal"""
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []
        if data is None:
            self.data = self.domain = None
        else:
            domain = self.orig_domain = data.domain
            domain = Orange.data.Domain(
                self._create_proxies(domain.attributes),
                self._create_proxies(domain.class_vars),
                self._create_proxies(domain.metas))
            self.openContext(data)
            self.data = data.transform(domain)
            self.disc_model.set_data(self.disc_colors)
            self.cont_model.set_data(self.cont_colors)
            self.disc_view.resizeColumnsToContents()
            self.cont_view.resizeColumnsToContents()
        self.commit()

    def storeSpecificSettings(self):
        # Store the colors that were changed -- but not others
        self.current_context.disc_data = \
            [(var.name, var.values, "colors" in var.attributes and var.colors)
             for var in self.disc_colors]
        self.current_context.cont_data = \
            [(var.name, "colors" in var.attributes and var.colors)
             for var in self.cont_colors]

    def retrieveSpecificSettings(self):
        disc_data = getattr(self.current_context, "disc_data", ())
        for var, (name, values, colors) in zip(self.disc_colors, disc_data):
            var.name = name
            var.values = values[:]
            if colors is not False:
                var.colors = colors
        cont_data = getattr(self.current_context, "cont_data", ())
        for var, (name, colors) in zip(self.cont_colors, cont_data):
            var.name = name
            if colors is not False:
                var.colors = colors

    def _on_data_changed(self, *args):
        self.commit()

    def commit(self):
        self.Outputs.data.send(self.data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables, orig_variables):
            from Orange.canvas.report import colored_square as square

            def was(n, o):
                return n if n == o else "{} (was: {})".format(n, o)

            # definition of td element for continuous gradient
            # with support for pre-standard css (needed at least for Qt 4.8)
            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)
            defs = ("-webkit-", "-o-", "-moz-", "")
            cont_tpl = '<td colspan="{}">' \
                       '<span class="legend-square" style="width: 100px; '.\
                format(max_values) + \
                " ".join(map(
                    "background: {}linear-gradient("
                    "left, rgb({{}}, {{}}, {{}}), {{}}rgb({{}}, {{}}, {{}}));"
                    .format, defs)) + \
                '"></span></td>'

            rows = ""
            for var, ovar in zip(variables, orig_variables):
                if var.is_discrete:
                    values = "    \n".join(
                        "<td>{} {}</td>".format(square(
                            *var.colors[i]), was(value, ovalue))
                        for i, (
                            value,
                            ovalue) in enumerate(zip(var.values, ovar.values)))
                elif var.is_continuous:
                    col = var.colors
                    colors = col[0][:3] + ("black, " * col[2], ) + col[1][:3]
                    values = cont_tpl.format(*colors * len(defs))
                else:
                    continue
                name = was(var.name, ovar.name)
                rows += '<tr style="height: 2em">\n' \
                        '  <th style="text-align: right">{}</th>{}\n</tr>\n'. \
                    format(name, values)
            return rows

        if not self.data:
            return
        domain = self.data.domain
        orig_domain = self.orig_domain
        sections = ((name, _report_variables(vars, ovars))
                    for name, vars, ovars in (("Features", domain.attributes,
                                               orig_domain.attributes),
                                              ("Outcome" + "s" *
                                               (len(domain.class_vars) > 1),
                                               domain.class_vars,
                                               orig_domain.class_vars),
                                              ("Meta attributes", domain.metas,
                                               orig_domain.metas)))
        table = "".join("<tr><th>{}</th></tr>{}".format(name, rows)
                        for name, rows in sections if rows)
        if table:
            self.report_raw("<table>{}</table>".format(table))
Пример #3
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Silhouette Plot"

    icon = "icons/Silhouette.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Other Data", Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    def __init__(self):
        super().__init__()

        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self._silplot = None

        box = gui.vBox(
            self.controlArea,
            "Settings",
        )
        gui.comboBox(box,
                     self,
                     "distance_idx",
                     label="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     callback=self._invalidate_distances)
        self.cluster_var_cb = gui.comboBox(box,
                                           self,
                                           "cluster_var_idx",
                                           label="Cluster",
                                           callback=self._invalidate_scores)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        gui.spin(box,
                 self,
                 "bar_size",
                 minv=1,
                 maxv=10,
                 label="Bar Size",
                 callback=self._update_bar_size)

        gui.checkBox(box,
                     self,
                     "group_by_cluster",
                     "Group by cluster",
                     callback=self._replot)

        self.annotation_cb = gui.comboBox(box,
                                          self,
                                          "annotation_var_idx",
                                          label="Annotations",
                                          callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Output")
        gui.checkBox(
            box,
            self,
            "add_scores",
            "Add silhouette scores",
        )
        gui.auto_commit(box,
                        self,
                        "auto_commit",
                        "Commit",
                        auto_label="Auto commit",
                        box=False)

        self.scene = QtGui.QGraphicsScene()
        self.view = QtGui.QGraphicsView(self.scene)
        self.view.setRenderHint(QtGui.QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QtCore.QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = candidatevars.index(
                    data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(0, error_msg)
        self.warning(0, warning_msg)

    def handleNewSignals(self):
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.error(1, "")
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.error(1, "Need at least 2 clusters with non zero counts")
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            silplot = SilhouettePlot()
            silplot.setBarHeight(self.bar_size)
            silplot.setRowNamesVisible(self.bar_size >= 5)

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int), [""])

            self.scene.addItem(silplot)
            self._silplot = silplot
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        if self._silplot is not None:
            self._silplot.setBarHeight(self.bar_size)
            self._silplot.setRowNamesVisible(self.bar_size >= 5)

            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = other = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes, self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
            else:
                domain = self.data.domain

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if numpy.count_nonzero(~selectedmask):
                other = self.data.from_table(domain, self.data,
                                             numpy.flatnonzero(~selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:,
                             silhouette_var] = numpy.c_[scores[selectedmask]]
                if other is not None:
                    other[:, silhouette_var] = numpy.c_[scores[~selectedmask]]

        self.send("Selected Data", selected)
        self.send("Other Data", other)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Пример #4
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", (Orange.data.Table, Orange.misc.DistMatrix))

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    auto_commit = settings.Setting(True)

    pending_selection = settings.Setting(None, schema_only=True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan),
                 ("Cosine", Orange.distance.Cosine)]

    graph_name = "scene"

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")
        input_validation_error = Msg("{}")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")
        nan_distances = Msg("{} instance{s} omitted (undefined distances)")
        ignoring_categorical = Msg("Ignoring categorical features")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: The input distance matrix (if present)
        self.distances = None  # type: Optional[Orange.misc.DistMatrix]
        #: The effective distance matrix (is self.distances or computed from
        #: self.data depending on input)
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        controllayout = self.controlArea.layout()
        assert isinstance(controllayout, QVBoxLayout)
        self._distances_gui_box = distbox = gui.widgetBox(
            None, "Distance"
        )
        self._distances_gui_cb = gui.comboBox(
            distbox, self, "distance_idx",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)
        controllayout.addWidget(distbox)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14,
            searchable=True, callback=self._invalidate_scores
        )
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.auto_send(self.buttonsArea, self, "auto_commit")

        self.scene = GraphicsScene(self)
        self.view = StickyGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    def pack_settings(self):
        if self.data and self._silplot is not None:
            self.pending_selection = list(self._silplot.selection())
        else:
            self.pending_selection = None

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Union[Table, DistMatrix, None]):
        """
        Set the input dataset or distance matrix.
        """
        self.closeContext()
        self.clear()
        try:
            if isinstance(data, Orange.misc.DistMatrix):
                self._set_distances(data)
            elif isinstance(data, Orange.data.Table):
                self._set_table(data)
            else:
                self.distances = None
                self.data = None
        except InputValidationError as err:
            self.Error.input_validation_error(err.message)
            self.distances = None
            self.data = None

    def _set_table(self, data: Table):
        self._setup_control_models(data.domain)
        self.data = data
        self.distances = None

    def _set_distances(self, distances: DistMatrix):
        if isinstance(distances.row_items, Orange.data.Table) and \
                distances.axis == 1:
            data = distances.row_items
        else:
            raise ValidationError("Input matrix does not have associated data")

        if data is not None:
            self._setup_control_models(data.domain)
            self.distances = distances
            self.data = data

    def handleNewSignals(self):
        if not self._is_empty():
            self._update()
            self._replot()
            if self.pending_selection is not None and self._silplot is not None:
                # If selection contains indices that are too large, the data
                # file must had been modified, so we ignore selection
                if max(self.pending_selection, default=-1) < len(self.data):
                    self._silplot.setSelection(np.array(self.pending_selection))
                self.pending_selection = None

        # Disable/enable the Distances GUI controls if applicable
        self._distances_gui_box.setEnabled(self.distances is None)

        self.commit.now()

    def _setup_control_models(self, domain: Domain):
        groupvars = [
            v for v in domain.variables + domain.metas
            if v.is_discrete and len(v.values) >= 2]
        if not groupvars:
            raise NoGroupVariable()
        self.cluster_var_model[:] = groupvars
        if domain.class_var in groupvars:
            self.cluster_var_idx = groupvars.index(domain.class_var)
        else:
            self.cluster_var_idx = 0
        annotvars = [var for var in domain.variables + domain.metas
                     if var.is_string or var.is_discrete]
        self.annotation_var_model[:] = ["None"] + annotvars
        self.annotation_var_idx = 1 if annotvars else 0
        self.openContext(Orange.data.Domain(groupvars))

    def _is_empty(self) -> bool:
        # Is empty (does not have any input).
        return (self.data is None or len(self.data) == 0) \
               and self.distances is None

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self.distances = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self.view.setHeaderSceneRect(QRectF())
        self.view.setFooterSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit.deferred()

    def _ensure_matrix(self):
        # ensure self._matrix is computed if necessary
        if self._is_empty():
            return
        if self._matrix is None:
            if self.distances is not None:
                self._matrix = np.asarray(self.distances)
            elif self.data is not None:
                data = self.data
                _, metric = self.Distances[self.distance_idx]
                if not metric.supports_discrete and any(
                        a.is_discrete for a in data.domain.attributes):
                    self.Warning.ignoring_categorical()
                    data = Orange.distance.remove_discrete_features(data)
                try:
                    self._matrix = np.asarray(metric(data))
                except MemoryError:
                    self.Error.memory_error()
                    return
                except ValueError as err:
                    self.Error.value_error(str(err))
                    return
            else:
                assert False, "invalid state"

    def _update(self):
        # Update/recompute the effective distances and scores as required.
        self._clear_messages()
        if self._is_empty():
            self._reset_all()
            return

        self._ensure_matrix()
        if self._matrix is None:
            return

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq = np.unique(labels)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.clear()

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]])
                )

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit.deferred)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    # pylint: disable=invalid-unary-operand-type
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        geom = self._silplot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

        header = self._silplot.topScaleItem()
        footer = self._silplot.bottomScaleItem()

        def extend_horizontal(rect):
            # type: (QRectF) -> QRectF
            rect = QRectF(rect)
            rect.setLeft(geom.left())
            rect.setRight(geom.right())
            return rect

        margin = 3
        if header is not None:
            self.view.setHeaderSceneRect(
                extend_horizontal(header.geometry().adjusted(0, 0, 0, margin)))
        if footer is not None:
            self.view.setFooterSceneRect(
                extend_horizontal(footer.geometry().adjusted(0, -margin, 0, 0)))

    @gui.deferred
    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # pylint: disable=invalid-unary-operand-type
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                # pylint: disable=invalid-unary-operand-type
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            var = self.cluster_var_model[self.cluster_var_idx]

            domain = self.data.domain
            proposed = "Silhouette ({})".format(escape(var.name))
            names = [var.name for var in itertools.chain(domain.attributes,
                                                         domain.class_vars,
                                                         domain.metas)]
            unique = get_unique_names(names, proposed)
            silhouette_var = Orange.data.ContinuousVariable(unique)
            domain = Orange.data.Domain(
                domain.attributes,
                domain.class_vars,
                domain.metas + (silhouette_var, ))

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            if selected is not None:
                with selected.unlocked(selected.metas):
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]

            data = self.data.transform(domain)
            with data.unlocked(data.metas):
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Пример #5
0
class OWDistanceMap(widget.OWWidget):
    name = "Distance Map"
    description = "Visualize a distance matrix."
    icon = "icons/DistanceMap.svg"
    priority = 1200
    keywords = []

    class Inputs:
        distances = Input("Distances", Orange.misc.DistMatrix)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)
        features = Output("Features", widget.AttributeList, dynamic=False)

    settingsHandler = settings.PerfectDomainContextHandler()

    #: type of ordering to apply to matrix rows/columns
    NoOrdering, Clustering, OrderedClustering = 0, 1, 2

    sorting = settings.Setting(NoOrdering)

    palette_name = settings.Setting(colorpalettes.DefaultContinuousPaletteName)
    color_gamma = settings.Setting(0.0)
    color_low = settings.Setting(0.0)
    color_high = settings.Setting(1.0)

    annotation_idx = settings.ContextSetting(0)
    pending_selection = settings.Setting(None, schema_only=True)

    autocommit = settings.Setting(True)

    graph_name = "grid_widget"

    # Disable clustering for inputs bigger than this
    _MaxClustering = 25000
    # Disable cluster leaf ordering for inputs bigger than this
    _MaxOrderedClustering = 2000

    def __init__(self):
        super().__init__()

        self.matrix = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._sort_indices = None
        self._selection = None

        self.sorting_cb = gui.comboBox(
            self.controlArea, self, "sorting", box="Element Sorting",
            items=["None", "Clustering", "Clustering with ordered leaves"],
            callback=self._invalidate_ordering)

        box = gui.vBox(self.controlArea, "Colors")
        self.color_box = gui.palette_combo_box(self.palette_name)
        self.color_box.currentIndexChanged.connect(self._update_color)
        box.layout().addWidget(self.color_box)

        form = QFormLayout(
            formAlignment=Qt.AlignLeft,
            labelAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow
        )
#         form.addRow(
#             "Gamma",
#             gui.hSlider(box, self, "color_gamma", minValue=0.0, maxValue=1.0,
#                         step=0.05, ticks=True, intOnly=False,
#                         createLabel=False, callback=self._update_color)
#         )
        form.addRow(
            "Low:",
            gui.hSlider(box, self, "color_low", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color)
        )
        form.addRow(
            "High:",
            gui.hSlider(box, self, "color_high", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color)
        )
        box.layout().addLayout(form)

        self.annot_combo = gui.comboBox(
            self.controlArea, self, "annotation_idx", box="Annotations",
            callback=self._invalidate_annotations, contentsLength=12)
        self.annot_combo.setModel(itemmodels.VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        self.controlArea.layout().addStretch()

        gui.auto_send(self.controlArea, self, "autocommit")

        self.view = pg.GraphicsView(background="w")
        self.mainArea.layout().addWidget(self.view)

        self.grid_widget = pg.GraphicsWidget()
        self.grid = QGraphicsGridLayout()
        self.grid_widget.setLayout(self.grid)

        self.viewbox = pg.ViewBox(enableMouse=False, enableMenu=False)
        self.viewbox.setAcceptedMouseButtons(Qt.NoButton)
        self.viewbox.setAcceptHoverEvents(False)
        self.grid.addItem(self.viewbox, 1, 1)

        self.left_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Left,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.left_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.left_dendrogram.setAcceptHoverEvents(False)

        self.top_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Top,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.top_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.top_dendrogram.setAcceptHoverEvents(False)

        self.grid.addItem(self.left_dendrogram, 1, 0)
        self.grid.addItem(self.top_dendrogram, 0, 1)

        self.right_labels = TextList(
            alignment=Qt.AlignLeft | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Expanding)
        )
        self.bottom_labels = TextList(
            orientation=Qt.Horizontal,
            alignment=Qt.AlignRight | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        )

        self.grid.addItem(self.right_labels, 1, 2)
        self.grid.addItem(self.bottom_labels, 2, 1)

        self.view.setCentralItem(self.grid_widget)

        self.left_dendrogram.hide()
        self.top_dendrogram.hide()
        self.right_labels.hide()
        self.bottom_labels.hide()

        self.matrix_item = None
        self.dendrogram = None

        self.grid_widget.scene().installEventFilter(self)

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def pack_settings(self):
        if self.matrix_item is not None:
            self.pending_selection = self.matrix_item.selections()
        else:
            self.pending_selection = None

    @Inputs.distances
    def set_distances(self, matrix):
        self.closeContext()
        self.clear()
        self.error()
        if matrix is not None:
            N, _ = matrix.shape
            if N < 2:
                self.error("Empty distance matrix.")
                matrix = None

        self.matrix = matrix
        if matrix is not None:
            self.set_items(matrix.row_items, matrix.axis)
        else:
            self.set_items(None)

        if matrix is not None:
            N, _ = matrix.shape
        else:
            N = 0

        model = self.sorting_cb.model()
        item = model.item(2)

        msg = None
        if N > OWDistanceMap._MaxOrderedClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.OrderedClustering:
                self.sorting = OWDistanceMap.Clustering
                msg = "Cluster ordering was disabled due to the input " \
                      "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        item = model.item(1)
        if N > OWDistanceMap._MaxClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.Clustering:
                self.sorting = OWDistanceMap.NoOrdering
            msg = "Clustering was disabled due to the input " \
                  "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        self.information(msg)

    def set_items(self, items, axis=1):
        self.items = items
        model = self.annot_combo.model()
        if items is None:
            model[:] = ["None", "Enumeration"]
        elif not axis:
            model[:] = ["None", "Enumeration", "Attribute names"]
        elif isinstance(items, Orange.data.Table):
            annot_vars = list(filter_visible(items.domain.variables)) + list(items.domain.metas)
            model[:] = ["None", "Enumeration"] + annot_vars
            self.annotation_idx = 0
            self.openContext(items.domain)
        elif isinstance(items, list) and \
                all(isinstance(item, Orange.data.Variable) for item in items):
            model[:] = ["None", "Enumeration", "Name"]
        else:
            model[:] = ["None", "Enumeration"]
        self.annotation_idx = min(self.annotation_idx, len(model) - 1)

    def clear(self):
        self.matrix = None
        self.cluster = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._selection = []
        self._clear_plot()

    def handleNewSignals(self):
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            if self.pending_selection is not None:
                self.matrix_item.set_selections(self.pending_selection)
                self.pending_selection = None
        self.unconditional_commit()

    def _clear_plot(self):
        def remove(item):
            item.setParentItem(None)
            item.scene().removeItem(item)

        if self.matrix_item is not None:
            self.matrix_item.selectionChanged.disconnect(
                self._invalidate_selection)
            remove(self.matrix_item)
            self.matrix_item = None

        self._set_displayed_dendrogram(None)
        self._set_labels(None)

    def _cluster_tree(self):
        if self._tree is None:
            self._tree = hierarchical.dist_matrix_clustering(self.matrix)
        return self._tree

    def _ordered_cluster_tree(self):
        if self._ordered_tree is None:
            tree = self._cluster_tree()
            self._ordered_tree = \
                hierarchical.optimal_leaf_ordering(tree, self.matrix)
        return self._ordered_tree

    def _setup_scene(self):
        self._clear_plot()
        self.matrix_item = DistanceMapItem(self._sorted_matrix)
        # Scale the y axis to compensate for pg.ViewBox's y axis invert
        self.matrix_item.setTransform(QTransform.fromScale(1, -1), )
        self.viewbox.addItem(self.matrix_item)
        # Set fixed view box range.
        h, w = self._sorted_matrix.shape
        self.viewbox.setRange(QRectF(0, -h, w, h), padding=0)

        self.matrix_item.selectionChanged.connect(self._invalidate_selection)

        if self.sorting == OWDistanceMap.NoOrdering:
            tree = None
        elif self.sorting == OWDistanceMap.Clustering:
            tree = self._cluster_tree()
        elif self.sorting == OWDistanceMap.OrderedClustering:
            tree = self._ordered_cluster_tree()

        self._set_displayed_dendrogram(tree)

        self._update_color()

    def _set_displayed_dendrogram(self, root):
        self.left_dendrogram.set_root(root)
        self.top_dendrogram.set_root(root)
        self.left_dendrogram.setVisible(root is not None)
        self.top_dendrogram.setVisible(root is not None)

        constraint = 0 if root is None else -1  # 150
        self.left_dendrogram.setMaximumWidth(constraint)
        self.top_dendrogram.setMaximumHeight(constraint)

    def _invalidate_ordering(self):
        self._sorted_matrix = None
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            self._invalidate_selection()

    def _update_ordering(self):
        if self.sorting == OWDistanceMap.NoOrdering:
            self._sorted_matrix = self.matrix
            self._sort_indices = None
        else:
            if self.sorting == OWDistanceMap.Clustering:
                tree = self._cluster_tree()
            elif self.sorting == OWDistanceMap.OrderedClustering:
                tree = self._ordered_cluster_tree()

            leaves = hierarchical.leaves(tree)
            indices = numpy.array([leaf.value.index for leaf in leaves])
            X = self.matrix
            self._sorted_matrix = X[indices[:, numpy.newaxis],
                                    indices[numpy.newaxis, :]]
            self._sort_indices = indices

    def _invalidate_annotations(self):
        if self.matrix is not None:
            self._update_labels()

    def _update_labels(self, ):
        if self.annotation_idx == 0:  # None
            labels = None
        elif self.annotation_idx == 1:  # Enumeration
            labels = [str(i + 1) for i in range(self.matrix.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == "Attribute names":
            attr = self.matrix.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.matrix.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, widget.AttributeList):
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Orange.data.Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]

        self._set_labels(labels)

    def _set_labels(self, labels):
        self._labels = labels

        if labels and self.sorting != OWDistanceMap.NoOrdering:
            sortind = self._sort_indices
            labels = [labels[i] for i in sortind]

        for textlist in [self.right_labels, self.bottom_labels]:
            textlist.setItems(labels or [])
            textlist.setVisible(bool(labels))

        constraint = -1 if labels else 0
        self.right_labels.setMaximumWidth(constraint)
        self.bottom_labels.setMaximumHeight(constraint)

    def _update_color(self):
        palette = self.color_box.currentData()
        self.palette_name = palette.name
        if self.matrix_item:
            colors = palette.lookup_table(self.color_low, self.color_high)
            self.matrix_item.setLookupTable(colors)

    def _invalidate_selection(self):
        ranges = self.matrix_item.selections()
        ranges = reduce(iadd, ranges, [])
        indices = reduce(iadd, ranges, [])
        if self.sorting != OWDistanceMap.NoOrdering:
            sortind = self._sort_indices
            indices = [sortind[i] for i in indices]
        self._selection = list(sorted(set(indices)))
        self.commit()

    def commit(self):
        datasubset = None
        featuresubset = None

        if not self._selection:
            pass
        elif isinstance(self.items, Orange.data.Table):
            indices = self._selection
            if self.matrix.axis == 1:
                datasubset = self.items.from_table_rows(self.items, indices)
            elif self.matrix.axis == 0:
                domain = Orange.data.Domain(
                    [self.items.domain[i] for i in indices],
                    self.items.domain.class_vars,
                    self.items.domain.metas)
                datasubset = self.items.transform(domain)
        elif isinstance(self.items, widget.AttributeList):
            subset = [self.items[i] for i in self._selection]
            featuresubset = widget.AttributeList(subset)

        self.Outputs.selected_data.send(datasubset)
        self.Outputs.annotated_data.send(create_annotated_table(self.items, self._selection))
        self.Outputs.features.send(featuresubset)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self.clear()

    def send_report(self):
        annot = self.annot_combo.currentText()
        if self.annotation_idx <= 1:
            annot = annot.lower()
        self.report_items((
            ("Sorting", self.sorting_cb.currentText().lower()),
            ("Annotations", annot)
        ))
        if self.matrix is not None:
            self.report_plot()
Пример #6
0
class OWLinePlot(OWWidget):
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 1030

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = settings.PerfectDomainContextHandler()

    group_var = settings.Setting("")  #: Group by group_var's values
    selected_classes = settings.Setting([])  #: List of selected class indices
    display_index = settings.Setting(LinePlotDisplay.INSTANCES)
    display_quartiles = settings.Setting(False)
    auto_commit = settings.Setting(True)
    selection = settings.ContextSetting([])

    class Information(OWWidget.Information):
        not_enough_attrs = Msg("Need at least one continuous feature.")

    def __init__(self, parent=None):
        super().__init__(parent)

        self.classes = []

        self.data = None
        self.data_subset = None
        self.subset_selection = []
        self.group_variables = []
        self.graph_variables = []
        self.__groups = None

        # Setup GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(infobox, "No data on input.")
        displaybox = gui.widgetBox(self.controlArea, "Display")
        radiobox = gui.radioButtons(displaybox,
                                    self,
                                    "display_index",
                                    callback=self.__update_visibility)
        gui.appendRadioButton(radiobox, "Line plot")
        gui.appendRadioButton(radiobox, "Mean")
        gui.appendRadioButton(radiobox, "Line plot with mean")

        showbox = gui.widgetBox(self.controlArea, "Show")
        gui.checkBox(showbox,
                     self,
                     "display_quartiles",
                     "Error bars",
                     callback=self.__update_visibility)

        group_box = gui.widgetBox(self.controlArea, "Group by")
        self.cb_attr = gui.comboBox(group_box,
                                    self,
                                    "group_var",
                                    sendSelectedValue=True,
                                    callback=self.update_group_var)
        self.group_listbox = gui.listBox(
            group_box,
            self,
            "selected_classes",
            "classes",
            selectionMode=QListWidget.MultiSelection,
            callback=self.__on_class_selection_changed)
        self.group_listbox.setVisible(False)

        self.gui = OWPlotGUI(self)
        self.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.graph = LinePlotGraph(self)
        self.graph.getPlotItem().buttonsHidden = True
        self.graph.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.graph)

    def box_zoom_select(self, parent):
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(box_zoom_select,
                                                    nomargin=True,
                                                    buttons=[
                                                        g.StateButtonsBegin,
                                                        g.SimpleSelect, g.Pan,
                                                        g.Zoom,
                                                        g.StateButtonsEnd,
                                                        g.ZoomReset
                                                    ])
        buttons = zoom_select_toolbar.buttons
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def select_button_clicked(self):
        self.graph.state = SELECT
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def pan_button_clicked(self):
        self.graph.state = PANNING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().PanMode)

    def zoom_button_clicked(self):
        self.graph.state = ZOOMING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def reset_button_clicked(self):
        self.graph.getViewBox().autoRange()

    def selection_changed(self):
        self.selection = list(self.graph.selection)
        self.commit()

    def sizeHint(self):
        return QSize(800, 600)

    def clear(self):
        """
        Clear/reset the widget state.
        """
        self.cb_attr.clear()
        self.group_listbox.clear()
        self.data = None
        self.__groups = None
        self.graph.reset()
        self.infoLabel.setText("No data on input.")

    @Inputs.data
    def set_data(self, data):
        """
        Set the input profile dataset.
        """
        self.closeContext()
        self.clear()
        self.clear_messages()

        self.data = data
        if data is not None:
            n_instances = len(data)
            n_attrs = len(data.domain.attributes)
            self.infoLabel.setText("%i instances on input\n%i attributes" %
                                   (n_instances, n_attrs))

            self.graph_variables = [
                var for var in data.domain.attributes if var.is_continuous
            ]
            if len(self.graph_variables) < 1:
                self.Information.not_enough_attrs()
            else:
                groupvars = [
                    var for var in data.domain.variables + data.domain.metas
                    if var.is_discrete
                ]

                if len(groupvars) > 0:
                    self.cb_attr.addItems([str(var) for var in groupvars])
                    self.group_var = str(groupvars[0])
                    self.group_variables = groupvars
                    self.update_group_var()
                else:
                    self._setup_plot()

        self.selection = []
        self.openContext(data)
        self.select_data_instances()
        self.commit()

    @Inputs.data_subset
    def set_data_subset(self, subset):
        """
        Set the supplementary input subset dataset.
        """
        self.data_subset = subset

    def handleNewSignals(self):
        if len(self.subset_selection) and self.data is not None:
            self.graph.deselect_subset(self.subset_selection)
        self.subset_selection = []
        if self.data is not None and self.data_subset is not None:
            intersection = set(self.data.ids).intersection(
                set(self.data_subset.ids))
            self.subset_selection = intersection
            self.graph.select_subset(self.subset_selection)

    def select_data_instances(self):
        if self.data is None or not len(self.data) or not len(self.selection):
            return
        if max(self.selection) >= len(self.data):
            self.selection = []
        self.graph.select(self.selection)

    def _plot_curve(self, X, color, data, indices):
        dark_pen = QPen(color.darker(110), 4)
        dark_pen.setCosmetic(True)

        color.setAlpha(120)
        light_pen = QPen(color, 1)
        light_pen.setCosmetic(True)
        items = []
        for index, instance in zip(indices, data):
            item = LinePlotItem(index, instance.id, X, instance.x, light_pen)
            item.sigClicked.connect(self.graph.select_by_click)
            items.append(item)
            self.graph.add_line_plot_item(item)

        mean = np.nanmean(data.X, axis=0)
        meancurve = pg.PlotDataItem(x=X,
                                    y=mean,
                                    pen=dark_pen,
                                    symbol="o",
                                    pxMode=True,
                                    symbolSize=5,
                                    antialias=True)
        self.graph.addItem(meancurve)

        q1, q2, q3 = np.nanpercentile(data.X, [25, 50, 75], axis=0)
        errorbar = pg.ErrorBarItem(x=X,
                                   y=mean,
                                   bottom=np.clip(mean - q1, 0, mean - q1),
                                   top=np.clip(q3 - mean, 0, q3 - mean),
                                   beam=0.01)
        self.graph.addItem(errorbar)
        return items, mean, meancurve, errorbar

    def _setup_plot(self):
        """Setup the plot with new curve data."""
        assert self.data is not None
        self.graph.reset()

        data, domain = self.data, self.data.domain
        self.graph.getAxis('bottom').setTicks([[
            (i + 1, str(a)) for i, a in enumerate(self.graph_variables)
        ]])

        X = np.arange(1, len(self.graph_variables) + 1)
        groups = []

        if not self.selected_classes:
            group_data = data[:, self.graph_variables]
            items, mean, meancurve, errorbar = self._plot_curve(
                X, QColor(Qt.darkGray), group_data,
                list(range(len(self.data))))
            groups.append(
                namespace(data=group_data,
                          profiles=items,
                          mean=meancurve,
                          boxplot=errorbar))
        else:
            var = domain[self.group_var]
            class_col_data, _ = data.get_column_view(var)
            group_indices = [
                np.flatnonzero(class_col_data == i)
                for i in range(len(self.classes))
            ]

            for i, indices in enumerate(group_indices):
                if len(indices) == 0:
                    groups.append(None)
                else:
                    if self.classes:
                        color = self.class_colors[i]
                    else:
                        color = QColor(Qt.darkGray)

                    group_data = data[indices, self.graph_variables]
                    items, mean, meancurve, errorbar = self._plot_curve(
                        X, color, group_data, indices)

                    groups.append(
                        namespace(data=group_data,
                                  indices=indices,
                                  profiles=items,
                                  mean=meancurve,
                                  boxplot=errorbar))

        self.__groups = groups
        self.__update_visibility()

    def __update_visibility(self):
        if self.__groups is None:
            return
        for i, group in enumerate(self.__groups):
            if group is not None:
                if self.display_index in (LinePlotDisplay.INSTANCES,
                                          LinePlotDisplay.INSTANCES_WITH_MEAN):
                    self.graph.add_items()
                for item in group.profiles:
                    item.setVisible(self.display_index in (
                        LinePlotDisplay.INSTANCES,
                        LinePlotDisplay.INSTANCES_WITH_MEAN))
                group.mean.setVisible(self.display_index in (
                    LinePlotDisplay.MEAN, LinePlotDisplay.INSTANCES_WITH_MEAN))
                group.boxplot.setVisible(self.display_quartiles)

    def __on_class_selection_changed(self):
        self.__update_visibility()
        self.graph.deselect_all()

    def update_group_var(self):
        data_attr, _ = self.data.get_column_view(self.group_var)
        class_vals = self.data.domain[self.group_var].values
        self.classes = list(class_vals)
        self.class_colors = \
            colorpalette.ColorPaletteGenerator(len(class_vals))
        self.selected_classes = list(range(len(class_vals)))
        for i in range(len(class_vals)):
            item = self.group_listbox.item(i)
            item.setIcon(colorpalette.ColorPixmap(self.class_colors[i]))

        self._setup_plot()
        self.__on_class_selection_changed()

    def commit(self):
        selected = self.data[self.selection] \
            if self.data is not None and len(self.selection) > 0 else None
        annotated = create_annotated_table(self.data, self.selection)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
Пример #7
0
class OWColor(widget.OWWidget):
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_descs = settings.ContextSetting([])
    cont_descs = settings.ContextSetting([])
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    settings_version = 2

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.cont_view)

        box = gui.auto_apply(self.controlArea, self, "auto_apply")
        box.button.setFixedWidth(180)
        save = gui.button(None, self, "Save", callback=self.save)
        load = gui.button(None, self, "Load", callback=self.load)
        reset = gui.button(None, self, "Reset", callback=self.reset)
        box.layout().insertWidget(0, save)
        box.layout().insertWidget(0, load)
        box.layout().insertWidget(2, reset)
        box.layout().insertStretch(3)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():  # pragma: no cover
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.disc_descs = []
        self.cont_descs = []
        if data is None:
            self.data = self.domain = None
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.data = data
            self.info.set_input_summary(len(data), format_summary_details(data))
            for var in chain(data.domain.variables, data.domain.metas):
                if var.is_discrete:
                    self.disc_descs.append(DiscAttrDesc(var))
                elif var.is_continuous:
                    self.cont_descs.append(ContAttrDesc(var))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.openContext(data)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.unconditional_commit()

    def _on_data_changed(self):
        self.commit()

    def reset(self):
        self.disc_model.reset()
        self.cont_model.reset()
        self.commit()

    def save(self):
        fname, _ = QFileDialog.getSaveFileName(
            self, "File name", self._start_dir(),
            "Variable definitions (*.colors)")
        if not fname:
            return
        QSettings().setValue("colorwidget/last-location",
                             os.path.split(fname)[0])
        self._save_var_defs(fname)

    def _save_var_defs(self, fname):
        with open(fname, "w") as f:
            json.dump(
                {vartype: {
                    var.name: var_data
                    for var, var_data in (
                        (desc.var, desc.to_dict()) for desc in repo)
                    if var_data}
                 for vartype, repo in (("categorical", self.disc_descs),
                                       ("numeric", self.cont_descs))
                },
                f,
                indent=4)

    def load(self):
        fname, _ = QFileDialog.getOpenFileName(
            self, "File name", self._start_dir(),
            "Variable definitions (*.colors)")
        if not fname:
            return

        try:
            f = open(fname)
        except IOError:
            QMessageBox.critical(self, "File error", "File cannot be opened.")
            return

        try:
            js = json.load(f)  #: dict
            self._parse_var_defs(js)
        except (json.JSONDecodeError, InvalidFileFormat):
            QMessageBox.critical(self, "File error", "Invalid file format.")

    def _parse_var_defs(self, js):
        if not isinstance(js, dict) or set(js) != {"categorical", "numeric"}:
            raise InvalidFileFormat
        try:
            renames = {
                var_name: desc["rename"]
                for repo in js.values() for var_name, desc in repo.items()
                if "rename" in desc
            }
        # js is an object coming from json file that can be manipulated by
        # the user, so there are too many things that can go wrong.
        # Catch all exceptions, therefore.
        except Exception as exc:
            raise InvalidFileFormat from exc
        if not all(isinstance(val, str)
                   for val in chain(renames, renames.values())):
            raise InvalidFileFormat
        renamed_vars = {
            renames.get(desc.var.name, desc.var.name)
            for desc in chain(self.disc_descs, self.cont_descs)
        }
        if len(renamed_vars) != len(self.disc_descs) + len(self.cont_descs):
            QMessageBox.warning(
                self,
                "Duplicated variable names",
                "Variables will not be renamed due to duplicated names.")
            for repo in js.values():
                for desc in repo.values():
                    desc.pop("rename", None)

        # First, construct all descriptions; assign later, after we know
        # there won't be exceptions due to invalid file format
        both_descs = []
        warnings = []
        for old_desc, repo, desc_type in (
                (self.disc_descs, "categorical", DiscAttrDesc),
                (self.cont_descs, "numeric", ContAttrDesc)):
            var_by_name = {desc.var.name: desc.var for desc in old_desc}
            new_descs = {}
            for var_name, var_data in js[repo].items():
                var = var_by_name.get(var_name)
                if var is None:
                    continue
                # This can throw InvalidFileFormat
                new_descs[var_name], warn = desc_type.from_dict(var, var_data)
                warnings += warn
            both_descs.append(new_descs)

        self.disc_descs = [both_descs[0].get(desc.var.name, desc)
                           for desc in self.disc_descs]
        self.cont_descs = [both_descs[1].get(desc.var.name, desc)
                           for desc in self.cont_descs]
        if warnings:
            QMessageBox.warning(
                self, "Invalid definitions", "\n".join(warnings))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.unconditional_commit()

    def _start_dir(self):
        return self.workflowEnv().get("basedir") \
               or QSettings().value("colorwidget/last-location") \
               or os.path.expanduser(f"~{os.sep}")

    def commit(self):
        def make(variables):
            new_vars = []
            for var in variables:
                source = disc_dict if var.is_discrete else cont_dict
                desc = source.get(var.name)
                new_vars.append(desc.create_variable() if desc else var)
            return new_vars

        if self.data is None:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        disc_dict = {desc.var.name: desc for desc in self.disc_descs}
        cont_dict = {desc.var.name: desc for desc in self.cont_descs}

        dom = self.data.domain
        new_domain = Orange.data.Domain(
            make(dom.attributes), make(dom.class_vars), make(dom.metas))
        new_data = self.data.transform(new_domain)
        self.info.set_output_summary(len(new_data),
                                     format_summary_details(new_data))
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables):
            def was(n, o):
                return n if n == o else f"{n} (was: {o})"

            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)

            rows = ""
            disc_dict = {k.var.name: k for k in self.disc_descs}
            cont_dict = {k.var.name: k for k in self.cont_descs}
            for var in variables:
                if var.is_discrete:
                    desc = disc_dict[var.name]
                    value_cols = "    \n".join(
                        f"<td>{square(*color)} {was(value, old_value)}</td>"
                        for color, value, old_value in
                        zip(desc.colors, desc.values, var.values))
                elif var.is_continuous:
                    desc = cont_dict[var.name]
                    pal = colorpalettes.ContinuousPalettes[desc.palette_name]
                    value_cols = f'<td colspan="{max_values}">' \
                                 f'{pal.friendly_name}</td>'
                else:
                    continue
                names = was(desc.name, desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        f'  <th style="text-align: right">{names}</th>' \
                        f'  {value_cols}\n' \
                        '</tr>\n'
            return rows

        if not self.data:
            return
        dom = self.data.domain
        sections = (
            (name, _report_variables(variables))
            for name, variables in (
                ("Features", dom.attributes),
                ("Outcome" + "s" * (len(dom.class_vars) > 1), dom.class_vars),
                ("Meta attributes", dom.metas)))
        table = "".join(f"<tr><th>{name}</th></tr>{rows}"
                        for name, rows in sections if rows)
        if table:
            self.report_raw(f"<table>{table}</table>")

    @classmethod
    def migrate_context(cls, _, version):
        if not version or version < 2:
            raise IncompatibleContext
Пример #8
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")

    def __init__(self):
        super().__init__()

        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self._silplot = None

        gui.comboBox(self.controlArea,
                     self,
                     "distance_idx",
                     box="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     orientation=Qt.Horizontal,
                     callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(box,
                                           self,
                                           "cluster_var_idx",
                                           addSpace=4,
                                           callback=self._invalidate_scores)
        gui.checkBox(box,
                     self,
                     "group_by_cluster",
                     "Group by cluster",
                     callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(box,
                    self,
                    "bar_size",
                    minValue=1,
                    maxValue=10,
                    step=1,
                    callback=self._update_bar_size,
                    addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(box,
                                          self,
                                          "annotation_var_idx",
                                          callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box,
                     self,
                     "add_scores",
                     "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(box,
                        self,
                        "auto_commit",
                        "Commit",
                        auto_label="Auto commit",
                        box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.Error.need_two_clusters.clear()
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.Error.need_two_clusters()
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(not visible
                                           and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int), [""])

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()
            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(self.bar_size < 5
                                           and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes, self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.from_table(domain, self.data)
            else:
                domain = self.data.domain
                data = self.data

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:,
                             silhouette_var] = numpy.c_[scores[selectedmask]]
                data[:, silhouette_var] = numpy.c_[scores]

        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Пример #9
0
class OWColor(widget.OWWidget):
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_descs = settings.ContextSetting([])
    cont_descs = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    settings_version = 2

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.cont_view)

        box = gui.auto_apply(self.controlArea, self, "auto_apply")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)

    @staticmethod
    def sizeHint():  # pragma: no cover
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.disc_descs = []
        self.cont_descs = []
        if data is None:
            self.data = self.domain = None
        else:
            self.data = data
            for var in chain(data.domain.variables, data.domain.metas):
                if var.is_discrete:
                    self.disc_descs.append(DiscAttrDesc(var))
                elif var.is_continuous:
                    self.cont_descs.append(ContAttrDesc(var))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.openContext(data)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.unconditional_commit()

    def _on_data_changed(self):
        self.commit()

    def commit(self):
        def make(variables):
            new_vars = []
            for var in variables:
                source = disc_dict if var.is_discrete else cont_dict
                desc = source.get(var.name)
                new_vars.append(desc.create_variable() if desc else var)
            return new_vars

        if self.data is None:
            self.Outputs.data.send(None)
            return

        disc_dict = {desc.var.name: desc for desc in self.disc_descs}
        cont_dict = {desc.var.name: desc for desc in self.cont_descs}

        dom = self.data.domain
        new_domain = Orange.data.Domain(make(dom.attributes),
                                        make(dom.class_vars), make(dom.metas))
        new_data = self.data.transform(new_domain)
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables):
            from Orange.widgets.report import colored_square as square

            def was(n, o):
                return n if n == o else f"{n} (was: {o})"

            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)

            rows = ""
            disc_dict = {k.var.name: k for k in self.disc_descs}
            cont_dict = {k.var.name: k for k in self.cont_descs}
            for var in variables:
                if var.is_discrete:
                    desc = disc_dict[var.name]
                    value_cols = "    \n".join(
                        f"<td>{square(*color)} {was(value, old_value)}</td>"
                        for color, value, old_value in zip(
                            desc.colors, desc.values, var.values))
                elif var.is_continuous:
                    desc = cont_dict[var.name]
                    pal = colorpalettes.ContinuousPalettes[desc.palette_name]
                    value_cols = f'<td colspan="{max_values}">' \
                                 f'{pal.friendly_name}</td>'
                else:
                    continue
                names = was(desc.name, desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        f'  <th style="text-align: right">{names}</th>' \
                        f'  {value_cols}\n' \
                        '</tr>\n'
            return rows

        if not self.data:
            return
        dom = self.data.domain
        sections = ((name, _report_variables(variables))
                    for name, variables in (("Features", dom.attributes),
                                            ("Outcome" + "s" *
                                             (len(dom.class_vars) > 1),
                                             dom.class_vars),
                                            ("Meta attributes", dom.metas)))
        table = "".join(f"<tr><th>{name}</th></tr>{rows}"
                        for name, rows in sections if rows)
        if table:
            self.report_raw(r"<table>{table}</table>")

    @classmethod
    def migrate_context(cls, context, version):
        if not version or version < 2:
            raise IncompatibleContext
Пример #10
0
class OWReshape(widget.OWWidget):
    name = "To Shopping List"
    description = "Reshape from a 'wide' records table to 'long' format."
    icon = "icons/ToShoppingList.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Data", Orange.data.Table)]

    want_main_area = False
    resizing_enabled = False

    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    idvar = settings.ContextSetting(0)  # type: Orange.data.Variable

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.data = None  # type: Orange.data.Table

        self.idvar_model = itemmodels.VariableListModel(parent=self)
        self.item_var_name = "Item"
        self.value_var_name = "Rating"

        box = gui.widgetBox(self.controlArea, "Info")
        self.info_text = gui.widgetLabel(box, "No data")

        box = gui.widgetBox(self.controlArea, "Id var")
        self.var_cb = gui.comboBox(box, self, "idvar",
                                   callback=self._invalidate)
        self.var_cb.setMinimumContentsLength(16)
        self.var_cb.setModel(self.idvar_model)
        gui.lineEdit(self.controlArea, self, "item_var_name",
                     box="Item name", callback=self._invalidate)
        gui.lineEdit(self.controlArea, self, "value_var_name",
                     box="Value name", callback=self._invalidate)

    def sizeHint(self):
        return QSize(300, 50)

    def clear(self):
        self.data = None
        self.idvar_model[:] = []
        self.error("")

    def set_data(self, data):
        self.closeContext()
        self.clear()

        idvars = []
        if data is not None:
            domain = data.domain
            idvars = [var for var in domain.metas + domain.variables
                      if isinstance(var, (Orange.data.DiscreteVariable,
                                          Orange.data.StringVariable))]
            if not idvars:
                self.error("No suitable id columns.")
                data = None

        self.data = data

        if self.data is not None:
            self.idvar_model[:] = idvars
            self.openContext(data)
            self.info_text.setText("Data with {} instances".format(len(data)))
        else:
            self.info_text.setText("No data")
        self.commit()

    def _invalidate(self):
        self.commit()

    def commit(self):
        if self.data is None:
            self.send("Data", None)
            return

        self.error("")
        data, domain = self.data, self.data.domain
        idvar = self.idvar_model[self.idvar]

        itemvars = [var for var in domain.attributes if var is not idvar]
        item_names = [v.name for v in itemvars]
        if len(set(item_names)) != len(itemvars):
            self.error("Duplicate column names")
            self.send("Data", None)
            return

        item_var = Orange.data.DiscreteVariable(self.item_var_name,
                                                values=item_names)
        value_var = Orange.data.ContinuousVariable(self.value_var_name)
        try:
            outdata = reshape_long(data, idvar, item_var, value_var)

        except ValueError as err:
            self.error(str(err))
            outdata = None

        self.send("Data", outdata)
Пример #11
0
class OWLinePlot(OWWidget):
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 1030

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = settings.PerfectDomainContextHandler()

    group_var = settings.ContextSetting(None)
    display_index = settings.Setting(LinePlotDisplay.INSTANCES)
    display_quartiles = settings.Setting(False)
    auto_commit = settings.Setting(True)
    selection = settings.ContextSetting([])

    class Information(OWWidget.Information):
        not_enough_attrs = Msg("Need at least one continuous feature.")

    def __init__(self, parent=None):
        super().__init__(parent)
        self.__groups = None
        self.__profiles = None
        self.data = None
        self.data_subset = None
        self.subset_selection = []
        self.graph_variables = []

        # Setup GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(infobox, "No data on input.")
        displaybox = gui.widgetBox(self.controlArea, "Display")
        radiobox = gui.radioButtons(displaybox, self, "display_index",
                                    callback=self.__update_visibility)
        gui.appendRadioButton(radiobox, "Line plot")
        gui.appendRadioButton(radiobox, "Mean")
        gui.appendRadioButton(radiobox, "Line plot with mean")

        showbox = gui.widgetBox(self.controlArea, "Show")
        gui.checkBox(showbox, self, "display_quartiles", "Error bars",
                     callback=self.__update_visibility)

        self.group_vars = DomainModel(
            placeholder="None", separators=False,
            valid_types=DiscreteVariable)
        self.group_view = gui.listView(
            self.controlArea, self, "group_var", box="Group by",
            model=self.group_vars, callback=self.__group_var_changed)
        self.group_view.setEnabled(False)
        self.group_view.setMinimumSize(QSize(30, 100))
        self.group_view.setSizePolicy(QSizePolicy.Expanding,
                                      QSizePolicy.Ignored)

        self.gui = OWPlotGUI(self)
        self.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.graph = LinePlotGraph(self)
        self.graph.getPlotItem().buttonsHidden = True
        self.graph.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.graph)

    def box_zoom_select(self, parent):
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(
            box_zoom_select, nomargin=True,
            buttons=[g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                     g.StateButtonsEnd, g.ZoomReset]
        )
        buttons = zoom_select_toolbar.buttons
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def select_button_clicked(self):
        self.graph.state = SELECT
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def pan_button_clicked(self):
        self.graph.state = PANNING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().PanMode)

    def zoom_button_clicked(self):
        self.graph.state = ZOOMING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def reset_button_clicked(self):
        self.graph.getViewBox().autoRange()

    def selection_changed(self):
        self.selection = list(self.graph.selection)
        self.commit()

    def sizeHint(self):
        return QSize(800, 600)

    def clear(self):
        """
        Clear/reset the widget state.
        """
        self.__groups = None
        self.__profiles = None
        self.graph_variables = []
        self.graph.reset()
        self.infoLabel.setText("No data on input.")
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)

    @Inputs.data
    def set_data(self, data):
        """
        Set the input profile dataset.
        """
        self.closeContext()
        self.clear()
        self.clear_messages()

        self.data = data
        self.selection = []
        if data is not None:
            self.group_vars.set_domain(data.domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.group_var = data.domain.class_var if \
                data.domain.class_var and data.domain.class_var.is_discrete \
                else None

            self.infoLabel.setText("%i instances on input\n%i attributes" % (
                len(data), len(data.domain.attributes)))
            self.graph_variables = [var for var in data.domain.attributes
                                    if var.is_continuous]
            if len(self.graph_variables) < 1:
                self.Information.not_enough_attrs()
                self.commit()
                return

        self.openContext(data)
        self._setup_plot()
        self.commit()

    @Inputs.data_subset
    def set_data_subset(self, subset):
        """
        Set the supplementary input subset dataset.
        """
        self.data_subset = subset
        if len(self.subset_selection):
            self.graph.deselect_subset()

    def handleNewSignals(self):
        self.subset_selection = []
        if self.data is not None and self.data_subset is not None and \
                len(self.graph_variables):
            intersection = set(self.data.ids).intersection(
                set(self.data_subset.ids))
            self.subset_selection = intersection
            if self.__profiles is not None:
                self.graph.select_subset(self.subset_selection)

    def _setup_plot(self):
        """Setup the plot with new curve data."""
        if self.data is None:
            return
        self.graph.reset()
        ticks = [[(i + 1, str(a)) for i, a in enumerate(self.graph_variables)]]
        self.graph.getAxis('bottom').setTicks(ticks)
        if self.display_index in (LinePlotDisplay.INSTANCES,
                                  LinePlotDisplay.INSTANCES_WITH_MEAN):
            self._plot_profiles()
        self._plot_groups()
        self.__update_visibility()

    def _plot_profiles(self):
        X = np.arange(1, len(self.graph_variables) + 1)
        data = self.data[:, self.graph_variables]
        self.__profiles = []
        for index, inst in zip(range(len(self.data)), data):
            color = self.__get_line_color(index)
            profile = LinePlotItem(index, inst.id, X, inst.x, color)
            profile.sigClicked.connect(self.graph.select_by_click)
            self.graph.add_line_plot_item(profile)
            self.__profiles.append(profile)
        self.graph.finished_adding()
        self.__select_data_instances()

    def _plot_groups(self):
        if self.__groups is not None:
            for group in self.__groups:
                if group is not None:
                    self.graph.getViewBox().removeItem(group.mean)
                    self.graph.getViewBox().removeItem(group.error_bar)

        self.__groups = []
        X = np.arange(1, len(self.graph_variables) + 1)
        if self.group_var is None:
            self.__plot_mean_with_error(X, self.data[:, self.graph_variables])
        else:
            class_col_data, _ = self.data.get_column_view(self.group_var)
            group_indices = [np.flatnonzero(class_col_data == i)
                             for i in range(len(self.group_var.values))]
            for index, indices in enumerate(group_indices):
                if len(indices) == 0:
                    self.__groups.append(None)
                else:
                    group_data = self.data[indices, self.graph_variables]
                    self.__plot_mean_with_error(X, group_data, index)

    def __plot_mean_with_error(self, X, data, index=None):
        pen = QPen(self.__get_line_color(None, index), 4)
        pen.setCosmetic(True)
        mean = np.nanmean(data.X, axis=0)
        mean_curve = pg.PlotDataItem(x=X, y=mean, pen=pen, symbol="o",
                                     symbolSize=5, antialias=True)
        self.graph.addItem(mean_curve)

        q1, q2, q3 = np.nanpercentile(data.X, [25, 50, 75], axis=0)
        bottom = np.clip(mean - q1, 0, mean - q1)
        top = np.clip(q3 - mean, 0, q3 - mean)
        error_bar = pg.ErrorBarItem(x=X, y=mean, bottom=bottom,
                                    top=top, beam=0.01)
        self.graph.addItem(error_bar)
        self.__groups.append(namespace(mean=mean_curve, error_bar=error_bar))

    def __update_visibility(self):
        self.__update_visibility_profiles()
        self.__update_visibility_groups()

    def __update_visibility_groups(self):
        show_mean = self.display_index in (LinePlotDisplay.MEAN,
                                           LinePlotDisplay.INSTANCES_WITH_MEAN)
        if self.__groups is not None:
            for group in self.__groups:
                if group is not None:
                    group.mean.setVisible(show_mean)
                    group.error_bar.setVisible(self.display_quartiles)

    def __update_visibility_profiles(self):
        show_inst = self.display_index in (LinePlotDisplay.INSTANCES,
                                           LinePlotDisplay.INSTANCES_WITH_MEAN)
        if self.__profiles is None and show_inst:
            self._plot_profiles()
            self.graph.select_subset(self.subset_selection)
        if self.__profiles is not None:
            for profile in self.__profiles:
                profile.setVisible(show_inst)

    def __group_var_changed(self):
        if self.data is None or not len(self.graph_variables):
            return
        self.__color_profiles()
        self._plot_groups()
        self.__update_visibility()

    def __color_profiles(self):
        if self.__profiles is not None:
            for profile in self.__profiles:
                profile.setColor(self.__get_line_color(profile.index))

    def __select_data_instances(self):
        if self.data is None or not len(self.data) or not len(self.selection):
            return
        if max(self.selection) >= len(self.data):
            self.selection = []
        self.graph.select(self.selection)

    def __get_line_color(self, data_index=None, mean_index=None):
        color = QColor(LinePlotColors.DEFAULT_COLOR)
        if self.group_var is not None:
            if data_index is not None:
                value = self.data[data_index][self.group_var]
                if np.isnan(value):
                    return color
            index = int(value) if data_index is not None else mean_index
            color = LinePlotColors()(len(self.group_var.values))[index]
        return color.darker(110) if data_index is None else color

    def commit(self):
        selected = self.data[self.selection] \
            if self.data is not None and len(self.selection) > 0 else None
        annotated = create_annotated_table(self.data, self.selection)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
Пример #12
0
class OWColor(widget.OWWidget):
    name = "Color"
    description = "Set color legend for variables"
    icon = "icons/Colors.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Data", Orange.data.Table)]

    settingsHandler = settings.PerfectDomainContextHandler()
    disc_data = settings.ContextSetting([])
    cont_data = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.disc_colors = []
        self.cont_colors = []

        box = gui.widgetBox(self.controlArea, "Discrete variables",
                            orientation="horizontal")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.disc_view)

        box = gui.widgetBox(self.controlArea, "Numeric variables",
                            orientation="horizontal")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self, self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.cont_view)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.auto_commit(box, self, "auto_apply", "Send data", box=box,
                        checkbox_label="Resend data on every change  ")
        gui.rubber(box)

    def set_data(self, data):
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []
        if data is None:
            self.data = self.domain = None
        else:
            def create_part(variables):
                vars = []
                for i, var in enumerate(variables):
                    if var.is_discrete or var.is_continuous:
                        var = var.make_proxy()
                        if var.is_discrete:
                            var.values = var.values[:]
                            self.disc_colors.append(var)
                        else:
                            self.cont_colors.append(var)
                    vars.append(var)
                return vars

            domain = data.domain
            domain = Orange.data.Domain(create_part(domain.attributes),
                                        create_part(domain.class_vars),
                                        create_part(domain.metas))
            self.openContext(data)
            self.data = Orange.data.Table(domain, data)
            self.data.domain = domain

            self.disc_model.set_data(self.disc_colors)
            self.cont_model.set_data(self.cont_colors)
            self.disc_view.resizeColumnsToContents()
            self.cont_view.resizeColumnsToContents()
        self.commit()

    def storeSpecificSettings(self):
        # Store the colors that were changed -- but not others
        self.current_context.disc_data = \
            [(var.name, var.values, "colors" in var.attributes and var.colors)
             for var in self.disc_colors]
        self.current_context.cont_data = \
            [(var.name, "colors" in var.attributes and var.colors)
             for var in self.cont_colors]

    def retrieveSpecificSettings(self):
        disc_data = getattr(self.current_context, "disc_data", ())
        for var, (name, values, colors) in zip(self.disc_colors, disc_data):
            var.name = name
            var.values = values[:]
            if colors is not False:
                var.colors = colors
        cont_data = getattr(self.current_context, "cont_data", ())
        for var, (name, colors) in zip(self.cont_colors, cont_data):
            var.name = name
            if colors is not False:
                var.colors = colors

    def _on_data_changed(self, *args):
        self.commit()

    def commit(self):
        self.send("Data", self.data)
Пример #13
0
class OWTestLearners(OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Table), ("Evaluation Results", Results)]

    settings_version = 2
    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(wordWrap=True, )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data and not data.domain.class_vars:
            self.Error.class_required()
            data = None
        elif data and len(data.domain.class_vars) > 1:
            self.Error.too_many_classes()
            data = None
        else:
            self.Error.class_required.clear()
            self.Error.too_many_classes.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress,
            n_jobs=-1,
        )
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                folds = self.NFolds[self.n_folds]
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < folds:
                        self.Error.too_many_folds()
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data,
                        learners,
                        k=folds,
                        random_state=rstate,
                        warnings=warnings,
                        **common_args)
                    if warnings:
                        self.warning(warnings[0])
                elif self.resampling == OWTestLearners.FeatureFold:
                    results = Orange.evaluation.CrossValidationFeature(
                        self.data, learners, self.fold_feature, **common_args)
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.SampleSizes[self.sample_size] / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data,
                        learners,
                        n_resamples=self.NRepeats[self.n_repeats],
                        train_size=train_size,
                        test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate,
                        **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except (RuntimeError, ValueError) as e:
                self.error(str(e))
                self.setStatusMessage("")
                return
            else:
                self.error()

        self.puts_results(learners, results, class_var)

        self.setStatusMessage("")

    def puts_results(self, learners, results, class_var):
        """
        Called by _update_results. This method prepares calculated results and
        put them into self.learners.
        """
        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                scorers = classification_stats.scores if class_var.is_discrete \
                    else regression_stats.scores
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            if learner in learner_key:
                key = learner_key.get(learner)
                self.learners[key] = \
                    self.learners[key]._replace(results=result, stats=stats)

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    stats = [
                        Try(lambda: score(ovr_results))
                        for score in classification_stats.scores
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if not hasattr(settings_["context_settings"][0], "attributes"):
                settings_["context_settings"][0].attributes = {}
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
Пример #14
0
class OWColor(widget.OWWidget):
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_colors = settings.ContextSetting([])
    cont_colors = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None
        self.disc_dict = {}
        self.cont_dict = {}

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        disc_view = self.disc_view = DiscreteTable(self.disc_model)
        disc_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        cont_view = self.cont_view = ContinuousTable(self, self.cont_model)
        cont_view.setColumnWidth(1, 256)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(cont_view)

        box = gui.auto_apply(self.controlArea, self, "auto_apply")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)

    @staticmethod
    def sizeHint():
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        """Handle data input signal"""
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []
        if data is None:
            self.data = self.domain = None
        else:
            self.data = data
            for var in chain(data.domain.variables, data.domain.metas):
                if var.is_discrete:
                    self.disc_colors.append(AttrDesc(var))
                elif var.is_continuous:
                    self.cont_colors.append(AttrDesc(var))

        self.disc_model.set_data(self.disc_colors)
        self.cont_model.set_data(self.cont_colors)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.openContext(data)
        self.disc_dict = {k.var.name: k for k in self.disc_colors}
        self.cont_dict = {k.var.name: k for k in self.cont_colors}
        self.unconditional_commit()

    def _on_data_changed(self, *args):
        self.commit()

    def commit(self):
        def make(vars):
            new_vars = []
            for var in vars:
                source = self.disc_dict if var.is_discrete else self.cont_dict
                desc = source.get(var.name)
                if desc:
                    name = desc.get_name()
                    if var.is_discrete:
                        var = var.copy(name=name, values=desc.get_values())
                    else:
                        var = var.copy(name=name)
                    var.colors = desc.colors
                new_vars.append(var)
            return new_vars

        if self.data is None:
            self.Outputs.data.send(None)
            return

        dom = self.data.domain
        new_domain = Orange.data.Domain(
            make(dom.attributes), make(dom.class_vars), make(dom.metas))
        new_data = self.data.transform(new_domain)
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables):
            from Orange.widgets.report import colored_square as square

            def was(n, o):
                return n if n == o else f"{n} (was: {o})"

            # definition of td element for continuous gradient
            # with support for pre-standard css (needed at least for Qt 4.8)
            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)
            defs = ("-webkit-", "-o-", "-moz-", "")
            cont_tpl = '<td colspan="{}">' \
                       '<span class="legend-square" style="width: 100px; '.\
                format(max_values) + \
                " ".join(map(
                    "background: {}linear-gradient("
                    "left, rgb({{}}, {{}}, {{}}), {{}}rgb({{}}, {{}}, {{}}));"
                    .format, defs)) + \
                '"></span></td>'

            rows = ""
            for var in variables:
                if var.is_discrete:
                    desc = self.disc_dict[var.name]
                    values = "    \n".join(
                        "<td>{} {}</td>".
                        format(square(*color), was(value, old_value))
                        for color, value, old_value in
                        zip(desc.get_colors(), desc.get_values(), var.values))
                elif var.is_continuous:
                    desc = self.cont_dict[var.name]
                    col = desc.get_colors()
                    colors = col[0][:3] + ("black, " * col[2], ) + col[1][:3]
                    values = cont_tpl.format(*colors * len(defs))
                else:
                    continue
                names = was(desc.get_name(), desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        '  <th style="text-align: right">{}</th>{}\n</tr>\n'. \
                    format(names, values)
            return rows

        if not self.data:
            return
        dom = self.data.domain
        sections = (
            (name, _report_variables(vars))
            for name, vars in (
                ("Features", dom.attributes),
                ("Outcome" + "s" * (len(dom.class_vars) > 1), dom.class_vars),
                ("Meta attributes", dom.metas)))
        table = "".join("<tr><th>{}</th></tr>{}".format(name, rows)
                        for name, rows in sections if rows)
        if table:
            self.report_raw("<table>{}</table>".format(table))
Пример #15
0
class OWTestLearners(OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    BUILTIN_ORDER = {
        DiscreteVariable: ("AUC", "CA", "F1", "Precision", "Recall"),
        ContinuousVariable: ("MSE", "RMSE", "MAE", "R2")
    }

    shown_scores = \
        settings.Setting(set(chain(*BUILTIN_ORDER.values())))

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        only_one_class_var_value = Msg("Target variable has only one value.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(wordWrap=True, )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        else:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data:
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1, data.domain.has_discrete_class
                and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required, self.Error.too_many_classes,
                self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        if self.data is None or self.data.domain.class_var is None:
            self.scorers = []
            return
        class_var = self.data and self.data.domain.class_var
        order = {
            name: i
            for i, name in enumerate(self.BUILTIN_ORDER[type(class_var)])
        }
        # 'abstract' is retrieved from __dict__ to avoid inheriting
        usable = (cls for cls in scoring.Score.registry.values()
                  if cls.is_scalar and not cls.__dict__.get("abstract")
                  and isinstance(class_var, cls.class_types))
        self.scorers = sorted(usable, key=lambda cls: order.get(cls.name, 99))

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self._update_header()
        self._update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()
        self.__update()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        model = self.result_model
        model.setColumnCount(1 + len(self.scorers))
        for col, score in enumerate(self.scorers):
            item = QStandardItem(score.name)
            item.setToolTip(score.long_name)
            model.setHorizontalHeaderItem(col + 1, item)
        self._update_shown_columns()

    def _update_shown_columns(self):
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            header.setSectionHidden(section, col_name not in self.shown_scores)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def show_column_chooser(self, pos):
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        def update(col_name, checked):
            if checked:
                self.shown_scores.add(col_name)
            else:
                self.shown_scores.remove(col_name)
            self._update_shown_columns()

        menu = QMenu()
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            action = menu.addAction(col_name)
            action.setCheckable(True)
            action.setChecked(col_name in self.shown_scores)
            action.triggered.connect(partial(update, col_name))
        menu.exec(header.mapToGlobal(pos))

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
        )
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.KFold:
            folds = self.NFolds[self.n_folds]
            test_f = partial(Orange.evaluation.CrossValidation,
                             self.data,
                             learners_c,
                             k=folds,
                             random_state=rstate,
                             **common_args)
        elif self.resampling == OWTestLearners.FeatureFold:
            test_f = partial(Orange.evaluation.CrossValidationFeature,
                             self.data, learners_c, self.fold_feature,
                             **common_args)
        elif self.resampling == OWTestLearners.LeaveOneOut:
            test_f = partial(Orange.evaluation.LeaveOneOut, self.data,
                             learners_c, **common_args)
        elif self.resampling == OWTestLearners.ShuffleSplit:
            train_size = self.SampleSizes[self.sample_size] / 100
            test_f = partial(Orange.evaluation.ShuffleSplit,
                             self.data,
                             learners_c,
                             n_resamples=self.NRepeats[self.n_repeats],
                             train_size=train_size,
                             test_size=None,
                             stratified=self.shuffle_stratified,
                             random_state=rstate,
                             **common_args)
        elif self.resampling == OWTestLearners.TestOnTrain:
            test_f = partial(Orange.evaluation.TestOnTrainingData, self.data,
                             learners_c, **common_args)
        elif self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(Orange.evaluation.TestOnTestData, self.data,
                             self.test_data, learners_c, **common_args)
        else:
            assert False, "self.resampling %s" % self.resampling

        def replace_learners(evalfunc, *args, **kwargs):
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[float]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            if task.cancelled:
                raise UserInterrupt()
            QMetaObject.invokeMethod(self, "setProgressValue",
                                     Qt.QueuedConnection,
                                     Q_ARG(float, 100 * finished))

        def ondone(_):
            QMetaObject.invokeMethod(self, "__task_complete",
                                     Qt.QueuedConnection, Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            log.exception("testing error (in __task_complete):", exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er),
                                                                 er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self._update_header()
        self._update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #16
0
class OW1ka(widget.OWWidget):
    name = "EnKlik Anketa"
    description = "Import data from EnKlikAnketa (1ka.si) public URL."
    icon = "icons/1ka.svg"
    priority = 200

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False
    resizing_enabled = False

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)

    recent = settings.Setting([])
    reload_idx = settings.Setting(0)
    autocommit = settings.Setting(True)
    domain_editor = settings.SettingProvider(DomainEditor)

    UserAdviceMessages = [
        widget.Message(
            'You can import data from public links to 1ka surveys results. '
            'Click to learn more on how to get a shareable public link URL for '
            '1ka surveys that you manage.',
            'public-link',
            icon=widget.Message.Information,
            moreurl=
            'http://english.1ka.si/db/24/468/Guides/Public_link_to_access_data_and_analysis/'
        ),
    ]

    class Error(widget.OWWidget.Error):
        net_error = widget.Msg(
            "Couldn't load data: {}. Ensure network connection, firewall ...")
        parse_error = widget.Msg(
            "Couldn't parse data: {}. Ensure well-formatted data or submit a bug report."
        )
        invalid_url = widget.Msg(
            'Invalid URL. Public shareable link should match: ' +
            VALID_URL_HELP)
        data_is_anal = widget.Msg(
            "The provided URL is a public link to 'Analysis'. Need public link to 'Data'."
        )

    class Information(widget.OWWidget.Information):
        response_data_empty = widget.Msg(
            'Response data is empty. Get some responses first.')

    def __init__(self):
        super().__init__()
        self.table = None
        self._html = None

        def _loadFinished(is_ok):
            if is_ok:
                QTimer.singleShot(
                    1, lambda: setattr(self, '_html', self.webview.html()))

        self.webview = WebviewWidget(loadFinished=_loadFinished)

        vb = gui.vBox(self.controlArea, 'Import Data')
        hb = gui.hBox(vb)
        self.combo = combo = URLComboBox(
            hb,
            self.recent,
            editable=True,
            minimumWidth=400,
            insertPolicy=QComboBox.InsertAtTop,
            toolTip='Format: ' + VALID_URL_HELP,
            editTextChanged=self.is_valid_url,
            # Indirect via QTimer because calling wait() -> processEvents,
            # while our currentIndexChanged event hadn't yet finished.
            # Avoids calling handler twice.
            currentIndexChanged=lambda: QTimer.singleShot(1, self.load_url))
        hb.layout().addWidget(QLabel('Public link URL:', hb))
        hb.layout().addWidget(combo)
        hb.layout().setStretch(1, 2)

        RELOAD_TIMES = (
            ('No reload', ),
            ('5 s', 5000),
            ('10 s', 10000),
            ('30 s', 30000),
            ('1 min', 60 * 1000),
            ('2 min', 2 * 60 * 1000),
            ('5 min', 5 * 60 * 1000),
        )

        reload_timer = QTimer(self,
                              timeout=lambda: self.load_url(from_reload=True))

        def _on_reload_changed():
            if self.reload_idx == 0:
                reload_timer.stop()
                return
            reload_timer.start(RELOAD_TIMES[self.reload_idx][1])

        gui.comboBox(vb,
                     self,
                     'reload_idx',
                     label='Reload every:',
                     orientation=Qt.Horizontal,
                     items=[i[0] for i in RELOAD_TIMES],
                     callback=_on_reload_changed)

        box = gui.widgetBox(self.controlArea, "Columns (Double-click to edit)")
        self.domain_editor = DomainEditor(self)
        editor_model = self.domain_editor.model()

        def editorDataChanged():
            self.apply_domain_edit()
            self.commit()

        editor_model.dataChanged.connect(editorDataChanged)
        box.layout().addWidget(self.domain_editor)

        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        info = self.data_info = gui.widgetLabel(box, '')
        info.setWordWrap(True)

        self.controlArea.layout().addStretch(1)
        gui.auto_commit(self.controlArea, self, 'autocommit', label='Commit')

        self.set_info()

    def set_combo_items(self):
        self.combo.clear()
        for sheet in self.recent:
            self.combo.addItem(sheet.name, sheet.url)

    def commit(self):
        self.Outputs.data.send(self.table)

    def is_valid_url(self, url):
        if is_valid_url(url):
            self.Error.invalid_url.clear()
            return True
        self.Error.invalid_url()
        QToolTip.showText(self.combo.mapToGlobal(QPoint(0, 0)),
                          self.combo.toolTip())

    def load_url(self, from_reload=False):
        self.closeContext()
        self.domain_editor.set_domain(None)

        url = self.combo.currentText()
        if not self.is_valid_url(url):
            self.table = None
            self.commit()
            return

        if url not in self.recent:
            self.recent.insert(0, url)

        prev_table = self.table
        with self.progressBar(3) as progress:
            try:
                self._html = None
                self.webview.setUrl(url)
                wait(until=lambda: self._html is not None)
                progress.advance()
                # Wait some seconds for discrete labels to have loaded via AJAX,
                # then re-query HTML.
                # *Webview.loadFinished doesn't guarantee it sufficiently
                try:
                    wait(until=lambda: False, timeout=1200)
                except TimeoutError:
                    pass
                progress.advance()
                html = self.webview.html()
            except Exception as e:
                log.exception("Couldn't load data from: %s", url)
                self.Error.net_error(try_(lambda: e.args[0], ''))
                self.table = None
            else:
                self.Error.clear()
                self.Information.clear()
                self.table = None
                try:
                    table = self.table = self.table_from_html(html)
                except DataEmptyError:
                    self.Information.response_data_empty()
                except DataIsAnalError:
                    self.Error.data_is_anal()
                except Exception as e:
                    log.exception('Parsing error: %s', url)
                    self.Error.parse_error(try_(lambda: e.args[0], ''))
                else:
                    self.openContext(table.domain)
                    self.combo.setTitleFor(self.combo.currentIndex(),
                                           table.name)

        def _equal(data1, data2):
            NAN = float('nan')
            return (try_(lambda: data1.checksum(),
                         NAN) == try_(lambda: data2.checksum(), NAN))

        self._orig_table = self.table
        self.apply_domain_edit()

        if not (from_reload and _equal(prev_table, self.table)):
            self.commit()

    def apply_domain_edit(self):
        data = self._orig_table
        if data is None:
            self.set_info()
            return

        domain, cols = self.domain_editor.get_domain(data.domain, data)

        # Copied verbatim from OWFile
        if not (domain.variables or domain.metas):
            table = None
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, data.W)
            table.name = data.name
            table.ids = np.array(data.ids)
            table.attributes = getattr(data, 'attributes', {})

        self.table = table
        self.set_info()

    DATETIME_VAR = 'Paradata (insert)'

    def table_from_html(self, html):
        soup = BeautifulSoup(html, 'html.parser')
        try:
            html_table = soup.find_all('table')[-1]
        except IndexError:
            raise DataEmptyError

        if '<h2>Anal' in html or 'div_analiza_' in html:
            raise DataIsAnalError

        def _header_row_strings(row):
            return chain.from_iterable(
                repeat(th.get_text(), int(th.get('colspan') or 1)) for th in
                html_table.select('thead tr:nth-of-type(%d) th[title]' % row))

        # self.DATETIME_VAR (available when Paradata is enabled in 1ka UI)
        # should match this variable name format
        header = [
            th1.rstrip(':') +
            ('' if th3 == th1 else ' ({})').format(th3.rstrip(':'))
            for th1, th3 in zip(_header_row_strings(1), _header_row_strings(3))
        ]
        values = [
            [
                (  # If no span, feature is a number or a text field
                    td.get_text() if td.span is None else
                    # If have span, it's a number, but if negative, replace with NaN
                    '' if td.contents[0].strip().startswith('-') else
                    # Else if span, the number is its code, but we want its value
                    td.span.get_text()[1:-1]) for td in tr.select('td')
                if 'data_uid' not in td.get('class', ())
            ] for tr in html_table.select('tbody tr')
        ]

        # Save parsed values into in-mem file for default values processing
        buffer = StringIO()
        writer = csv.writer(buffer, delimiter='\t')
        writer.writerow(header)
        writer.writerows(values)
        buffer.flush()
        buffer.seek(0)

        data = TabReader(buffer).read()

        title = soup.select('body h2:nth-of-type(1)')[0].get_text().split(
            ': ', maxsplit=1)[-1]
        data.name = title

        return data

    def set_info(self):
        data = self.table
        if data is None:
            self.data_info.setText('No spreadsheet loaded.')
            return
        text = "{}\n\n{} instance(s), {} feature(s), {} meta attribute(s)\n".format(
            data.name, len(data), len(data.domain.attributes),
            len(data.domain.metas))
        text += try_(
            lambda: '\nFirst entry: {}'
            '\nLast entry: {}'.format(data[0, self.DATETIME_VAR], data[
                -1, self.DATETIME_VAR]), '')
        self.data_info.setText(text)
Пример #17
0
class OWTestLearners(OWWidget):
    name = "Test and Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100
    keywords = ['Cross Validation', 'CV']

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    use_rope = settings.Setting(False)
    rope = settings.Setting(0.1)
    comparison_criterion = settings.Setting(0, schema_only=True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train dataset is empty.")
        test_data_empty = Msg("Test dataset is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        no_class_values = Msg("Target variable has no values.")
        only_one_class_var_value = Msg("Target variable has only one value.")
        test_data_incompatible = Msg(
            "Test data may be incompatible with train data.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")
        test_data_transformed = Msg(
            "Test data has been transformed to match the train data.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []
        self.__pending_comparison_criterion = self.comparison_criterion

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[TaskState]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison")
        gui.comboBox(box,
                     self,
                     "comparison_criterion",
                     model=PyListModel(),
                     callback=self.update_comparison_table)

        hbox = gui.hBox(box)
        gui.checkBox(hbox,
                     self,
                     "use_rope",
                     "Negligible difference: ",
                     callback=self._on_use_rope_changed)
        gui.lineEdit(hbox,
                     self,
                     "rope",
                     validator=QDoubleValidator(),
                     controlWidth=70,
                     callback=self.update_comparison_table,
                     alignment=Qt.AlignRight)
        self.controls.rope.setEnabled(self.use_rope)

        gui.rubber(self.controlArea)
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)
        view = self.score_table.view
        view.setSizeAdjustPolicy(view.AdjustToContents)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)

        self.compbox = box = gui.vBox(self.mainArea, box="Model comparison")
        table = self.comparison_table = QTableWidget(
            wordWrap=False,
            editTriggers=QTableWidget.NoEditTriggers,
            selectionMode=QTableWidget.NoSelection)
        table.setSizeAdjustPolicy(table.AdjustToContents)
        header = table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Fixed)
        header.setSectionsClickable(False)

        header = table.horizontalHeader()
        header.setTextElideMode(Qt.ElideRight)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setSectionsClickable(False)
        header.setStretchLastSection(False)
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        avg_width = self.fontMetrics().averageCharWidth()
        header.setMinimumSectionSize(8 * avg_width)
        header.setMaximumSectionSize(15 * avg_width)
        header.setDefaultSectionSize(15 * avg_width)
        box.layout().addWidget(table)
        box.layout().addWidget(
            QLabel(
                "<small>Table shows probabilities that the score for the model in "
                "the row is higher than that of the model in the column. "
                "Small numbers show the probability that the difference is "
                "negligible.</small>",
                wordWrap=True))

    @staticmethod
    def sizeHint():
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        elif learner is not None:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.cancel()
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.no_class_values.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not data:
            self.Error.train_data_empty()
            data = None
        if data:
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                np.isnan(data.Y).all(), data.domain.has_discrete_class
                and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required, self.Error.too_many_classes,
                self.Error.no_class_values, self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not data:
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        if self.data and self.data.domain.class_var:
            new_scorers = usable_scorers(self.data.domain.class_var)
        else:
            new_scorers = []
        # Don't unnecessarily reset the model because this would always reset
        # comparison_criterion; we alse set it explicitly, though, for clarity
        if new_scorers != self.scorers:
            self.scorers = new_scorers
            self.controls.comparison_criterion.model()[:] = \
                [scorer.long_name or scorer.name for scorer in self.scorers]
            self.comparison_criterion = 0
        if self.__pending_comparison_criterion is not None:
            # Check for the unlikely case that some scorers have been removed
            # from modules
            if self.__pending_comparison_criterion < len(self.scorers):
                self.comparison_criterion = self.__pending_comparison_criterion
            self.__pending_comparison_criterion = None
        self._update_compbox_title()

    def _update_compbox_title(self):
        criterion = self.comparison_criterion
        if criterion < len(self.scorers):
            scorer = self.scorers[criterion]()
            self.compbox.setTitle(f"Model Comparison by {scorer.name}")
        else:
            self.compbox.setTitle(f"Model Comparison")

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.score_table.update_header(self.scorers)
        self._update_view_enabled()
        self.update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self.modcompbox.setEnabled(self.resampling == OWTestLearners.KFold)
        self._update_view_enabled()
        self._invalidate()
        self.__update()

    def _update_view_enabled(self):
        self.comparison_table.setEnabled(
            self.resampling == OWTestLearners.KFold and len(self.learners) > 1
            and self.data is not None)
        self.score_table.view.setEnabled(self.data is not None)

    def update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.score_table.model
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        names = []
        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            names.append(name)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            results = slot.results
            if results is not None and results.success:
                train = QStandardItem("{:.3f}".format(
                    results.value.train_time))
                train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                train.setData(key, Qt.UserRole)
                test = QStandardItem("{:.3f}".format(results.value.test_time))
                test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                test.setData(key, Qt.UserRole)
                row = [head, train, test]
            else:
                row = [head]
            if isinstance(results, Try.Fail):
                head.setToolTip(str(results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                if isinstance(results.exception, DomainTransformationError) \
                        and self.resampling == self.TestOnTest:
                    self.Error.test_data_incompatible()
                    self.Information.test_data_transformed.clear()
                else:
                    errors.append("{name} failed with error:\n"
                                  "{exc.__class__.__name__}: {exc!s}".format(
                                      name=name, exc=slot.results.exception))

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results, target=1))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat, scorer in zip(stats, self.scorers):
                    item = QStandardItem()
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    if stat.success:
                        item.setData(float(stat.value[0]), Qt.DisplayRole)
                    else:
                        item.setToolTip(str(stat.exception))
                        if scorer.name in self.score_table.shown_scores:
                            has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        # Resort rows based on current sorting
        header = self.score_table.view.horizontalHeader()
        model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder())
        self._set_comparison_headers(names)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _on_use_rope_changed(self):
        self.controls.rope.setEnabled(self.use_rope)
        self.update_comparison_table()

    def update_comparison_table(self):
        self.comparison_table.clearContents()
        slots = self._successful_slots()
        if not (slots and self.scorers):
            return
        names = [learner_name(slot.learner) for slot in slots]
        self._set_comparison_headers(names)
        if self.resampling == OWTestLearners.KFold:
            scores = self._scores_by_folds(slots)
            self._fill_table(names, scores)

    def _successful_slots(self):
        model = self.score_table.model
        proxy = self.score_table.sorted_model

        keys = (model.data(proxy.mapToSource(proxy.index(row, 0)), Qt.UserRole)
                for row in range(proxy.rowCount()))
        slots = [
            slot for slot in (self.learners[key] for key in keys)
            if slot.results is not None and slot.results.success
        ]
        return slots

    def _set_comparison_headers(self, names):
        table = self.comparison_table
        try:
            # Prevent glitching during update
            table.setUpdatesEnabled(False)
            header = table.horizontalHeader()
            if len(names) > 2:
                header.setSectionResizeMode(QHeaderView.Stretch)
            else:
                header.setSectionResizeMode(QHeaderView.Fixed)
            table.setRowCount(len(names))
            table.setColumnCount(len(names))
            table.setVerticalHeaderLabels(names)
            table.setHorizontalHeaderLabels(names)
        finally:
            table.setUpdatesEnabled(True)

    def _scores_by_folds(self, slots):
        scorer = self.scorers[self.comparison_criterion]()
        self._update_compbox_title()
        if scorer.is_binary:
            if self.class_selection != self.TARGET_AVERAGE:
                class_var = self.data.domain.class_var
                target_index = class_var.values.index(self.class_selection)
                kw = dict(target=target_index)
            else:
                kw = dict(average='weighted')
        else:
            kw = {}

        def call_scorer(results):
            def thunked():
                return scorer.scores_by_folds(results.value, **kw).flatten()

            return thunked

        scores = [Try(call_scorer(slot.results)) for slot in slots]
        scores = [score.value if score.success else None for score in scores]
        # `None in scores doesn't work -- these are np.arrays)
        if any(score is None for score in scores):
            self.Warning.scores_not_computed()
        return scores

    def _fill_table(self, names, scores):
        table = self.comparison_table
        for row, row_name, row_scores in zip(count(), names, scores):
            for col, col_name, col_scores in zip(range(row), names, scores):
                if row_scores is None or col_scores is None:
                    continue
                if self.use_rope and self.rope:
                    p0, rope, p1 = baycomp.two_on_single(
                        row_scores, col_scores, self.rope)
                    if np.isnan(p0) or np.isnan(rope) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(
                        table, row, col,
                        f"{p0:.3f}<br/><small>{rope:.3f}</small>",
                        f"p({row_name} > {col_name}) = {p0:.3f}\n"
                        f"p({row_name} = {col_name}) = {rope:.3f}")
                    self._set_cell(
                        table, col, row,
                        f"{p1:.3f}<br/><small>{rope:.3f}</small>",
                        f"p({col_name} > {row_name}) = {p1:.3f}\n"
                        f"p({col_name} = {row_name}) = {rope:.3f}")
                else:
                    p0, p1 = baycomp.two_on_single(row_scores, col_scores)
                    if np.isnan(p0) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(table, row, col, f"{p0:.3f}",
                                   f"p({row_name} > {col_name}) = {p0:.3f}")
                    self._set_cell(table, col, row, f"{p1:.3f}",
                                   f"p({col_name} > {row_name}) = {p1:.3f}")

    @classmethod
    def _set_cells_na(cls, table, row, col):
        cls._set_cell(table, row, col, "NA", "comparison cannot be computed")
        cls._set_cell(table, col, row, "NA", "comparison cannot be computed")

    @staticmethod
    def _set_cell(table, row, col, label, tooltip):
        item = QLabel(label)
        item.setToolTip(tooltip)
        item.setAlignment(Qt.AlignCenter)
        table.setCellWidget(row, col, item)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self.update_stats_model()
        self.update_comparison_table()

    def _invalidate(self, which=None):
        self.cancel()
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.score_table.model
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.comparison_table.clearContents()

        self.__needupdate = True

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.score_table.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        self.progressBarSet(value)

    def __update(self):
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Error.test_data_incompatible.clear()
        self.Warning.test_data_missing.clear()
        self.Information.test_data_transformed(
            shown=self.resampling == self.TestOnTest and self.data is not None
            and self.test_data is not None and
            self.data.domain.attributes != self.test_data.domain.attributes)
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData(store_data=True,
                                                 store_models=True), self.data,
                self.test_data, learners_c, self.preprocessor)
        else:
            if self.resampling == OWTestLearners.KFold:
                sampler = Orange.evaluation.CrossValidation(
                    k=self.NFolds[self.n_folds], random_state=rstate)
            elif self.resampling == OWTestLearners.FeatureFold:
                sampler = Orange.evaluation.CrossValidationFeature(
                    feature=self.fold_feature)
            elif self.resampling == OWTestLearners.LeaveOneOut:
                sampler = Orange.evaluation.LeaveOneOut()
            elif self.resampling == OWTestLearners.ShuffleSplit:
                sampler = Orange.evaluation.ShuffleSplit(
                    n_resamples=self.NRepeats[self.n_repeats],
                    train_size=self.SampleSizes[self.sample_size] / 100,
                    test_size=None,
                    stratified=self.shuffle_stratified,
                    random_state=rstate)
            elif self.resampling == OWTestLearners.TestOnTrain:
                sampler = Orange.evaluation.TestOnTrainingData(
                    store_models=True)
            else:
                assert False, "self.resampling %s" % self.resampling

            sampler.store_data = True
            test_f = partial(sampler, self.data, learners_c, self.preprocessor)

        def replace_learners(evalfunc, *args, **kwargs):
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = TaskState()

        def progress_callback(finished):
            if task.is_interruption_requested():
                raise UserInterrupt()
            task.set_progress_value(100 * finished)

        testfunc = partial(testfunc, callback=progress_callback)
        task.start(self.__executor, testfunc)

        task.progress_changed.connect(self.setProgressValue)
        task.watcher.finished.connect(self.__task_complete)

        self.Outputs.evaluations_results.invalidate()
        self.Outputs.predictions.invalidate()
        self.progressBarInit()
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, f: 'Future[Results]'):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        assert self.__task is not None and self.__task.future is f
        self.progressBarFinished()
        self.setStatusMessage("")
        assert f.done()
        self.__task = None
        self.__state = State.Done
        try:
            results = f.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:  # pylint: disable=broad-except
            log.exception("testing error (in __task_complete):", exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er),
                                                                 er)))
            return

        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.score_table.update_header(self.scorers)
        self.update_stats_model()
        self.update_comparison_table()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            task.progress_changed.disconnect(self.setProgressValue)
            task.watcher.finished.disconnect(self.__task_complete)
            self.progressBarFinished()
            self.setStatusMessage("")

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #18
0
class OWDistanceMap(widget.OWWidget):
    name = "距离图(Distance Map)"
    description = "可视化距离矩阵"
    icon = "icons/DistanceMap.svg"
    priority = 1200
    keywords = ['juliyingshe', 'yingshe', 'julitu']
    category = '非监督(Unsupervised)'

    class Inputs:
        distances = Input("距离(Distances)", Orange.misc.DistMatrix, replaces=['Distances'])

    class Outputs:
        selected_data = Output("选定的数据(Selected Data)", Orange.data.Table, default=True, replaces=['Selected Data'])
        annotated_data = Output("数据(Data)", Orange.data.Table, replaces=['Data'])
        features = Output("特征(Features)", widget.AttributeList, dynamic=False, replaces=['Features'])

    settingsHandler = settings.PerfectDomainContextHandler()

    #: type of ordering to apply to matrix rows/columns
    NoOrdering, Clustering, OrderedClustering = 0, 1, 2

    sorting = settings.Setting(NoOrdering)

    palette_name = settings.Setting(colorpalettes.DefaultContinuousPaletteName)
    color_gamma = settings.Setting(0.0)
    color_low = settings.Setting(0.0)
    color_high = settings.Setting(1.0)

    annotation_idx = settings.ContextSetting(0)
    pending_selection = settings.Setting(None, schema_only=True)

    autocommit = settings.Setting(True)

    graph_name = "grid_widget"

    # Disable clustering for inputs bigger than this
    _MaxClustering = 25000
    # Disable cluster leaf ordering for inputs bigger than this
    _MaxOrderedClustering = 2000

    def __init__(self):
        super().__init__()

        self.matrix = None
        self._matrix_range = 0.
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._sort_indices = None
        self._selection = None

        self.sorting_cb = gui.comboBox(
            self.controlArea, self, "sorting", box="元素排序",
            items=["无", "聚类(Clustering)", "有序叶聚类"],
            callback=self._invalidate_ordering)

        box = gui.vBox(self.controlArea, "颜色")
        self.color_map_widget = cmw = ColorGradientSelection(
            thresholds=(self.color_low, self.color_high),
        )
        model = itemmodels.ContinuousPalettesModel(parent=self)
        cmw.setModel(model)
        idx = cmw.findData(self.palette_name, model.KeyRole)
        if idx != -1:
            cmw.setCurrentIndex(idx)

        cmw.activated.connect(self._update_color)

        def _set_thresholds(low, high):
            self.color_low, self.color_high = low, high
            self._update_color()

        cmw.thresholdsChanged.connect(_set_thresholds)
        box.layout().addWidget(self.color_map_widget)

        self.annot_combo = gui.comboBox(
            self.controlArea, self, "annotation_idx", box="注释",
            contentsLength=12, searchable=True,
            callback=self._invalidate_annotations
        )
        self.annot_combo.setModel(itemmodels.VariableListModel())
        self.annot_combo.model()[:] = ["无", "枚举"]
        gui.rubber(self.controlArea)

        gui.auto_send(self.buttonsArea, self, "autocommit")

        self.view = GraphicsView(background=None)
        self.mainArea.layout().addWidget(self.view)

        self.grid_widget = pg.GraphicsWidget()
        self.grid = QGraphicsGridLayout()
        self.grid_widget.setLayout(self.grid)

        self.gradient_legend = GradientLegendWidget(0, 1, self._color_map())
        self.gradient_legend.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
        self.gradient_legend.setMaximumWidth(250)
        self.grid.addItem(self.gradient_legend, 0, 1)
        self.viewbox = pg.ViewBox(enableMouse=False, enableMenu=False)
        self.viewbox.setAcceptedMouseButtons(Qt.NoButton)
        self.viewbox.setAcceptHoverEvents(False)
        self.grid.addItem(self.viewbox, 2, 1)

        self.left_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Left,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.left_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.left_dendrogram.setAcceptHoverEvents(False)

        self.top_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Top,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.top_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.top_dendrogram.setAcceptHoverEvents(False)

        self.grid.addItem(self.left_dendrogram, 2, 0)
        self.grid.addItem(self.top_dendrogram, 1, 1)

        self.right_labels = TextList(
            alignment=Qt.AlignLeft | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Expanding)
        )
        self.bottom_labels = TextList(
            orientation=Qt.Horizontal,
            alignment=Qt.AlignRight | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        )

        self.grid.addItem(self.right_labels, 2, 2)
        self.grid.addItem(self.bottom_labels, 3, 1)

        self.view.setCentralItem(self.grid_widget)

        self.gradient_legend.hide()
        self.left_dendrogram.hide()
        self.top_dendrogram.hide()
        self.right_labels.hide()
        self.bottom_labels.hide()

        self.matrix_item = None
        self.dendrogram = None

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def pack_settings(self):
        if self.matrix_item is not None:
            self.pending_selection = self.matrix_item.selections()
        else:
            self.pending_selection = None

    @Inputs.distances
    def set_distances(self, matrix):
        self.closeContext()
        self.clear()
        self.error()
        if matrix is not None:
            N, _ = matrix.shape
            if N < 2:
                self.error("Empty distance matrix.")
                matrix = None

        self.matrix = matrix
        if matrix is not None:
            self._matrix_range = numpy.nanmax(matrix)
            self.set_items(matrix.row_items, matrix.axis)
        else:
            self._matrix_range = 0.
            self.set_items(None)

        if matrix is not None:
            N, _ = matrix.shape
        else:
            N = 0

        model = self.sorting_cb.model()
        item = model.item(2)

        msg = None
        if N > OWDistanceMap._MaxOrderedClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.OrderedClustering:
                self.sorting = OWDistanceMap.Clustering
                msg = "Cluster ordering was disabled due to the input " \
                      "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        item = model.item(1)
        if N > OWDistanceMap._MaxClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.Clustering:
                self.sorting = OWDistanceMap.NoOrdering
            msg = "Clustering was disabled due to the input " \
                  "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        self.information(msg)

    def set_items(self, items, axis=1):
        self.items = items
        model = self.annot_combo.model()
        if items is None:
            model[:] = ["无", "枚举"]
        elif not axis:
            model[:] = ["无", "枚举", "Attribute names"]
        elif isinstance(items, Orange.data.Table):
            annot_vars = list(filter_visible(items.domain.variables)) + list(items.domain.metas)
            model[:] = ["无", "枚举"] + annot_vars
            self.annotation_idx = 0
            self.openContext(items.domain)
        elif isinstance(items, list) and \
                all(isinstance(item, Orange.data.Variable) for item in items):
            model[:] = ["无", "枚举", "Name"]
        else:
            model[:] = ["无", "枚举"]
        self.annotation_idx = min(self.annotation_idx, len(model) - 1)

    def clear(self):
        self.matrix = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._selection = []
        self._clear_plot()

    def handleNewSignals(self):
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            if self.pending_selection is not None:
                self.matrix_item.set_selections(self.pending_selection)
                self.pending_selection = None
        self.commit.now()

    def _clear_plot(self):
        def remove(item):
            item.setParentItem(None)
            item.scene().removeItem(item)

        if self.matrix_item is not None:
            self.matrix_item.selectionChanged.disconnect(
                self._invalidate_selection)
            remove(self.matrix_item)
            self.matrix_item = None

        self._set_displayed_dendrogram(None)
        self._set_labels(None)
        self.gradient_legend.hide()

    def _cluster_tree(self):
        if self._tree is None:
            self._tree = hierarchical.dist_matrix_clustering(self.matrix)
        return self._tree

    def _ordered_cluster_tree(self):
        if self._ordered_tree is None:
            tree = self._cluster_tree()
            self._ordered_tree = \
                hierarchical.optimal_leaf_ordering(tree, self.matrix)
        return self._ordered_tree

    def _setup_scene(self):
        self._clear_plot()
        self.matrix_item = DistanceMapItem(self._sorted_matrix)
        # Scale the y axis to compensate for pg.ViewBox's y axis invert
        self.matrix_item.setTransform(QTransform.fromScale(1, -1), )
        self.viewbox.addItem(self.matrix_item)
        # Set fixed view box range.
        h, w = self._sorted_matrix.shape
        self.viewbox.setRange(QRectF(0, -h, w, h), padding=0)

        self.matrix_item.selectionChanged.connect(self._invalidate_selection)

        if self.sorting == OWDistanceMap.NoOrdering:
            tree = None
        elif self.sorting == OWDistanceMap.Clustering:
            tree = self._cluster_tree()
        elif self.sorting == OWDistanceMap.OrderedClustering:
            tree = self._ordered_cluster_tree()

        self._set_displayed_dendrogram(tree)

        self._update_color()

    def _set_displayed_dendrogram(self, root):
        self.left_dendrogram.set_root(root)
        self.top_dendrogram.set_root(root)
        self.left_dendrogram.setVisible(root is not None)
        self.top_dendrogram.setVisible(root is not None)

        constraint = 0 if root is None else -1  # 150
        self.left_dendrogram.setMaximumWidth(constraint)
        self.top_dendrogram.setMaximumHeight(constraint)

    def _invalidate_ordering(self):
        self._sorted_matrix = None
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            self._invalidate_selection()

    def _update_ordering(self):
        if self.sorting == OWDistanceMap.NoOrdering:
            self._sorted_matrix = self.matrix
            self._sort_indices = None
        else:
            if self.sorting == OWDistanceMap.Clustering:
                tree = self._cluster_tree()
            elif self.sorting == OWDistanceMap.OrderedClustering:
                tree = self._ordered_cluster_tree()

            leaves = hierarchical.leaves(tree)
            indices = numpy.array([leaf.value.index for leaf in leaves])
            X = self.matrix
            self._sorted_matrix = X[indices[:, numpy.newaxis],
                                    indices[numpy.newaxis, :]]
            self._sort_indices = indices

    def _invalidate_annotations(self):
        if self.matrix is not None:
            self._update_labels()

    def _update_labels(self, ):
        if self.annotation_idx == 0:  # None
            labels = None
        elif self.annotation_idx == 1:  # Enumeration
            labels = [str(i + 1) for i in range(self.matrix.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == "Attribute names":
            attr = self.matrix.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.matrix.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, widget.AttributeList):
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Orange.data.Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]

        self._set_labels(labels)

    def _set_labels(self, labels):
        self._labels = labels

        if labels and self.sorting != OWDistanceMap.NoOrdering:
            sortind = self._sort_indices
            labels = [labels[i] for i in sortind]

        for textlist in [self.right_labels, self.bottom_labels]:
            textlist.setItems(labels or [])
            textlist.setVisible(bool(labels))

        constraint = -1 if labels else 0
        self.right_labels.setMaximumWidth(constraint)
        self.bottom_labels.setMaximumHeight(constraint)

    def _color_map(self) -> GradientColorMap:
        palette = self.color_map_widget.currentData()
        return GradientColorMap(
            palette.lookup_table(),
            thresholds=(self.color_low, max(self.color_high, self.color_low)),
            span=(0., self._matrix_range))

    def _update_color(self):
        palette = self.color_map_widget.currentData()
        self.palette_name = palette.name
        if self.matrix_item:
            cmap = self._color_map().replace(span=(0., 1.))
            colors = cmap.apply(numpy.arange(256) / 255.)
            self.matrix_item.setLookupTable(colors)
            self.gradient_legend.show()
            self.gradient_legend.setRange(0, self._matrix_range)
            self.gradient_legend.setColorMap(self._color_map())

    def _invalidate_selection(self):
        ranges = self.matrix_item.selections()
        ranges = reduce(iadd, ranges, [])
        indices = reduce(iadd, ranges, [])
        if self.sorting != OWDistanceMap.NoOrdering:
            sortind = self._sort_indices
            indices = [sortind[i] for i in indices]
        self._selection = list(sorted(set(indices)))
        self.commit.deferred()

    @gui.deferred
    def commit(self):
        datasubset = None
        featuresubset = None

        if not self._selection:
            pass
        elif isinstance(self.items, Orange.data.Table):
            indices = self._selection
            if self.matrix.axis == 1:
                datasubset = self.items.from_table_rows(self.items, indices)
            elif self.matrix.axis == 0:
                domain = Orange.data.Domain(
                    [self.items.domain[i] for i in indices],
                    self.items.domain.class_vars,
                    self.items.domain.metas)
                datasubset = self.items.transform(domain)
        elif isinstance(self.items, widget.AttributeList):
            subset = [self.items[i] for i in self._selection]
            featuresubset = widget.AttributeList(subset)

        self.Outputs.selected_data.send(datasubset)
        self.Outputs.annotated_data.send(create_annotated_table(self.items, self._selection))
        self.Outputs.features.send(featuresubset)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self.clear()

    def send_report(self):
        annot = self.annot_combo.currentText()
        if self.annotation_idx <= 1:
            annot = annot.lower()
        self.report_items((
            ("Sorting", self.sorting_cb.currentText().lower()),
            ("Annotations", annot)
        ))
        if self.matrix is not None:
            self.report_plot()