def test_editlinksnode(self):
        from ...registry.tests import small_testing_registry

        reg = small_testing_registry()
        file_desc = reg.widget("Orange.widgets.data.owfile.OWFile")
        bayes_desc = reg.widget("Orange.widgets.classify.ownaivebayes."
                                "OWNaiveBayes")
        source_node = SchemeNode(file_desc, title="This is File")
        sink_node = SchemeNode(bayes_desc)

        scene = QGraphicsScene()
        view = QGraphicsView(scene)

        node = EditLinksNode(node=source_node)
        scene.addItem(node)

        node = EditLinksNode(direction=Qt.RightToLeft)
        node.setSchemeNode(sink_node)

        node.setPos(300, 0)
        scene.addItem(node)

        view.show()
        view.resize(800, 300)
        self.app.exec_()
Exemplo n.º 2
0
 def test_other_exporter(self):
     sc = QGraphicsScene()
     sc.addItem(QGraphicsRectItem(0, 0, 3, 3))
     with patch("orangewidget.io.ImgFormat._get_exporter", Mock()) as mfn:
         with self.assertRaises(Exception):
             imgio.ImgFormat.write("", sc)
         self.assertEqual(0, mfn.call_count)
Exemplo n.º 3
0
    def test_editlinksnode(self):
        from ...registry.tests import small_testing_registry

        reg = small_testing_registry()
        file_desc = reg.widget("Orange.widgets.data.owfile.OWFile")
        bayes_desc = reg.widget("Orange.widgets.classify.ownaivebayes."
                                "OWNaiveBayes")
        source_node = SchemeNode(file_desc, title="This is File")
        sink_node = SchemeNode(bayes_desc)

        scene = QGraphicsScene()
        view = QGraphicsView(scene)

        node = EditLinksNode(node=source_node)
        scene.addItem(node)

        node = EditLinksNode(direction=Qt.RightToLeft)
        node.setSchemeNode(sink_node)

        node.setPos(300, 0)
        scene.addItem(node)

        view.show()
        view.resize(800, 300)
        self.app.exec_()
Exemplo n.º 4
0
class IconWidget(QWidget):
    def __init__(self):
        super().__init__()
        self.setLayout(QHBoxLayout())
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.setFixedSize(50, 50)

        view = QGraphicsView()
        self.layout().addWidget(view)
        self.scene = QGraphicsScene(view)
        view.setScene(self.scene)

    def set_widget(self, widget_description, category_description):
        node = NodeItem(widget_description)
        if category_description is not None:
            node.setWidgetCategory(category_description)
        self.scene.addItem(node)

    def render_as_png(self, filename):
        png = self.__transparent_png()
        painter = QtGui.QPainter(png)
        painter.setRenderHint(QtGui.QPainter.Antialiasing, 1)
        self.scene.render(painter, QRectF(0, 0, 50, 50),
                          QRectF(-25, -25, 50, 50))
        painter.end()
        png.save(filename)

    def __transparent_png(self):
        # PyQt is stupid and does not increment reference count on bg
        self.__bg = bg = np.zeros((50, 50, 4), dtype=np.ubyte)
        return QImage(bg.ctypes.data, bg.shape[1], bg.shape[0],
                      QtGui.QImage.Format_ARGB32)
class TestGraphicsPixmapWidget(GuiTest):
    def setUp(self) -> None:
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)

    def tearDown(self) -> None:
        self.scene.clear()
        self.scene.deleteLater()
        self.view.deleteLater()
        del self.scene
        del self.view

    def test_graphicspixmapwidget(self):
        w = GraphicsPixmapWidget()
        self.scene.addItem(w)
        w.setPixmap(QPixmap(100, 100))
        p = w.pixmap()
        self.assertEqual(p.size(), QSize(100, 100))
        self.view.grab()
        w.setScaleContents(True)
        w.setAspectRatioMode(Qt.KeepAspectRatio)
        s = w.sizeHint(Qt.PreferredSize)
        self.assertEqual(s, QSizeF(100., 100.))
        s = w.sizeHint(Qt.PreferredSize, QSizeF(200., -1.))
        self.assertEqual(s, QSizeF(200., 200.))
        s = w.sizeHint(Qt.PreferredSize, QSizeF(-1., 200.))
        self.assertEqual(s, QSizeF(200., 200.))
        self.view.grab()
class IconWidget(QWidget):
    def __init__(self):
        super().__init__()
        self.setLayout(QHBoxLayout())
        self.layout().setContentsMargins(0, 0, 0, 0)
        self.setFixedSize(50, 50)

        view = QGraphicsView()
        self.layout().addWidget(view)
        self.scene = QGraphicsScene(view)
        view.setScene(self.scene)

    def set_widget(self, widget_description, category_description):
        node = NodeItem(widget_description)
        if category_description is not None:
            node.setWidgetCategory(category_description)
        self.scene.addItem(node)

    def render_as_png(self, filename):
        img = QImage(50, 50, QImage.Format_ARGB32)
        img.fill(Qt.transparent)
        painter = QPainter(img)
        painter.setRenderHint(QPainter.Antialiasing, 1)
        self.scene.render(painter, QRectF(0, 0, 50, 50),
                          QRectF(-25, -25, 50, 50))
        painter.end()
        img.save(filename)
    def paint(self, painter, option, index):
        # type: (QPainter, QStyleOptionViewItem, QModelIndex) -> None
        data = index.data(FeatureStatisticsTableModel.DistributionRole)
        if data is None:
            return super().paint(painter, option, index)

        row = index.model().mapToSourceRows(index.row())

        if row not in self.__cache:
            scene = QGraphicsScene(self)
            attribute, data = data
            histogram = Histogram(
                data=data,
                variable=attribute,
                color_attribute=self.color_attribute,
                border=(0, 0, 2, 0),
                border_color='#ccc',
            )
            scene.addItem(histogram)
            self.__cache[row] = scene

        painter.setRenderHint(QPainter.HighQualityAntialiasing)

        if option.state & QStyle.State_Selected:
            background_color = option.palette.highlight()
        else:
            background_color = index.data(Qt.BackgroundRole)
        self.__cache[row].setBackgroundBrush(background_color)

        self.__cache[row].render(
            painter,
            target=QRectF(option.rect),
            mode=Qt.IgnoreAspectRatio,
        )
Exemplo n.º 8
0
def main(argv=None):  # pragma: no cover
    # pylint: disable=import-outside-toplevel
    import sys
    from AnyQt.QtWidgets import QGraphicsScene, QMenu
    from AnyQt.QtGui import QBrush
    app = QApplication(argv or sys.argv)
    scene = QGraphicsScene()
    view = GraphicsWidgetView(scene)
    scene.setParent(view)
    view.setContextMenuPolicy(Qt.CustomContextMenu)

    def context(pos):
        menu = QMenu(view)
        menu.addActions(view.actions())
        a = menu.addAction("Aspect mode")
        am = QMenu(menu)
        am.addAction("Ignore", lambda: view.setAspectMode(Qt.IgnoreAspectRatio))
        am.addAction("Keep", lambda: view.setAspectMode(Qt.KeepAspectRatio))
        am.addAction("Keep by expanding", lambda: view.setAspectMode(Qt.KeepAspectRatioByExpanding))
        a.setMenu(am)
        menu.popup(view.viewport().mapToGlobal(pos))

    view.customContextMenuRequested.connect(context)

    w = QGraphicsWidget()
    w.setPreferredSize(500, 500)
    palette = w.palette()
    palette.setBrush(palette.Window, QBrush(Qt.red, Qt.BDiagPattern))
    w.setPalette(palette)
    w.setAutoFillBackground(True)
    scene.addItem(w)
    view.setCentralWidget(w)
    view.show()
    return app.exec()
Exemplo n.º 9
0
    def data(self, index, role=Qt.DisplayRole):
        # type: (QModelIndex, Qt.QDisplayRole) -> Any
        if not index.isValid():
            return None

        idx = index.row()

        if role == Qt.SizeHintRole:
            return self.item_scale * QSize(100, 100)

        if role == Qt.DisplayRole:
            if 'tree' not in self._other_data[idx]:
                scene = QGraphicsScene(parent=self)
                tree = PythagorasTreeViewer(
                    adapter=self._list[idx],
                    weight_adjustment=OWPythagoreanForest.SIZE_CALCULATION[
                        self.size_calc_idx][1],
                    interactive=False,
                    padding=100,
                    depth_limit=self.depth_limit,
                    target_class_index=self.target_class_idx,
                )
                scene.addItem(tree)
                self._other_data[idx]['scene'] = scene
                self._other_data[idx]['tree'] = tree

            return self._other_data[idx]['scene']

        return super().data(index, role)
Exemplo n.º 10
0
    def data(self, index, role=Qt.DisplayRole):
        # type: (QModelIndex, Qt.QDisplayRole) -> Any
        if not index.isValid():
            return None

        idx = index.row()

        if role == Qt.SizeHintRole:
            return self.item_scale * QSize(100, 100)

        if role == Qt.DisplayRole:
            if 'tree' not in self._other_data[idx]:
                scene = QGraphicsScene(parent=self)
                tree = PythagorasTreeViewer(
                    adapter=self._list[idx],
                    weight_adjustment=OWPythagoreanForest.SIZE_CALCULATION[
                        self.size_calc_idx][1],
                    interactive=False,
                    padding=100,
                    depth_limit=self.depth_limit,
                    target_class_index=self.target_class_idx,
                )
                scene.addItem(tree)
                self._other_data[idx]['scene'] = scene
                self._other_data[idx]['tree'] = tree

            return self._other_data[idx]['scene']

        return super().data(index, role)
Exemplo n.º 11
0
        def display():
            # pylint: disable=too-many-branches
            def format_zeros(str_val):
                """Zeros should be handled separately as they cannot be negative."""
                if float(str_val) == 0:
                    num_decimals = min(self.variables[row].number_of_decimals,
                                       2)
                    str_val = f"{0:.{num_decimals}f}"
                return str_val

            def render_value(value):
                if np.isnan(value):
                    return ""
                if np.isinf(value):
                    return "∞"

                str_val = attribute.str_val(value)
                if attribute.is_continuous and not attribute.is_time:
                    str_val = format_zeros(str_val)

                return str_val

            if column == self.Columns.NAME:
                return attribute.name
            elif column == self.Columns.DISTRIBUTION:
                if isinstance(attribute,
                              (DiscreteVariable, ContinuousVariable)):
                    if row not in self.__distributions_cache:
                        scene = QGraphicsScene(parent=self)
                        histogram = Histogram(
                            data=self.table,
                            variable=attribute,
                            color_attribute=self.target_var,
                            border=(0, 0, 2, 0),
                            border_color='#ccc',
                        )
                        scene.addItem(histogram)
                        self.__distributions_cache[row] = scene
                    return self.__distributions_cache[row]
            elif column == self.Columns.CENTER:
                return render_value(self._center[row])
            elif column == self.Columns.MEDIAN:
                return render_value(self._median[row])
            elif column == self.Columns.DISPERSION:
                if isinstance(attribute, TimeVariable):
                    return format_time_diff(self._min[row], self._max[row])
                elif isinstance(attribute, DiscreteVariable):
                    return "%.3g" % self._dispersion[row]
                else:
                    return render_value(self._dispersion[row])
            elif column == self.Columns.MIN:
                if not isinstance(attribute, DiscreteVariable):
                    return render_value(self._min[row])
            elif column == self.Columns.MAX:
                if not isinstance(attribute, DiscreteVariable):
                    return render_value(self._max[row])
            elif column == self.Columns.MISSING:
                return '%d (%d%%)' % (self._missing[row], 100 *
                                      self._missing[row] / self.n_instances)
            return None
Exemplo n.º 12
0
 def test_other_exporter(self):
     sc = QGraphicsScene()
     sc.addItem(QGraphicsRectItem(0, 0, 3, 3))
     with patch("Orange.widgets.io.ImgFormat._get_exporter",
                Mock()) as mfn:
         with self.assertRaises(Exception):
             imgio.ImgFormat.write("", sc)
         self.assertEqual(0, mfn.call_count)
Exemplo n.º 13
0
 def test_pdf(self):
     sc = QGraphicsScene()
     sc.addItem(QGraphicsRectItem(0, 0, 20, 20))
     fd, fname = tempfile.mkstemp()
     os.close(fd)
     try:
         imgio.PdfFormat.write_image(fname, sc)
     finally:
         os.unlink(fname)
Exemplo n.º 14
0
 def test_other(self):
     fd, fname = tempfile.mkstemp('.pdf')
     os.close(fd)
     sc = QGraphicsScene()
     sc.addItem(QGraphicsRectItem(0, 0, 3, 3))
     try:
         imgio.PdfFormat.write(fname, sc)
         with open(fname, "rb") as f:
             self.assertTrue(f.read().startswith(b'%PDF'))
     finally:
         os.unlink(fname)
Exemplo n.º 15
0
    def test_graphicstextwidget(self):
        scene = QGraphicsScene()
        view = QGraphicsView(scene)

        text = GraphicsTextWidget()
        text.setHtml("<center><b>a text</b></center><p>paragraph</p>")
        scene.addItem(text)
        view.show()
        view.resize(400, 300)

        self.app.exec_()
Exemplo n.º 16
0
 def test_other(self):
     fd, fname = tempfile.mkstemp('.png')
     os.close(fd)
     sc = QGraphicsScene()
     sc.addItem(QGraphicsRectItem(0, 0, 3, 3))
     try:
         imgio.PngFormat.write(fname, sc)
         im = QImage(fname)
         # writer adds 15*2 of empty space
         self.assertEqual((30+4, 30+4), (im.width(), im.height()))
     finally:
         os.unlink(fname)
class TestDendrogramWidget(GuiTest):
    def setUp(self) -> None:
        super().setUp()
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.widget = DendrogramWidget()
        self.scene.addItem(self.widget)

    def tearDown(self) -> None:
        self.scene.clear()
        del self.widget
        del self.view
        super().tearDown()

    def test_widget(self):
        w = self.widget

        T = hierarchical.Tree
        C = hierarchical.ClusterData
        S = hierarchical.SingletonData

        def t(h: float, left: T, right: T):
            return T(C((left.value.first, right.value.last), h), (left, right))

        def leaf(r, index):
            return T(S((r, r + 1), 0.0, index))

        T = hierarchical.Tree

        w.set_root(t(0.0, leaf(0, 0), leaf(1, 1)))
        w.resize(w.effectiveSizeHint(Qt.PreferredSize))
        h = w.height_at(QPoint())
        self.assertEqual(h, 0)
        h = w.height_at(QPoint(10, 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().x())
        self.assertEqual(w.pos_at_height(1).x(), w.rect().x())

        height = np.finfo(float).eps
        w.set_root(t(height, leaf(0, 0), leaf(1, 1)))

        h = w.height_at(QPoint())
        self.assertEqual(h, height)
        h = w.height_at(QPoint(w.size().width(), 0))
        self.assertEqual(h, 0)

        self.assertEqual(w.pos_at_height(0).x(), w.rect().right())
        self.assertEqual(w.pos_at_height(height).x(), w.rect().left())
Exemplo n.º 18
0
    def __init__(self, master, *args):
        QGraphicsView.__init__(self, *args)
        self.master = master

        self.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

        self.setRenderHints(QPainter.Antialiasing)
        scene = QGraphicsScene(self)
        self.pixmapGraphicsItem = QGraphicsPixmapItem(None)
        scene.addItem(self.pixmapGraphicsItem)
        self.setScene(scene)

        self.setMouseTracking(True)
        self.viewport().setMouseTracking(True)

        self.setFocusPolicy(Qt.WheelFocus)
    def __init__(self, master, *args):
        QGraphicsView.__init__(self, *args)
        self.master = master

        self.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
        self.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)

        self.setRenderHints(QPainter.Antialiasing)
        scene = QGraphicsScene(self)
        self.pixmapGraphicsItem = QGraphicsPixmapItem(None)
        scene.addItem(self.pixmapGraphicsItem)
        self.setScene(scene)

        self.setMouseTracking(True)
        self.viewport().setMouseTracking(True)

        self.setFocusPolicy(Qt.WheelFocus)
Exemplo n.º 20
0
        def display():
            # pylint: disable=too-many-branches
            def render_value(value):
                return "" if np.isnan(value) else attribute.str_val(value)

            if column == self.Columns.NAME:
                return attribute.name
            elif column == self.Columns.DISTRIBUTION:
                if isinstance(attribute,
                              (DiscreteVariable, ContinuousVariable)):
                    if row not in self.__distributions_cache:
                        scene = QGraphicsScene(parent=self)
                        histogram = Histogram(
                            data=self.table,
                            variable=attribute,
                            color_attribute=self.target_var,
                            border=(0, 0, 2, 0),
                            border_color='#ccc',
                        )
                        scene.addItem(histogram)
                        self.__distributions_cache[row] = scene
                    return self.__distributions_cache[row]
            elif column == self.Columns.CENTER:
                return render_value(self._center[row])
            elif column == self.Columns.DISPERSION:
                if isinstance(attribute, TimeVariable):
                    return format_time_diff(self._min[row], self._max[row])
                elif isinstance(attribute, DiscreteVariable):
                    return "%.3g" % self._dispersion[row]
                else:
                    return render_value(self._dispersion[row])
            elif column == self.Columns.MIN:
                if not isinstance(attribute, DiscreteVariable) \
                        or attribute.ordered:
                    return render_value(self._min[row])
            elif column == self.Columns.MAX:
                if not isinstance(attribute, DiscreteVariable) \
                        or attribute.ordered:
                    return render_value(self._max[row])
            elif column == self.Columns.MISSING:
                return '%d (%d%%)' % (
                    self._missing[row],
                    100 * self._missing[row] / self.n_instances
                )
            return None
Exemplo n.º 21
0
def render_drop_shadow_frame(pixmap, shadow_rect, shadow_color, offset, radius,
                             rect_fill_color):
    pixmap.fill(QColor(0, 0, 0, 0))
    scene = QGraphicsScene()
    rect = QGraphicsRectItem(shadow_rect)
    rect.setBrush(QColor(rect_fill_color))
    rect.setPen(QPen(Qt.NoPen))
    scene.addItem(rect)
    effect = QGraphicsDropShadowEffect(color=shadow_color,
                                       blurRadius=radius,
                                       offset=offset)

    rect.setGraphicsEffect(effect)
    scene.setSceneRect(QRectF(QPointF(0, 0), QSizeF(pixmap.size())))
    painter = QPainter(pixmap)
    scene.render(painter)
    painter.end()
    scene.clear()
    scene.deleteLater()
    return pixmap
Exemplo n.º 22
0
def render_drop_shadow_frame(pixmap, shadow_rect, shadow_color,
                             offset, radius, rect_fill_color):
    pixmap.fill(QColor(0, 0, 0, 0))
    scene = QGraphicsScene()
    rect = QGraphicsRectItem(shadow_rect)
    rect.setBrush(QColor(rect_fill_color))
    rect.setPen(QPen(Qt.NoPen))
    scene.addItem(rect)
    effect = QGraphicsDropShadowEffect(color=shadow_color,
                                       blurRadius=radius,
                                       offset=offset)

    rect.setGraphicsEffect(effect)
    scene.setSceneRect(QRectF(QPointF(0, 0), QSizeF(pixmap.size())))
    painter = QPainter(pixmap)
    scene.render(painter)
    painter.end()
    scene.clear()
    scene.deleteLater()
    return pixmap
Exemplo n.º 23
0
def save_widget_icon(
        background,
        icon: str,
        outname: str,
        export_size=QSize(100, 100),
        format="png",
):
    # create fake windget and category descriptions
    widget_description = WidgetDescription("",
                                           "",
                                           icon=icon,
                                           qualified_name="orangecanvas")
    category = CategoryDescription(background=background)
    item = NodeItem(widget_description)
    item.setWidgetCategory(category)
    iconItem = item.icon_item
    shapeItem = item.shapeItem

    shapeSize = export_size
    iconSize = QSize(export_size.width() * 3 / 4, export_size.height() * 3 / 4)

    rect = QRectF(QPointF(-shapeSize.width() / 2, -shapeSize.height() / 2),
                  QSizeF(shapeSize))
    shapeItem.setShapeRect(rect)
    iconItem.setIconSize(iconSize)
    iconItem.setPos(-iconSize.width() / 2, -iconSize.height() / 2)

    image = QImage(export_size, QImage.Format_ARGB32)
    image.fill(QColor("#00FFFFFF"))
    painter = QPainter(image)

    painter.setRenderHint(QPainter.Antialiasing, 1)

    scene = QGraphicsScene()
    scene.addItem(shapeItem)

    scene.render(painter, QRectF(image.rect()), scene.sceneRect())
    painter.end()

    if not image.save(outname, format, 80):
        print("Failed to save " + outname)
Exemplo n.º 24
0
    def test_editlinksnode(self):
        reg = small_testing_registry()
        one_desc = reg.widget("one")
        negate_desc = reg.widget("negate")
        source_node = SchemeNode(one_desc, title="This is 1")
        sink_node = SchemeNode(negate_desc)

        scene = QGraphicsScene()
        view = QGraphicsView(scene)

        node = EditLinksNode(node=source_node)
        scene.addItem(node)

        node = EditLinksNode(direction=Qt.RightToLeft)
        node.setSchemeNode(sink_node)

        node.setPos(300, 0)
        scene.addItem(node)

        view.show()
        view.resize(800, 300)
        self.app.exec_()
Exemplo n.º 25
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan),
                 ("Cosine", Orange.distance.Cosine)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")
        nan_distances = Msg("{} instance{s} omitted (undefined distances)")
        ignoring_categorical = Msg("Ignoring categorical features")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14, addSpace=4,
            callback=self._invalidate_scores
        )
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            data = self.data
            if not metric.supports_discrete and any(
                    a.is_discrete for a in data.domain.attributes):
                self.Warning.ignoring_categorical()
                data = Orange.distance.remove_discrete_features(data)
            try:
                self._matrix = np.asarray(metric(data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.clear()

    def _update_labels(self):
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq, _ = np.unique(labels, return_counts=True)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]])
                )

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    # pylint: disable=invalid-unary-operand-type
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # pylint: disable=invalid-unary-operand-type
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                # pylint: disable=invalid-unary-operand-type
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Exemplo n.º 26
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `attr_changed`.

    - `attr_changed` handles changes of attribute or grouping (callbacks for
    list boxes). It recomputes box data by calling `compute_box_data`, shows
    the appropriate display box (discrete/continuous) and then calls
    `layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    inputs = [("Data", Orange.data.Table, "set_data")]

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()

    attribute = ContextSetting(None)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMedians)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        common_options = dict(
            callback=self.attr_changed, sizeHint=(200, 100))
        self.attrs = VariableListModel()
        gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, **common_options)
        self.group_vars = VariableListModel()
        gui.listView(
            self.controlArea, self, "group_var", box="Grouping",
            model=self.group_vars, **common_options)

        # TODO: move Compare median/mean to grouping box
        self.display_box = gui.vBox(self.controlArea, "Display")

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.display_changed)

        self.stretching_box = gui.checkBox(
            self.controlArea, self, 'stretched', "Stretch bars", box='Display',
            callback=self.display_changed).box

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(650)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    # noinspection PyTypeChecker
    def set_data(self, dataset):
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars[:] = \
                [None] + \
                [a for a in chain(domain.variables, domain.metas)
                 if a.is_discrete]
            self.attrs[:] = chain(domain.variables,
                                  (a for a in domain.metas if a.is_primitive()))
            if self.attrs:
                self.attribute = self.attrs[0]
            if domain.class_var and domain.class_var.is_discrete:
                self.group_var = domain.class_var
            else:
                self.group_var = None  # Reset to trigger selection via callback
            self.openContext(self.dataset)
            self.attr_changed()
        else:
            self.reset_all_data()

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs[:] = []
        self.group_vars[:] = []
        self.is_continuous = False
        self.update_display_box()

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        if dataset is None:
            self.stats = self.dist = self.conts = []
            return
        self.is_continuous = attr.is_continuous
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                self.stats = [BoxData(cont) for cont in self.conts]
            self.label_txts_all = self.group_var.values
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist)]
            self.label_txts_all = [""]
        self.label_txts = [txts for stat, txts in zip(self.stats,
                                                      self.label_txts_all)
                           if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()

    def clear_scene(self):
        self.box_scene.clear()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.mean_labels = [self.mean_label(stat, attr, lab)
                            for stat, lab in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [self.label_group(stat, attr, mean_lab)
                       for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts]
        for it in chain(self.labels, self.boxes, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            return self.display_changed_disc()

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            self.order = sorted(
                self.order, key=lambda i: getattr(self.stats[i], criterion))

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            self.boxes[box_index].setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()

    def display_changed_disc(self):
        self.clear_scene()
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts_all]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts]
            else:
                self.labels = [
                    QGraphicsTextItem(str(int(sum(self.dist))))]

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = [self.strudel(cont) for cont in self.conts]
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box in enumerate(self.boxes):
            y = (-len(self.boxes) + row) * 40 + 10

            label = self.attr_labels[row]
            b = label.boundingRect()
            label.setPos(-b.width() - 10, y - b.height() / 2)
            self.box_scene.addItem(label)
            if not self.stretched:
                label = self.labels[row]
                b = label.boundingRect()
                if self.group_var:
                    right = self.scale_x * sum(self.conts[row])
                else:
                    right = self.scale_x * sum(self.dist)
                label.setPos(right + 10, y - b.height() / 2)
                self.box_scene.addItem(label)

            if self.attribute is not self.group_var:
                for text_item, bar_part in zip(box.childItems()[1::2],
                                               box.childItems()[::2]):
                    label = QGraphicsSimpleTextItem(
                        text_item.toPlainText())
                    label.setPos(bar_part.boundingRect().x(),
                                 y - label.boundingRect().height() - 8)
                    self.box_scene.addItem(label)
            for text_item in box.childItems()[1::2]:
                box.removeFromGroup(text_item)
            self.box_scene.addItem(box)
            box.setPos(0, y)
        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width, len(self.boxes * 40) + 90)
        self.infot1.setText("")

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            d1, d2 = self.stats
            pooled_var = d1.var / d1.n + d2.var / d2.n
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]))]
        mean_labels = self.mean_labels or [self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)

            t = self.box_scene.addSimpleText(
                self.attribute.repr_val(val) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(
                t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        if self.stretched:
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(sum(dist) * scale_x + 10 +
                            lbl.boundingRect().width()
                            for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        msc = stat.median * self.scale_x
        med_t = centered_text(stat.median, msc)
        med_box_width2 = med_t.boundingRect().width()
        line(msc)

        x = stat.q25 * self.scale_x
        t = centered_text(stat.q25, x)
        t_box = t.boundingRect()
        med_left = msc - med_box_width2
        if x + t_box.width() / 2 >= med_left - 5:
            move_label(t, x, med_left - t_box.width() - 5)
        else:
            line(x)

        x = stat.q75 * self.scale_x
        t = centered_text(stat.q75, x)
        t_box = t.boundingRect()
        med_right = msc + med_box_width2
        if x - t_box.width() / 2 <= med_right + 5:
            move_label(t, x, med_right + 5)
        else:
            line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = QGraphicsItemGroup()
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5, box)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5, box)
        vert_line = line(stat.a_min, 0, stat.a_max, 0, box)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3, box)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0, box)
        var_line.setPen(self._pen_paramet)

        mbox = QGraphicsRectItem(stat.q25 * scale_x, -height / 2,
                                 (stat.q75 - stat.q25) * scale_x, height,
                                 box)
        mbox.setBrush(self._box_brush)
        mbox.setPen(QPen(Qt.NoPen))
        mbox.setZValue(-200)

        median_line = line(stat.median, -height / 2,
                           stat.median, height / 2, box)
        median_line.setPen(self._pen_median)
        median_line.setZValue(-150)

        return box

    def strudel(self, dist):
        attr = self.attribute
        ss = np.sum(dist)
        box = QGraphicsItemGroup()
        if ss < 1e-6:
            QGraphicsRectItem(0, -10, 1, 10, box)
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            rect = QGraphicsRectItem(cum + 1, -6, v - 2, 12, box)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.addToGroup(text)
            cum += v
        return box

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line) * self.scale_x
            xs.append(x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if last_to == to or frm == to:
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        if self.attribute:
            return self.attribute.name

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)
Exemplo n.º 27
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent commiting, the only one
        # that this currently helps is that when changing the size calculation
        # the trees are all recomputed, but we don't want to output a new tree
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=20,
            maxValue=150, callback=self.zoom_changed, createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            self.color_palette = self.forest_adapter.get_trees()[0]

            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and self.instances.domain != model.domain:
                self.clf_dataset = self.instances.transform(self.model.domain)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            with self._prevent_commit():
                self.grid.clear()
                self._draw_trees()
                # Keep the selected item
                if self.selected_tree_index != -1:
                    self.grid_items[self.selected_tree_index].setSelected(True)
                self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    @contextmanager
    def _prevent_commit(self):
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _update_info_box(self):
        self.ui_info.setText('Trees: {}'.format(len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        return max(tree.tree_adapter.max_depth for tree in self.ptrees)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarly disable the UI while trees may be redrawn."""
        try:
            self.ui_size_calc_combo.setEnabled(False)
            self.ui_depth_slider.setEnabled(False)
            self.ui_target_class_combo.setEnabled(False)
            self.ui_zoom_slider.setEnabled(False)
            yield
        finally:
            self.ui_size_calc_combo.setEnabled(True)
            self.ui_depth_slider.setEnabled(True)
            self.ui_target_class_combo.setEnabled(True)
            self.ui_zoom_slider.setEnabled(True)

    def _draw_trees(self):
        self.grid_items, self.ptrees = [], []

        num_trees = len(self.forest_adapter.get_trees())
        with self.progressBar(num_trees) as prg, self.disable_ui():
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None, tree, interactive=False, padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[self.size_calc_idx][1]
                )
                grid_item = GridItem(
                    ptree, self.grid, max_size=self._calculate_zoom(self.zoom)
                )
                # We don't want to show flickering while the trees are being
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                width = (self.view.width() - self.view.verticalScrollBar().width())
                self.grid.reflow(width)
                self.grid.setPreferredWidth(width)
                # After drawing is complete, we show the trees
                for grid_item in self.grid_items:
                    grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        if not self.scene.selectedItems():
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        tree = self.model.trees[self.selected_tree_index]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [x for x in self.ui_target_class_combo.parent().children()
                 if isinstance(x, QLabel)][0]

        if self.instances.domain.has_discrete_class:
            label_text = 'Target class'
            values = [c.title() for c in self.instances.domain.class_vars[0].values]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)
Exemplo n.º 28
0
class OWMosaicDisplay(OWWidget):
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1 = ContextSetting(None)
    variable2 = ContextSetting(None)
    variable3 = ContextSetting(None)
    variable4 = ContextSetting(None)
    variable_color = ContextSetting(None)
    selection = ContextSetting(set())

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]
    graph_name = "canvas"

    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None

        self.color_data = None

        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(
            self.canvas, handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.model_1 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(
                box, self, value="variable{}".format(i),
                orientation=Qt.Horizontal, contentsLength=12,
                callback=self.attr_changed,
                model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(
            order=DomainModel.MIXED, valid_types=DomainModel.PRIMITIVE,
            placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(
            box2, self, value="variable_color",
            orientation=Qt.Horizontal, contentsLength=12, labelWidth=50,
            callback=self.set_color_data, model=self.color_model)
        self.bar_button = gui.checkBox(
            box2, self, 'use_boxes', label='Compare with total',
            callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.
        Return None when there is no data, no rows, or no primitive attributes.
        """
        if (data is None or
                not len(data) or
                not any(attr.is_discrete or attr.is_continuous
                        for attr in chain(data.domain.variables,
                                          data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(
                method=EqualFreq(n=4), remove_const=False, discretize_classes=True,
                discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        return [self.discrete_data.domain[var.name]
                for var in (self.variable1, self.variable2,
                            self.variable3, self.variable4)
                if var]

    def set_attr(self, *attrs):
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs]
        self.reset_graph()

    def attr_changed(self):
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                self.subset_indices = [ex.id in indices for ex in self.data]
        self.set_color_data()
        self.reset_graph()

    def clear_selection(self):
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs, used_vals, attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{}&nbsp;&nbsp;&nbsp;&nbsp;{}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(
                            attr_list[1:], (x0 + start, x0 + end), (y0, y1),
                            side + 1, tooltip, total_attrs, *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(
                            attr_list[1:], (x0, x1), (y0 + start, y0 + end),
                            side + 1, tooltip, total_attrs, *args)
            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0
            total, counts = get_counts(attr_vals, values)
            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    xs = [x0 + currpos + width * 0.5 * perc,
                          x0 - self.ATTR_VAL_OFFSET,
                          x0 + currpos + width * perc * 0.5,
                          x1 + self.ATTR_VAL_OFFSET]
                    ys = [y1 + self.ATTR_VAL_OFFSET,
                          y0 + currpos + height * 0.5 * perc,
                          y0 - self.ATTR_VAL_OFFSET,
                          y0 + currpos + height * 0.5 * perc]
                    CanvasText(self.canvas, val, xs[side], ys[side], align)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [x0 + (x1 - x0) / 2,
                  x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                  x0 + (x1 - x0) / 2,
                  x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET]
            ys = [y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                  y0 + (y1 - y0) / 2,
                  y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                  y0 + (y1 - y0) / 2]
            CanvasText(
                self.canvas, attr.name, xs[side], ys[side], align, bold=True,
                vertical=side % 2)

        def add_rect(x0, x1, y0, y1, condition,
                     used_attrs, used_vals, attr_vals=""):
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                        self.canvas, x, y, w, h, z=z, onclick=select_area,
                        **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                    self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                    onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul,
                    (apriori_dists[i][used_vals[i]] / float(s)
                     for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" +
                    "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                outer_rect.setToolTip(
                    "{}<hr>Instances: {}<br><br>{}".format(
                        condition, n_actual, text[:-4]))

        def draw_legend(x0_x1, y0_y1):
            x0, x1 = x0_x1
            _, y1 = y0_y1
            if self.variable_color is None:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8
            for i in range(len(names) - 1):
                if self.variable_color is None:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = isinstance(data, SqlTable)
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [get_distribution(data, attr) for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3].name, bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
            0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list), [], [])
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            settings.migrate_str_to_variable(context, none_placeholder="(None)")
Exemplo n.º 29
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls`layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)
    auto_commit = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        view = gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(
            view.box, self, "order_by_importance",
            "Order by relevance",
            tooltip="Order by 𝜒² or ANOVA over the subgroups",
            callback=self.apply_sorting)
        self.group_vars = DomainModel(
            placeholder="None", separators=False,
            valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(
            self.controlArea, self, "group_var", box="Subgroups",
            model=self.group_vars, callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(
            self.controlArea, "Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum),
            addSpace=False)

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(
            self.controlArea, box="Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(
            box, self, 'stretched', "Stretch bars",
            callback=self.display_changed)
        gui.checkBox(
            box, self, 'show_labels', "Show box labels",
            callback=self.display_changed)
        self.sort_cb = gui.checkBox(
            box, self, 'sort_freqs', "Sort by subgroup frequencies",
            callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(300)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        return QSize(900, 500)

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        self.attrs[:] = [
            var for var in chain(
                domain.class_vars, domain.metas, domain.attributes)
            if var.is_primitive()]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain) and not
                any(var.is_primitive() for var in dataset.domain.metas)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        temp_cond = self.conditions.copy()
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(box.filter.conditions in
                                [c.conditions for c in temp_cond])

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        if dataset is None or not self.is_continuous and not attr.values or \
                        self.group_var and not self.group_var.values:
            self.stats = self.dist = self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                stats, label_texts = [], []
                for i, cont in enumerate(self.conts):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, attr, i, self.group_var))
                        label_texts.append(self.group_var.values[i])
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = \
                    [v for v, c in zip(self.group_var.values, self.conts)
                     if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [txts for stat, txts in zip(self.stats,
                                                      self.label_txts_all)
                           if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.mean_labels = [self.mean_label(stat, attr, lab)
                            for stat, lab in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [self.label_group(stat, attr, mean_lab)
                       for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        assert not self.is_continuous
        self.clear_scene()
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts_all]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0]
            else:
                self.labels = [
                    QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = \
                [self.strudel(cont, i) for i, cont in enumerate(self.conts)
                 if np.sum(cont) > 0]
            self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0]

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(self.order, key=(-np.sum(self.conts, axis=1)).__getitem__)
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.stretched:
                self.__draw_row_counts(y, box_index)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width, len(self.boxes * 40) + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        assert not self.is_continuous
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        label = bar_part = None
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(
                text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests

        # pylint: disable=comparison-with-itself
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n < 2 or d2.n < 2:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # pylint: disable=comparison-with-itself
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, p)
            else:
                t, p = stat_ttest()
                t = "" if np.isnan(t) else f"Student's t: {t:.3f} (p={p:.3f})"
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, p)
            else:
                F, p = stat_ANOVA()
                t = "" if np.isnan(F) else f"ANOVA: {F:.3f} (p={p:.3f})"
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(
                t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.is_continuous
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(sum(dist) * scale_x + 10 +
                            lbl.boundingRect().width()
                            for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(
                stat.conditions, stat.q25 * scale_x, -height / 2,
                (stat.q75 - stat.q25) * scale_x, height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2,
                               stat.median, height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        self.conditions = [item.filter for item in
                           self.box_scene.selectedItems() if item.filter]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            selection = np.in1d(
                self.dataset.ids, selected.ids, assume_unique=True).nonzero()[0]
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        return self.attribute.name if self.attribute else None

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)
Exemplo n.º 30
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")

    def __init__(self):
        super().__init__()

        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self._silplot = None

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.Error.need_two_clusters.clear()
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.Error.need_two_clusters()
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""]
                )

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()
            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.from_table(
                    domain, self.data)
            else:
                domain = self.data.domain
                data = self.data

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = numpy.c_[scores[selectedmask]]
                data[:, silhouette_var] = numpy.c_[scores]

        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Exemplo n.º 31
0
class OWSilhouettePlot(widget.OWWidget):
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None  # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None  # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None  # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None  # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None  # type: Optional[SilhouettePlot]

        gui.comboBox(self.controlArea,
                     self,
                     "distance_idx",
                     box="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     orientation=Qt.Horizontal,
                     callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(box,
                                           self,
                                           "cluster_var_idx",
                                           contentsLength=14,
                                           addSpace=4,
                                           callback=self._invalidate_scores)
        gui.checkBox(box,
                     self,
                     "group_by_cluster",
                     "Group by cluster",
                     callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(box,
                    self,
                    "bar_size",
                    minValue=1,
                    maxValue=10,
                    step=1,
                    callback=self._update_bar_size,
                    addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(box,
                                          self,
                                          "annotation_var_idx",
                                          contentsLength=14,
                                          callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box,
                     self,
                     "add_scores",
                     "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(box,
                        self,
                        "auto_commit",
                        "Commit",
                        auto_label="Auto commit",
                        box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            try:
                self._matrix = np.asarray(metric(self.data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.missing_cluster_assignment.clear()

    def _update_labels(self):
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        mask = np.isnan(labels)
        labels = labels.astype(int)
        labels = labels[~mask]

        labels_unq, _ = np.unique(labels, return_counts=True)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels, metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if labels is not None:
            count_missing = np.count_nonzero(mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")

    def _set_bar_height(self):
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(not visible
                                           and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values,
                                  var.colors)
            else:
                silplot.setScores(self._silhouette,
                                  np.zeros(len(self._silhouette), dtype=int),
                                  [""], np.array([[63, 207, 207]]))

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(self.bar_size < 5
                                           and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape, fill_value=np.nan)
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes, self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(domain, self.data,
                                                np.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        if self.annotation_var_idx and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        self.clear()
        super().onDeleteWidget()
Exemplo n.º 32
0
class OWMosaicDisplay(OWWidget):
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    vizrank = SettingProvider(MosaicVizRank)
    settings_version = 2
    use_boxes = Setting(True)
    variable1: Variable = ContextSetting(None)
    variable2: Variable = ContextSetting(None)
    variable3: Variable = ContextSetting(None)
    variable4: Variable = ContextSetting(None)
    variable_color: DiscreteVariable = ContextSetting(None)
    selection = Setting(set(), schema_only=True)

    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [
        QColor(255, 255, 255),
        QColor(210, 210, 255),
        QColor(110, 110, 255),
        QColor(0, 0, 255)
    ]
    RED_COLORS = [
        QColor(255, 255, 255),
        QColor(255, 200, 200),
        QColor(255, 100, 100),
        QColor(255, 0, 0)
    ]
    graph_name = "canvas"

    attrs_changed_manually = Signal(list)

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of numeric features on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.subset_data = None
        self.subset_indices = None
        self.__pending_selection = self.selection
        self.selection = set()

        self.color_data = None

        self.areas = []

        self.canvas = QGraphicsScene(self)
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.model_1 = DomainModel(order=DomainModel.MIXED,
                                   valid_types=DomainModel.PRIMITIVE)
        self.model_234 = DomainModel(order=DomainModel.MIXED,
                                     valid_types=DomainModel.PRIMITIVE,
                                     placeholder="(None)")
        self.attr_combos = [
            gui.comboBox(box,
                         self,
                         value="variable{}".format(i),
                         orientation=Qt.Horizontal,
                         contentsLength=12,
                         searchable=True,
                         callback=self.attr_changed,
                         model=self.model_1 if i == 1 else self.model_234)
            for i in range(1, 5)
        ]
        self.vizrank, self.vizrank_button = MosaicVizRank.add_vizrank(
            box, self, "Find Informative Mosaics", self.set_attr)

        box2 = gui.vBox(self.controlArea, box="Interior Coloring")
        self.color_model = DomainModel(order=DomainModel.MIXED,
                                       valid_types=DomainModel.PRIMITIVE,
                                       placeholder="(Pearson residuals)")
        self.cb_attr_color = gui.comboBox(box2,
                                          self,
                                          value="variable_color",
                                          orientation=Qt.Horizontal,
                                          contentsLength=12,
                                          labelWidth=50,
                                          searchable=True,
                                          callback=self.set_color_data,
                                          model=self.color_model)
        self.bar_button = gui.checkBox(box2,
                                       self,
                                       'use_boxes',
                                       label='Compare with total',
                                       callback=self.update_graph)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(720, 530)

    def _get_discrete_data(self, data):
        """
        Discretize continuous attributes.
        Return None when there is no data, no rows, or no primitive attributes.
        """
        if (data is None or not len(data) or not any(
                attr.is_discrete or attr.is_continuous
                for attr in chain(data.domain.variables, data.domain.metas))):
            return None
        elif any(attr.is_continuous for attr in data.domain.variables):
            return Discretize(method=EqualFreq(n=4),
                              remove_const=False,
                              discretize_classes=True,
                              discretize_metas=True)(data)
        else:
            return data

    def init_combos(self, data):
        def set_combos(value):
            self.model_1.set_domain(value)
            self.model_234.set_domain(value)
            self.color_model.set_domain(value)

        if data is None:
            set_combos(None)
            self.variable1 = self.variable2 = self.variable3 \
                = self.variable4 = self.variable_color = None
            return
        set_combos(self.data.domain)

        if len(self.model_1) > 0:
            self.variable1 = self.model_1[0]
            self.variable2 = self.model_1[min(1, len(self.model_1) - 1)]
        self.variable3 = self.variable4 = None
        self.variable_color = self.data.domain.class_var  # None is OK, too

    def get_disc_attr_list(self):
        return [
            self.discrete_data.domain[var.name]
            for var in (self.variable1, self.variable2, self.variable3,
                        self.variable4) if var
        ]

    def set_attr(self, *attrs):
        self.variable1, self.variable2, self.variable3, self.variable4 = [
            attr and self.data.domain[attr.name] for attr in attrs
        ]
        self.reset_graph()

    def attr_changed(self):
        self.attrs_changed_manually.emit(self.get_disc_attr_list())
        self.reset_graph()

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    @Inputs.data
    def set_data(self, data):
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data

        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1
            and len(self.data.domain.attributes) >= 1)

        if self.data is None:
            self.discrete_data = None
            self.init_combos(None)
            return

        self.init_combos(self.data)
        self.openContext(self.data)

    @Inputs.data_subset
    def set_subset_data(self, data):
        self.subset_data = data

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.Warning.incompatible_subset.clear()
        self.subset_indices = None
        if self.data is not None and self.subset_data:
            transformed = self.subset_data.transform(self.data.domain)
            if np.all(np.isnan(transformed.X)) \
                    and np.all(np.isnan(transformed.Y)):
                self.Warning.incompatible_subset()
            else:
                indices = {e.id for e in transformed}
                self.subset_indices = [ex.id in indices for ex in self.data]
        if self.data is not None and self.__pending_selection is not None:
            self.selection = self.__pending_selection
            self.__pending_selection = None
        else:
            self.selection = set()
        self.set_color_data()
        self.update_graph()
        self.send_selection()

    def clear_selection(self):
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def coloring_changed(self):
        self.vizrank.coloring_changed()
        self.update_graph()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def set_color_data(self):
        if self.data is None:
            return
        self.bar_button.setEnabled(self.variable_color is not None)
        attrs = [v for v in self.model_1 if v and v is not self.variable_color]
        domain = Domain(attrs, self.variable_color, None)
        self.color_data = self.data.from_table(domain, self.data)
        self.discrete_data = self._get_discrete_data(self.color_data)
        self.vizrank.stop_and_reset()
        self.vizrank_button.setEnabled(True)
        self.coloring_changed()

    def update_selection_rects(self):
        pens = (QPen(), QPen(Qt.black, 3, Qt.DotLine))
        for i, (_, _, area) in enumerate(self.areas):
            area.setPen(pens[i in self.selection])

    def select_area(self, index, ev):
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        if not self.selection or self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(
                create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, _ = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]

        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def get_counts(attr_vals, values):
            """Calculate rectangles' widths; if all are 0, they are set to 1."""
            if not attr_vals:
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [
                    conditionaldict[attr_vals + "-" + val] for val in values
                ]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)
            return total, counts

        def draw_data(attr_list,
                      x0_x1,
                      y0_y1,
                      side,
                      condition,
                      total_attrs,
                      used_attrs,
                      used_vals,
                      attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0,
                         x1,
                         y0,
                         y1,
                         "",
                         used_attrs,
                         used_vals,
                         attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            total, counts = get_counts(attr_vals, values)

            # when visualizing the third attribute and the first attribute has
            # the last value, reverse the order in which the boxes are drawn;
            # otherwise, if the last cell, nearest to the labels of the fourth
            # attribute, is empty, we wouldn't be able to position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                newattrvals = attr_vals + "-" + val if attr_vals else val

                tooltip = "{}&nbsp;&nbsp;&nbsp;&nbsp;{}: <b>{}</b><br/>".format(
                    condition, attr.name, htmlval)
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1, tooltip, total_attrs,
                                  *args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end, tooltip, *args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1, tooltip,
                                  total_attrs, *args)
            draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                      used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1, total_attrs, used_attrs,
                      used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = get_variable_values_sorted(used_attrs[0])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(attr)
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0
            total, counts = get_counts(attr_vals, values)
            aligns = [
                Qt.AlignTop | Qt.AlignHCenter, Qt.AlignRight | Qt.AlignVCenter,
                Qt.AlignBottom | Qt.AlignHCenter,
                Qt.AlignLeft | Qt.AlignVCenter
            ]
            align = aligns[side]
            for i, val in enumerate(values):
                if distributiondict[val] != 0:
                    perc = counts[i] / float(total)
                    rwidth = width * perc
                    xs = [
                        x0 + currpos + rwidth / 2, x0 - self.ATTR_VAL_OFFSET,
                        x0 + currpos + rwidth / 2, x1 + self.ATTR_VAL_OFFSET
                    ]
                    ys = [
                        y1 + self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc,
                        y0 - self.ATTR_VAL_OFFSET,
                        y0 + currpos + height * 0.5 * perc
                    ]

                    CanvasText(self.canvas,
                               val,
                               xs[side],
                               ys[side],
                               align,
                               max_width=rwidth if side == 0 else None)
                    space = height if side % 2 else width
                    currpos += perc * space + spacing * (total_attrs - side)

            xs = [
                x0 + (x1 - x0) / 2, x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                x0 + (x1 - x0) / 2, x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET
            ]
            ys = [
                y1 + self.ATTR_VAL_OFFSET + self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2,
                y0 - self.ATTR_VAL_OFFSET - self.ATTR_NAME_OFFSET,
                y0 + (y1 - y0) / 2
            ]
            CanvasText(self.canvas,
                       attr.name,
                       xs[side],
                       ys[side],
                       align,
                       bold=True,
                       vertical=side % 2)

        def add_rect(x0,
                     x1,
                     y0,
                     y1,
                     condition,
                     used_attrs,
                     used_vals,
                     attr_vals=""):
            area_index = len(self.areas)
            x1 += (x0 == x1)
            y1 += (y0 == y1)
            # rectangles of width and height 1 are not shown - increase
            y1 += (x1 - x0 + y1 - y0 == 2)
            colors = class_var and [QColor(*col) for col in class_var.colors]

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(self.canvas,
                                           x,
                                           y,
                                           w,
                                           h,
                                           z=z,
                                           onclick=select_area,
                                           **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(self.canvas,
                                       x,
                                       y,
                                       w,
                                       h,
                                       pen_color,
                                       brush_color,
                                       z=z,
                                       onclick=select_area,
                                       **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.variable_color is None:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                    mul, (apriori_dists[i][used_vals[i]] / float(s)
                          for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = float((actual - expected) / sqrt(expected))
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                    condition + "<hr/>" + "Expected instances: %.1f<br>"
                    "Actual instances: %d<br>"
                    "Standardized (Pearson) residual: %.1f" %
                    (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        if self.subset_indices is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[attr_vals +
                                                                "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total, bar_width,
                                         v, 15, color)
                                    total += v

                actual = [
                    conditionaldict[attr_vals + "-" + cls_values[i]]
                    for i in range(len(prior))
                ]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                        "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                        (cls, act, 100.0 * act / n_actual,
                         apr / n_apriori * n_actual, 100.0 * apr / n_apriori)
                        for cls, act, apr in zip(cls_values, actual, apriori))
                else:
                    text = ""
                outer_rect.setToolTip("{}<hr>Instances: {}<br><br>{}".format(
                    condition, n_actual, text[:-4]))

        def create_legend():
            if self.variable_color is None:
                names = [
                    "<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                    "Residuals:"
                ]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
                edges = repeat(Qt.black)
            else:
                names = get_variable_values_sorted(class_var)
                edges = colors = [QColor(*col) for col in class_var.colors]

            items = []
            size = 8
            for name, color, edgecolor in zip(names, colors, edges):
                item = QGraphicsItemGroup()
                item.addToGroup(
                    CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                    edgecolor, color))
                item.addToGroup(
                    CanvasText(None, name, size, 0, Qt.AlignVCenter))
                items.append(item)
            return wrap_legend_items(items,
                                     hspacing=20,
                                     vspacing=16 + size,
                                     max_width=self.canvas_view.width() - xoff)

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        attr_list = self.get_disc_attr_list()
        class_var = data.domain.class_var
        # TODO: check this
        # data = Preprocessor_dropMissing(data)

        unique = [v.name for v in set(attr_list + [class_var]) if v]
        if len(data[:, unique]) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        attrs = [attr for attr in attr_list if not attr.values]
        if attrs:
            CanvasText(self.canvas,
                       "Feature {} has no values".format(attrs[0]),
                       (self.canvas_view.width() - 120) / 2,
                       self.canvas_view.height() / 2)
            return
        if self.variable_color is None:
            apriori_dists = [
                get_distribution(data, attr) for attr in attr_list
            ]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(attr)
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        xoff = 20

        # get the maximum width of rectangle
        width = 20
        max_ylabel_w1 = max_ylabel_w2 = 0
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1].name, bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas,
                                  attr_list[3].name,
                                  bold=1,
                                  show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        legend = create_legend()

        # get the maximum height of rectangle
        yoff = 45
        legendoff = yoff + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
        square_size = min(
            self.canvas_view.width() - width - 20,
            self.canvas_view.height() - legendoff -
            legend.boundingRect().height())

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(0, 0, self.canvas_view.width(),
                                      self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if self.subset_indices:
            conditionalsubsetdict, _ = get_conditional_distribution(
                self.discrete_data[self.subset_indices], attr_list)

        # draw rectangles
        draw_data(attr_list, (xoff, xoff + square_size),
                  (yoff, yoff + square_size), 0, "", len(attr_list), [], [])

        self.canvas.addItem(legend)
        legend.setPos(
            xoff - legend.boundingRect().x() +
            max(0, (square_size - legend.boundingRect().width()) / 2),
            legendoff + square_size)
        self.update_selection_rects()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            settings.migrate_str_to_variable(context,
                                             none_placeholder="(None)")
Exemplo n.º 33
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls`layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "箱形图"
    description = "在方框图中可视化特征值的分布。"
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("数据", Orange.data.Table)

    class Outputs:
        selected_data = Output("所选数据", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)
    auto_commit = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "盒式布景"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        view = gui.listView(self.controlArea,
                            self,
                            "attribute",
                            box="变量",
                            model=self.attrs,
                            callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(view.box,
                                     self,
                                     "order_by_importance",
                                     "按相关性排序",
                                     tooltip="由𝜒²或方差对子群排序",
                                     callback=self.apply_sorting)
        self.group_vars = DomainModel(placeholder="无",
                                      separators=False,
                                      valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(self.controlArea,
                                              self,
                                              "group_var",
                                              box="子群",
                                              model=self.group_vars,
                                              callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(self.controlArea,
                                    "Display",
                                    sizePolicy=(QSizePolicy.Minimum,
                                                QSizePolicy.Maximum),
                                    addSpace=False)

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["无比较", "中位数比较", "均值比较"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(self.controlArea,
                                             box="显示",
                                             sizePolicy=(QSizePolicy.Minimum,
                                                         QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(box,
                     self,
                     'stretched',
                     "拉杆",
                     callback=self.display_changed)
        gui.checkBox(box,
                     self,
                     'show_labels',
                     "显示框标签",
                     callback=self.display_changed)
        self.sort_cb = gui.checkBox(box,
                                    self,
                                    'sort_freqs',
                                    "按子组频率排序",
                                    callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit", "选中发送", "自动发送")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>没有测试结果。</center>")
        self.mainArea.setMinimumWidth(600)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        return QSize(100, 500)  # Vertical size is regulated by mainArea

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        self.attrs[:] = [
            var for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes) if var.is_primitive()
        ]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (not bool(dataset)
                                    or not len(dataset.domain) and
                                    not any(var.is_primitive()
                                            for var in dataset.domain.metas)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(),
                              f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        temp_cond = self.conditions.copy()
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(
                    box.filter.conditions in [c.conditions for c in temp_cond])

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        if dataset is None or not self.is_continuous and not attr.values or \
                        self.group_var and not self.group_var.values:
            self.stats = self.dist = self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(dataset, attr,
                                                     self.group_var)
            if self.is_continuous:
                stats, label_texts = [], []
                for i, cont in enumerate(self.conts):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, attr, i, self.group_var))
                        label_texts.append(self.group_var.values[i])
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = \
                    [v for v, c in zip(self.group_var.values, self.conts)
                     if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0
        ]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.mean_labels = [
            self.mean_label(stat, attr, lab)
            for stat, lab in zip(self.stats, self.label_txts)
        ]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, attr, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)
        ]
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts
        ]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width,
                   len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        assert not self.is_continuous
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0
                ]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = \
                [self.strudel(cont, i) for i, cont in enumerate(self.conts)
                 if np.sum(cont) > 0]
            self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0]

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(
                    self.order, key=(-np.sum(self.conts, axis=1)).__getitem__)
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.stretched:
                self.__draw_row_counts(y, box_index)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes * 40) + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        assert not self.is_continuous
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        label = bar_part = None
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n == 0 or d2.n == 0:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            if pooled_var == 0:
                return np.nan, np.nan
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance," \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0,
                               self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.is_continuous
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(
                sum(dist) * scale_x + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(stat.conditions, stat.q25 * scale_x,
                                          -height / 2,
                                          (stat.q75 - stat.q25) * scale_x,
                                          height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2, stat.median,
                               height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        self.conditions = [
            item.filter for item in self.box_scene.selectedItems()
            if item.filter
        ]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            selection = np.in1d(self.dataset.ids,
                                selected.ids,
                                assume_unique=True).nonzero()[0]
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        return self.attribute.name if self.attribute else None

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)
Exemplo n.º 34
0
        return QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)


if __name__ == '__main__':
    import sys
    from Orange.data.table import Table
    from AnyQt.QtWidgets import (  # pylint: disable=ungrouped-imports
        QGraphicsView, QGraphicsScene, QApplication, QWidget)

    app = QApplication(sys.argv)
    widget = QWidget()
    widget.resize(500, 300)
    scene = QGraphicsScene(widget)
    view = QGraphicsView(scene, widget)
    dataset = Table(sys.argv[1] if len(sys.argv) > 1 else 'iris')
    histogram = Histogram(
        dataset,
        variable=0,
        height=300,
        width=500,
        n_bins=20,
        bar_spacing=2,
        border=(0, 0, 5, 0),
        border_color='#000',
        color_attribute='iris',
    )
    scene.addItem(histogram)

    widget.show()
    app.exec()
Exemplo n.º 35
0
    def data(self, index, role):
        # type: (QModelIndex, Qt.ItemDataRole) -> Any
        # Text formatting for various data simply requires a lot of branches.
        # This is much better than overengineering various formatters...
        # pylint: disable=too-many-branches

        if not index.isValid():
            return None

        row, column = self.mapToSourceRows(index.row()), index.column()
        # Make sure we're not out of range
        if not 0 <= row <= self.n_attributes:
            return QVariant()

        attribute = self.variables[row]

        if role == Qt.BackgroundRole:
            if attribute in self.domain.attributes:
                return self.COLOR_FOR_ROLE[self.ATTRIBUTE]
            elif attribute in self.domain.metas:
                return self.COLOR_FOR_ROLE[self.META]
            elif attribute in self.domain.class_vars:
                return self.COLOR_FOR_ROLE[self.CLASS_VAR]

        elif role == Qt.TextAlignmentRole:
            if column == self.Columns.NAME:
                return Qt.AlignLeft | Qt.AlignVCenter
            return Qt.AlignRight | Qt.AlignVCenter

        output = None

        if column == self.Columns.ICON:
            if role == Qt.DecorationRole:
                return gui.attributeIconDict[attribute]
        elif column == self.Columns.NAME:
            if role == Qt.DisplayRole:
                output = attribute.name
        elif column == self.Columns.DISTRIBUTION:
            if role == Qt.DisplayRole:
                if isinstance(attribute,
                              (DiscreteVariable, ContinuousVariable)):
                    if row not in self.__distributions_cache:
                        scene = QGraphicsScene(parent=self)
                        histogram = Histogram(
                            data=self.table,
                            variable=attribute,
                            color_attribute=self.target_var,
                            border=(0, 0, 2, 0),
                            border_color='#ccc',
                        )
                        scene.addItem(histogram)
                        self.__distributions_cache[row] = scene
                    return self.__distributions_cache[row]
        elif column == self.Columns.CENTER:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    output = self._center[row]
                    if not np.isnan(output):
                        output = attribute.str_val(self._center[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._center[row])
                else:
                    output = self._center[row]
        elif column == self.Columns.DISPERSION:
            if role == Qt.DisplayRole:
                if isinstance(attribute, TimeVariable):
                    output = format_time_diff(self._min[row], self._max[row])
                else:
                    output = self._dispersion[row]
        elif column == self.Columns.MIN:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    if attribute.ordered:
                        output = attribute.str_val(self._min[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._min[row])
                else:
                    output = self._min[row]
        elif column == self.Columns.MAX:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    if attribute.ordered:
                        output = attribute.str_val(self._max[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._max[row])
                else:
                    output = self._max[row]
        elif column == self.Columns.MISSING:
            if role == Qt.DisplayRole:
                output = '%d (%d%%)' % (self._missing[row], 100 *
                                        self._missing[row] / self.n_instances)

        # Consistently format the text inside the table cells
        # The easiest way to check for NaN is to compare with itself
        if output != output:  # pylint: disable=comparison-with-itself
            output = ''
        # Format ∞ properly
        elif output in (np.inf, -np.inf):
            output = '%s∞' % ['', '-'][output < 0]
        elif isinstance(output, int):
            output = locale.format_string('%d', output, grouping=True)
        elif isinstance(output, float):
            output = locale.format_string('%.2f', output, grouping=True)

        return output
Exemplo n.º 36
0
class OWNomogram(OWWidget):
    name = "Nomogram"
    description = " Nomograms for Visualization of Naive Bayesian" \
                  " and Logistic Regression Classifiers."
    icon = "icons/Nomogram.svg"
    priority = 2000

    inputs = [("Classifier", Model, "set_classifier"),
              ("Data", Table, "set_instance")]

    MAX_N_ATTRS = 1000
    POINT_SCALE = 0
    ALIGN_LEFT = 0
    ALIGN_ZERO = 1
    ACCEPTABLE = (NaiveBayesModel, LogisticRegressionClassifier)
    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    normalize_probabilities = Setting(False)
    scale = Setting(1)
    display_index = Setting(1)
    n_attributes = Setting(10)
    sort_index = Setting(SortBy.ABSOLUTE)
    cont_feature_dim_index = Setting(0)

    graph_name = "scene"

    class Error(OWWidget.Error):
        invalid_classifier = Msg("Nomogram accepts only Naive Bayes and "
                                 "Logistic Regression classifiers.")

    def __init__(self):
        super().__init__()
        self.instances = None
        self.domain = None
        self.data = None
        self.classifier = None
        self.align = OWNomogram.ALIGN_ZERO
        self.log_odds_ratios = []
        self.log_reg_coeffs = []
        self.log_reg_coeffs_orig = []
        self.log_reg_cont_data_extremes = []
        self.p = None
        self.b0 = None
        self.points = []
        self.feature_items = []
        self.feature_marker_values = []
        self.scale_back = lambda x: x
        self.scale_forth = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.old_target_class_index = self.target_class_index
        self.markers_set = False
        self.repaint = False

        # GUI
        box = gui.vBox(self.controlArea, "Target class")
        self.class_combo = gui.comboBox(box,
                                        self,
                                        "target_class_index",
                                        callback=self._class_combo_changed,
                                        contentsLength=12)
        self.norm_check = gui.checkBox(
            box,
            self,
            "normalize_probabilities",
            "Normalize probabilities",
            hidden=True,
            callback=self._norm_check_changed,
            tooltip="For multiclass data 1 vs. all probabilities do not"
            " sum to 1 and therefore could be normalized.")

        self.scale_radio = gui.radioButtons(
            self.controlArea,
            self,
            "scale", ["Point scale", "Log odds ratios"],
            box="Scale",
            callback=self._radio_button_changed)

        box = gui.vBox(self.controlArea, "Display features")
        grid = QGridLayout()
        self.display_radio = gui.radioButtonsInBox(
            box,
            self,
            "display_index", [],
            orientation=grid,
            callback=self._display_radio_button_changed)
        radio_all = gui.appendRadioButton(self.display_radio,
                                          "All:",
                                          addToLayout=False)
        radio_best = gui.appendRadioButton(self.display_radio,
                                           "Best ranked:",
                                           addToLayout=False)
        spin_box = gui.hBox(None, margin=0)
        self.n_spin = gui.spin(spin_box,
                               self,
                               "n_attributes",
                               1,
                               self.MAX_N_ATTRS,
                               label=" ",
                               controlWidth=60,
                               callback=self._n_spin_changed)
        grid.addWidget(radio_all, 1, 1)
        grid.addWidget(radio_best, 2, 1)
        grid.addWidget(spin_box, 2, 2)

        self.sort_combo = gui.comboBox(box,
                                       self,
                                       "sort_index",
                                       label="Sort by: ",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._sort_combo_changed)

        self.cont_feature_dim_combo = gui.comboBox(
            box,
            self,
            "cont_feature_dim_index",
            label="Continuous features: ",
            items=["1D projection", "2D curve"],
            orientation=Qt.Horizontal,
            callback=self._cont_feature_dim_combo_changed)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(
            self.scene,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            renderHints=QPainter.Antialiasing | QPainter.TextAntialiasing
            | QPainter.SmoothPixmapTransform,
            alignment=Qt.AlignLeft)
        self.view.viewport().installEventFilter(self)
        self.view.viewport().setMinimumWidth(300)
        self.view.sizeHint = lambda: QSize(600, 500)
        self.mainArea.layout().addWidget(self.view)

    def _class_combo_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        coeffs = [
            np.nan_to_num(p[self.target_class_index] /
                          p[self.old_target_class_index]) for p in self.points
        ]
        points = [p[self.old_target_class_index] for p in self.points]
        self.feature_marker_values = [
            self.get_points_from_coeffs(v, c, p)
            for (v, c, p) in zip(self.feature_marker_values, coeffs, points)
        ]
        self.update_scene()
        self.old_target_class_index = self.target_class_index

    def _norm_check_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _radio_button_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def _display_radio_button_changed(self):
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _n_spin_changed(self):
        self.display_index = 1
        self.__hide_attrs(self.n_attributes)

    def __hide_attrs(self, n_show):
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(n_show)
        if self.vertical_line:
            x = self.vertical_line.line().x1()
            y = self.nomogram_main.layout.preferredHeight() + 30
            self.vertical_line.setLine(x, -6, x, y)
            self.hidden_vertical_line.setLine(x, -6, x, y)
        rect = QRectF(self.scene.sceneRect().x(),
                      self.scene.sceneRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def _sort_combo_changed(self):
        if self.nomogram_main is None:
            return
        self.nomogram_main.hide(None)
        self.nomogram_main.sort(self.sort_index)
        self.__hide_attrs(self.n_attributes if self.display_index else None)

    def _cont_feature_dim_combo_changed(self):
        values = [item.dot.value for item in self.feature_items]
        self.feature_marker_values = self.scale_back(values)
        self.update_scene()

    def eventFilter(self, obj, event):
        if obj is self.view.viewport() and event.type() == QEvent.Resize:
            self.repaint = True
            values = [item.dot.value for item in self.feature_items]
            self.feature_marker_values = self.scale_back(values)
            self.update_scene()
        return super().eventFilter(obj, event)

    def update_controls(self):
        self.class_combo.clear()
        self.norm_check.setHidden(True)
        self.cont_feature_dim_combo.setEnabled(True)
        if self.domain:
            self.class_combo.addItems(self.domain.class_vars[0].values)
            if len(self.domain.attributes) > self.MAX_N_ATTRS:
                self.display_index = 1
            if len(self.domain.class_vars[0].values) > 2:
                self.norm_check.setHidden(False)
            if not self.domain.has_continuous_attributes():
                self.cont_feature_dim_combo.setEnabled(False)
                self.cont_feature_dim_index = 0
        model = self.sort_combo.model()
        item = model.item(SortBy.POSITIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        item = model.item(SortBy.NEGATIVE)
        item.setFlags(item.flags() | Qt.ItemIsEnabled)
        self.align = OWNomogram.ALIGN_ZERO
        if self.classifier and isinstance(self.classifier,
                                          LogisticRegressionClassifier):
            self.align = OWNomogram.ALIGN_LEFT
            item = model.item(SortBy.POSITIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            item = model.item(SortBy.NEGATIVE)
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sort_index in (SortBy.POSITIVE, SortBy.POSITIVE):
                self.sort_index = SortBy.NO_SORTING

    def set_instance(self, data):
        self.instances = data
        self.feature_marker_values = []
        self.set_feature_marker_values()

    def set_classifier(self, classifier):
        self.closeContext()
        self.classifier = classifier
        self.Error.clear()
        if self.classifier and not isinstance(self.classifier,
                                              self.ACCEPTABLE):
            self.Error.invalid_classifier()
            self.classifier = None
        self.domain = self.classifier.domain if self.classifier else None
        self.data = None
        self.calculate_log_odds_ratios()
        self.calculate_log_reg_coefficients()
        self.update_controls()
        self.target_class_index = 0
        self.openContext(self.domain and self.domain.class_var)
        self.points = self.log_odds_ratios or self.log_reg_coeffs
        self.feature_marker_values = []
        self.old_target_class_index = self.target_class_index
        self.update_scene()

    def calculate_log_odds_ratios(self):
        self.log_odds_ratios = []
        self.p = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, NaiveBayesModel):
            return

        log_cont_prob = self.classifier.log_cont_prob
        class_prob = self.classifier.class_prob
        for i in range(len(self.domain.attributes)):
            ca = np.exp(log_cont_prob[i]) * class_prob[:, None]
            _or = (ca / (1 - ca)) / (class_prob / (1 - class_prob))[:, None]
            self.log_odds_ratios.append(np.log(_or))
        self.p = class_prob

    def calculate_log_reg_coefficients(self):
        self.log_reg_coeffs = []
        self.log_reg_cont_data_extremes = []
        self.b0 = None
        if self.classifier is None or self.domain is None:
            return
        if not isinstance(self.classifier, LogisticRegressionClassifier):
            return

        self.domain = self.reconstruct_domain(self.classifier.original_domain,
                                              self.domain)
        self.data = self.classifier.original_data.transform(self.domain)
        attrs, ranges, start = self.domain.attributes, [], 0
        for attr in attrs:
            stop = start + len(attr.values) if attr.is_discrete else start + 1
            ranges.append(slice(start, stop))
            start = stop

        self.b0 = self.classifier.intercept
        coeffs = self.classifier.coefficients
        if len(self.domain.class_var.values) == 2:
            self.b0 = np.hstack((self.b0 * (-1), self.b0))
            coeffs = np.vstack((coeffs * (-1), coeffs))
        self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
        self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

        min_values = nanmin(self.data.X, axis=0)
        max_values = nanmax(self.data.X, axis=0)

        for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)), min_values,
                                   max_values):
            if self.log_reg_coeffs[i].shape[1] == 1:
                coef = self.log_reg_coeffs[i]
                self.log_reg_coeffs[i] = np.hstack(
                    (coef * min_t, coef * max_t))
                self.log_reg_cont_data_extremes.append(
                    [sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
            else:
                self.log_reg_cont_data_extremes.append([None])

    def update_scene(self):
        if not self.repaint:
            return
        self.clear_scene()
        if self.domain is None or not len(self.points[0]):
            return

        name_items = [
            QGraphicsTextItem(a.name) for a in self.domain.attributes
        ]
        point_text = QGraphicsTextItem("Points")
        probs_text = QGraphicsTextItem("Probabilities (%)")
        all_items = name_items + [point_text, probs_text]
        name_offset = -max(t.boundingRect().width() for t in all_items) - 50
        w = self.view.viewport().rect().width()
        max_width = w + name_offset - 100

        points = [pts[self.target_class_index] for pts in self.points]
        minimums = [min(p) for p in points]
        if self.align == OWNomogram.ALIGN_LEFT:
            points = [p - m for m, p in zip(minimums, points)]
        max_ = np.nan_to_num(max(max(abs(p)) for p in points))
        d = 100 / max_ if max_ else 1
        if self.scale == OWNomogram.POINT_SCALE:
            points = [p * d for p in points]

        if self.scale == OWNomogram.POINT_SCALE and \
                self.align == OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [
                p / d + m for m, p in zip(minimums, x)
            ]
            self.scale_forth = lambda x: [(p - m) * d
                                          for m, p in zip(minimums, x)]
        if self.scale == OWNomogram.POINT_SCALE and \
                self.align != OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [p / d for p in x]
            self.scale_forth = lambda x: [p * d for p in x]
        if self.scale != OWNomogram.POINT_SCALE and \
                self.align == OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: [p + m for m, p in zip(minimums, x)]
            self.scale_forth = lambda x: [p - m for m, p in zip(minimums, x)]
        if self.scale != OWNomogram.POINT_SCALE and \
                self.align != OWNomogram.ALIGN_LEFT:
            self.scale_back = lambda x: x
            self.scale_forth = lambda x: x

        point_item, nomogram_head = self.create_main_nomogram(
            name_items, points, max_width, point_text, name_offset)
        probs_item, nomogram_foot = self.create_footer_nomogram(
            probs_text, d, minimums, max_width, name_offset)
        for item in self.feature_items:
            item.dot.point_dot = point_item.dot
            item.dot.probs_dot = probs_item.dot
            item.dot.vertical_line = self.hidden_vertical_line

        self.nomogram = nomogram = NomogramItem()
        nomogram.add_items([nomogram_head, self.nomogram_main, nomogram_foot])
        self.scene.addItem(nomogram)
        self.set_feature_marker_values()
        rect = QRectF(self.scene.itemsBoundingRect().x(),
                      self.scene.itemsBoundingRect().y(),
                      self.scene.itemsBoundingRect().width(),
                      self.nomogram.preferredSize().height())
        self.scene.setSceneRect(rect.adjusted(0, 0, 70, 70))

    def create_main_nomogram(self, name_items, points, max_width, point_text,
                             name_offset):
        cls_index = self.target_class_index
        min_p = min(min(p) for p in points)
        max_p = max(max(p) for p in points)
        values = self.get_ruler_values(min_p, max_p, max_width)
        min_p, max_p = min(values), max(values)
        diff_ = np.nan_to_num(max_p - min_p)
        scale_x = max_width / diff_ if diff_ else max_width

        nomogram_header = NomogramItem()
        point_item = RulerItem(point_text, values, scale_x, name_offset,
                               -scale_x * min_p)
        point_item.setPreferredSize(point_item.preferredWidth(), 35)
        nomogram_header.add_items([point_item])

        self.nomogram_main = SortableNomogramItem()
        cont_feature_item_class = ContinuousFeature2DItem if \
            self.cont_feature_dim_index else ContinuousFeatureItem
        self.feature_items = [
            DiscreteFeatureItem(name_items[i], [val for val in att.values],
                                points[i], scale_x, name_offset, -scale_x *
                                min_p, self.points[i][cls_index])
            if att.is_discrete else cont_feature_item_class(
                name_items[i], self.log_reg_cont_data_extremes[i][cls_index],
                self.get_ruler_values(
                    np.min(points[i]), np.max(points[i]),
                    scale_x * (np.max(points[i]) - np.min(points[i])),
                    False), scale_x, name_offset, -scale_x *
                min_p, self.log_reg_coeffs_orig[i][cls_index][0])
            for i, att in enumerate(self.domain.attributes)
        ]
        self.nomogram_main.add_items(
            self.feature_items, self.sort_index,
            self.n_attributes if self.display_index else None)

        x = -scale_x * min_p
        y = self.nomogram_main.layout.preferredHeight() + 30
        self.vertical_line = QGraphicsLineItem(x, -6, x, y)
        self.vertical_line.setPen(QPen(Qt.DotLine))
        self.vertical_line.setParentItem(point_item)
        self.hidden_vertical_line = QGraphicsLineItem(x, -6, x, y)
        pen = QPen(Qt.DashLine)
        pen.setBrush(QColor(Qt.red))
        self.hidden_vertical_line.setPen(pen)
        self.hidden_vertical_line.setParentItem(point_item)

        return point_item, nomogram_header

    def create_footer_nomogram(self, probs_text, d, minimums, max_width,
                               name_offset):
        eps, d_ = 0.05, 1
        k = -np.log(self.p / (1 - self.p)) if self.p is not None else -self.b0
        min_sum = k[self.target_class_index] - np.log((1 - eps) / eps)
        max_sum = k[self.target_class_index] - np.log(eps / (1 - eps))
        if self.align == OWNomogram.ALIGN_LEFT:
            max_sum = max_sum - sum(minimums)
            min_sum = min_sum - sum(minimums)
            for i in range(len(k)):
                k[i] = k[i] - sum(
                    [min(q) for q in [p[i] for p in self.points]])
        if self.scale == OWNomogram.POINT_SCALE:
            min_sum *= d
            max_sum *= d
            d_ = d

        values = self.get_ruler_values(min_sum, max_sum, max_width)
        min_sum, max_sum = min(values), max(values)
        diff_ = np.nan_to_num(max_sum - min_sum)
        scale_x = max_width / diff_ if diff_ else max_width
        cls_var, cls_index = self.domain.class_var, self.target_class_index
        nomogram_footer = NomogramItem()

        def get_normalized_probabilities(val):
            if not self.normalize_probabilities:
                return 1 / (1 + np.exp(k[cls_index] - val / d_))
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return 1 / (1 + np.exp(k[cls_index] - val / d_)) / p_sum

        def get_points(prob):
            if not self.normalize_probabilities:
                return (k[cls_index] - np.log(1 / prob - 1)) * d_
            totals = self.__get_totals_for_class_values(minimums)
            p_sum = np.sum(1 / (1 + np.exp(k - totals / d_)))
            return (k[cls_index] - np.log(1 / (prob * p_sum) - 1)) * d_

        self.markers_set = False
        probs_item = ProbabilitiesRulerItem(
            probs_text,
            values,
            scale_x,
            name_offset,
            -scale_x * min_sum,
            get_points=get_points,
            title="{}='{}'".format(cls_var.name, cls_var.values[cls_index]),
            get_probabilities=get_normalized_probabilities)
        self.markers_set = True
        nomogram_footer.add_items([probs_item])
        return probs_item, nomogram_footer

    def __get_totals_for_class_values(self, minimums):
        cls_index = self.target_class_index
        marker_values = [item.dot.value for item in self.feature_items]
        if not self.markers_set:
            marker_values = self.scale_forth(marker_values)
        totals = np.empty(len(self.domain.class_var.values))
        totals[cls_index] = sum(marker_values)
        marker_values = self.scale_back(marker_values)
        for i in range(len(self.domain.class_var.values)):
            if i == cls_index:
                continue
            coeffs = [np.nan_to_num(p[i] / p[cls_index]) for p in self.points]
            points = [p[cls_index] for p in self.points]
            total = sum([
                self.get_points_from_coeffs(v, c, p)
                for (v, c, p) in zip(marker_values, coeffs, points)
            ])
            if self.align == OWNomogram.ALIGN_LEFT:
                points = [p - m for m, p in zip(minimums, points)]
                total -= sum([min(p) for p in [p[i] for p in self.points]])
            d = 100 / max(max(abs(p)) for p in points)
            if self.scale == OWNomogram.POINT_SCALE:
                total *= d
            totals[i] = total
        return totals

    def set_feature_marker_values(self):
        if not (len(self.points) and len(self.feature_items)):
            return
        if not len(self.feature_marker_values):
            self._init_feature_marker_values()
        self.feature_marker_values = self.scale_forth(
            self.feature_marker_values)
        item = self.feature_items[0]
        for i, item in enumerate(self.feature_items):
            item.dot.move_to_val(self.feature_marker_values[i])
        item.dot.probs_dot.move_to_sum()

    def _init_feature_marker_values(self):
        self.feature_marker_values = []
        cls_index = self.target_class_index
        instances = Table(self.domain, self.instances) \
            if self.instances else None
        for i, attr in enumerate(self.domain.attributes):
            value, feature_val = 0, None
            if len(self.log_reg_coeffs):
                if attr.is_discrete:
                    ind, n = unique(self.data.X[:, i], return_counts=True)
                    feature_val = np.nan_to_num(ind[np.argmax(n)])
                else:
                    feature_val = mean(self.data.X[:, i])
            inst_in_dom = instances and attr in instances.domain
            if inst_in_dom and not np.isnan(instances[0][attr]):
                feature_val = instances[0][attr]
            if feature_val is not None:
                value = self.points[i][cls_index][int(feature_val)] \
                    if attr.is_discrete else \
                    self.log_reg_coeffs_orig[i][cls_index][0] * feature_val
            self.feature_marker_values.append(value)

    def clear_scene(self):
        self.feature_items = []
        self.scale_back = lambda x: x
        self.scale_forth = lambda x: x
        self.nomogram = None
        self.nomogram_main = None
        self.vertical_line = None
        self.hidden_vertical_line = None
        self.scene.clear()

    def send_report(self):
        self.report_plot()

    @staticmethod
    def reconstruct_domain(original, preprocessed):
        # abuse dict to make "in" comparisons faster
        attrs = OrderedDict()
        for attr in preprocessed.attributes:
            cv = attr._compute_value.variable._compute_value
            var = cv.variable if cv else original[attr.name]
            if var in attrs:  # the reason for OrderedDict
                continue
            attrs[var] = None  # we only need keys
        attrs = list(attrs.keys())
        return Domain(attrs, original.class_var, original.metas)

    @staticmethod
    def get_ruler_values(start, stop, max_width, round_to_nearest=True):
        if max_width == 0:
            return [0]
        diff = np.nan_to_num((stop - start) / max_width)
        if diff <= 0:
            return [0]
        decimals = int(np.floor(np.log10(diff)))
        if diff > 4 * pow(10, decimals):
            step = 5 * pow(10, decimals + 2)
        elif diff > 2 * pow(10, decimals):
            step = 2 * pow(10, decimals + 2)
        elif diff > 1 * pow(10, decimals):
            step = 1 * pow(10, decimals + 2)
        else:
            step = 5 * pow(10, decimals + 1)
        round_by = int(-np.floor(np.log10(step)))
        r = start % step
        if not round_to_nearest:
            _range = np.arange(start + step, stop + r, step) - r
            start, stop = np.floor(start * 100) / 100, np.ceil(
                stop * 100) / 100
            return np.round(np.hstack((start, _range, stop)), 2)
        return np.round(np.arange(start, stop + r + step, step) - r, round_by)

    @staticmethod
    def get_points_from_coeffs(current_value, coefficients, possible_values):
        if any(np.isnan(possible_values)):
            return 0
        indices = np.argsort(possible_values)
        sorted_values = possible_values[indices]
        sorted_coefficients = coefficients[indices]
        for i, val in enumerate(sorted_values):
            if current_value < val:
                break
        diff = sorted_values[i] - sorted_values[i - 1]
        k = 0 if diff < 1e-6 else (sorted_values[i] - current_value) / \
                                  (sorted_values[i] - sorted_values[i - 1])
        return sorted_coefficients[i - 1] * sorted_values[i - 1] * k + \
               sorted_coefficients[i] * sorted_values[i] * (1 - k)
Exemplo n.º 37
0
class OWBoxPlot(widget.OWWidget):
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data",
                               Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    class Warning(widget.OWWidget.Warning):
        no_vars = widget.Msg(
            "Data contains no categorical or numeric variables")

    buttons_area_orientation = None

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    # If this was a list, context handler would try to match its elements to
    # variable names!
    selection = ContextSetting((), schema_only=True)

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    order_grouping_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)

    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        super().__init__()
        self._axis_font = QFont()
        self._axis_font.setPixelSize(12)
        self._label_font = QFont()
        self._label_font.setPixelSize(11)
        self.dataset = None
        self.stats = []
        self.dist = self.conts = None

        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.scale_x = 1
        self.scene_min_x = self.scene_max_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        sorted_model = SortProxyModel(sortRole=Qt.UserRole)
        sorted_model.setSourceModel(self.attrs)
        sorted_model.sort(0)
        box = gui.vBox(self.controlArea, "Variable")
        view = self.attr_list = ListViewSearch()
        view.setModel(sorted_model)
        view.setSelectionMode(view.SingleSelection)
        view.selectionModel().selectionChanged.connect(self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        box.layout().addWidget(view)
        gui.checkBox(box,
                     self,
                     "order_by_importance",
                     "Order by relevance to subgroups",
                     tooltip="Order by 𝜒² or ANOVA over the subgroups",
                     callback=self.apply_attr_sorting)

        self.group_vars = VariableListModel(placeholder="None")
        sorted_model = SortProxyModel(sortRole=Qt.UserRole)
        sorted_model.setSourceModel(self.group_vars)
        sorted_model.sort(0)

        box = gui.vBox(self.controlArea, "Subgroups")
        view = self.group_list = ListViewSearch()
        view.setModel(sorted_model)
        view.selectionModel().selectionChanged.connect(self.grouping_changed)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        box.layout().addWidget(view)
        gui.checkBox(box,
                     self,
                     "order_grouping_by_importance",
                     "Order by relevance to variable",
                     tooltip="Order by 𝜒² or ANOVA over the variable values",
                     callback=self.apply_group_sorting)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(self.controlArea,
                                    "Display",
                                    sizePolicy=(QSizePolicy.Minimum,
                                                QSizePolicy.Maximum))

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.update_graph)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.update_graph)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(self.controlArea,
                                             box="Display",
                                             sizePolicy=(QSizePolicy.Minimum,
                                                         QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(box,
                     self,
                     'stretched',
                     "Stretch bars",
                     callback=self.update_graph,
                     stateWhenDisabled=False)
        gui.checkBox(box,
                     self,
                     'show_labels',
                     "Show box labels",
                     callback=self.update_graph)
        self.sort_cb = gui.checkBox(box,
                                    self,
                                    'sort_freqs',
                                    "Sort by subgroup frequencies",
                                    callback=self.update_graph,
                                    stateWhenDisabled=False)

        gui.vBox(self.mainArea)
        self.box_scene = QGraphicsScene(self)
        self.box_scene.selectionChanged.connect(self.on_selection_changed)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        self.stat_test = ""
        self.mainArea.setMinimumWidth(300)
        self.update_box_visibilities()

    def sizeHint(self):
        return QSize(900, 500)

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.update_graph()
        return super().eventFilter(obj, event)

    @property
    def show_stretched(self):
        return self.stretched and self.group_var is not self.attribute

    def reset_attrs(self):
        domain = self.dataset.domain
        self.attrs[:] = [
            var for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes)
            if var.is_primitive() and not var.attributes.get("hidden", False)
        ]

    def reset_groups(self):
        domain = self.dataset.domain
        self.group_vars[:] = [None] + [
            var for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes)
            if var.is_discrete and not var.attributes.get("hidden", False)
        ]

    @Inputs.data
    def set_data(self, dataset):
        self.closeContext()
        self._reset_all_data()
        if dataset and not (len(dataset.domain.variables)
                            or any(var.is_primitive()
                                   for var in dataset.domain.metas)):
            self.Warning.no_vars()
            dataset = None

        self.dataset = dataset
        if dataset:
            self.reset_attrs()
            self.reset_groups()
            self._select_default_variables()
            self.openContext(self.dataset)
            self._set_list_view_selections()
            self.compute_box_data()
            self.apply_attr_sorting()
            self.apply_group_sorting()
            self.update_graph()
            self.select_box_items()

        self.update_box_visibilities()
        self.commit()

    def _reset_all_data(self):
        self.clear_scene()
        self.Warning.no_vars.clear()

        self.stats = []
        self.dist = self.conts = None
        self.group_var = None
        self.attribute = None
        self.stat_test = ""
        self.attrs[:] = []
        self.group_vars[:] = [None]
        self.selection = ()

    def _select_default_variables(self):
        # visualize first non-class variable, group by class (if present)
        domain = self.dataset.domain
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var

    def _set_list_view_selections(self):
        for view, var, callback in ((self.attr_list, self.attribute,
                                     self.attr_changed),
                                    (self.group_list, self.group_var,
                                     self.grouping_changed)):
            src_model = view.model().sourceModel()
            if var not in src_model:
                continue
            sel_model = view.selectionModel()
            sel_model.selectionChanged.disconnect(callback)
            row = src_model.indexOf(var)
            index = view.model().index(row, 0)
            sel_model.select(index, sel_model.ClearAndSelect)
            self._ensure_selection_visible(view)
            sel_model.selectionChanged.connect(callback)

    def apply_attr_sorting(self):
        def compute_score(attr):
            # This function and the one in apply_group_sorting are similar, but
            # different in too many details, so they are kept as separate
            # functions.
            # If you discover a bug in this function, check the other one, too.
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                p = self._chi_square(group_var, attr)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self._sort_list(self.attrs, self.attr_list, compute_score)
        else:
            self._sort_list(self.attrs, self.attr_list, None)

    def apply_group_sorting(self):
        def compute_stat(group):
            # This function and the one in apply_attr_sorting are similar, but
            # different in too many details, so they are kept as separate
            # functions.
            # If you discover a bug in this function, check the other one, too.
            if group is attr:
                return 3
            if group is None:
                return -1
            if attr.is_continuous:
                group_col = data.get_column_view(group)[0].astype(float)
                groups = (attr_col[group_col == i]
                          for i in range(len(group.values)))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                p = self._chi_square(group, attr)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        attr = self.attribute
        if self.order_grouping_by_importance:
            if attr.is_continuous:
                attr_col = data.get_column_view(attr)[0].astype(float)
            self._sort_list(self.group_vars, self.group_list, compute_stat)
        else:
            self._sort_list(self.group_vars, self.group_list, None)

    def _sort_list(self, source_model, view, key=None):
        if key is None:
            c = count()

            def key(_):  # pylint: disable=function-redefined
                return next(c)

        for i, attr in enumerate(source_model):
            source_model.setData(source_model.index(i), key(attr), Qt.UserRole)
        self._ensure_selection_visible(view)

    @staticmethod
    def _ensure_selection_visible(view):
        selection = view.selectedIndexes()
        if len(selection) == 1:
            view.scrollTo(selection[0])

    def _chi_square(self, group_var, attr):
        # Chi-square with the given distribution into groups
        if not attr.values or not group_var.values:
            return 0, 2, 0
        observed = np.array(
            contingency.get_contingency(self.dataset, group_var, attr))
        observed = observed[observed.sum(axis=1) != 0, :]
        observed = observed[:, observed.sum(axis=0) != 0]
        if min(observed.shape) < 2:
            return 0, 2, 0
        return chi2_contingency(observed)[:3]

    def grouping_changed(self, selected):
        if not selected:
            return  # should never come here
        self.group_var = selected.indexes()[0].data(gui.TableVariable)
        self._variables_changed(self.apply_attr_sorting)

    def attr_changed(self, selected):
        if not selected:
            return  # should never come here
        self.attribute = selected.indexes()[0].data(gui.TableVariable)
        self._variables_changed(self.apply_group_sorting)

    def _variables_changed(self, sorting):
        self.selection = ()
        self.compute_box_data()
        sorting()
        self.update_graph()
        self.update_box_visibilities()
        self.commit()

    def update_graph(self):
        pending_selection = self.selection
        self.box_scene.selectionChanged.disconnect(self.on_selection_changed)
        try:  # not for exceptions, just to reconnect after all possible paths
            self.clear_scene()

            if self.dataset is None or self.attribute is None:
                return

            if self.attribute.is_continuous:
                self._display_changed_cont()
            else:
                self._display_changed_disc()
            self.selection = pending_selection
            self.draw_stat()
            self.select_box_items()

            if self.attribute.is_continuous:
                heights = 90 if self.show_annotations else 60
                self.box_view.centerOn(
                    self.scene_min_x + self.scene_width / 2,
                    -30 - len(self.stats) * heights / 2 + 45)
            else:
                self.box_view.centerOn(self.scene_width / 2,
                                       -30 - len(self.boxes) * 40 / 2 + 45)
        finally:
            self.box_scene.selectionChanged.connect(self.on_selection_changed)

    def select_box_items(self):
        selection = set(self.selection)
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(box.data_range in selection)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        if dataset is None \
                or not attr.is_continuous and not attr.values \
                or self.group_var and not self.group_var.values:
            self.stats = []
            self.dist = self.conts = None
            return
        if self.group_var:
            self.dist = None
            self.conts = contingency.get_contingency(dataset, attr,
                                                     self.group_var)
            missing_val_str = f"missing '{self.group_var.name}'"
            group_var_labels = self.group_var.values + ("", )
            if self.attribute.is_continuous:
                stats, label_texts = [], []
                for cont, value in zip(self.conts.array_with_unknowns,
                                       group_var_labels):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, value))
                        label_texts.append(value or missing_val_str)
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = [
                    v or missing_val_str for v, c in zip(
                        group_var_labels, self.conts.array_with_unknowns)
                    if np.sum(c) > 0
                ]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = None
            if self.attribute.is_continuous:
                self.stats = [BoxData(self.dist, None)]
            self.label_txts_all = [""]
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0
        ]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_box_visibilities(self):
        self.controls.stretched.setDisabled(self.group_var is self.attribute)

        if not self.attribute:
            self.stretching_box.hide()
            self.display_box.hide()
        elif self.attribute.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []

    def _display_changed_cont(self):
        self.mean_labels = [
            self.mean_label(stat, self.attribute, lab)
            for stat, lab in zip(self.stats, self.label_txts)
        ]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, self.attribute, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)
        ]
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts
        ]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width,
                   len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self._compute_tests_cont()
        self._show_posthoc()

    def _display_changed_disc(self):
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        if not self.show_stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts.array_with_unknowns
                    if np.sum(cont) > 0
                ]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            conts = self.conts.array_with_unknowns
            self.boxes = [
                self.strudel(cont, val)
                for cont, val in zip(conts, self.group_var.values + ("", ))
                if np.sum(cont) > 0
            ]
            sums_ = np.sum(conts, axis=1)
            sums_ = sums_[sums_ > 0]  # only bars with sum > 0 are shown

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(self.order, key=(-sums_).__getitem__)
        else:
            conts = self.dist.array_with_unknowns
            self.boxes = [self.strudel(conts)]
            sums_ = [np.sum(conts)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.show_stretched:
                self.__draw_row_counts(y, self.labels[box_index],
                                       sums_[box_index])
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes * 40) + 90)
        self._compute_tests_disc()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, label, row_sum_):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        label: QGraphicsSimpleTextItem
            Label for group
        row_sum_: int
            Sum for the group
        """
        assert not self.attribute.is_continuous
        b = label.boundingRect()
        right = self.scale_x * row_sum_
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def _compute_tests_cont(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests

        # pylint: disable=comparison-with-itself
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n < 2 or d2.n < 2:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # pylint: disable=comparison-with-itself
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        n = len(self.dataset)
        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, p)
            else:
                t, p = stat_ttest()
                t = "" if np.isnan(
                    t) else f"Student's t: {t:.3f} (p={p:.3f}, N={n})"
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, p)
            else:
                F, p = stat_ANOVA()
                t = "" if np.isnan(F) else f"ANOVA: {F:.3f} (p={p:.3f}, N={n})"
        self.stat_test = t

    def _compute_tests_disc(self):
        if self.group_var is None or self.attribute is None:
            self.stat_test = ""
        else:
            chi, p, dof = self._chi_square(self.group_var, self.attribute)
            if np.isnan(p):
                self.stat_test = ""
            else:
                self.stat_test = f"χ²: {chi:.2f} (p={p:.3f}, dof={dof})"

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(attr.str_val(stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_max_x = gtop * scale_x
        self.scene_width = self.scene_max_x - self.scene_min_x

        val = first_val
        last_text = self.scene_min_x
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = QGraphicsSimpleTextItem(
                self.attribute.str_val(val) if not misssing_stats else "?")
            t.setFont(self._axis_font)
            t.setFlag(QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            x_start = val * scale_x - r.width() / 2
            x_finish = x_start + r.width()
            if x_start > last_text + 10 and x_finish < self.scene_max_x:
                t.setPos(x_start, 8)
                self.box_scene.addItem(t)
                last_text = x_finish
            if val >= top:
                break
            val += step
        self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0,
                               self._pen_axis)

    def draw_stat(self):
        if self.stat_test:
            label = QGraphicsSimpleTextItem(self.stat_test)
            brect = self.box_scene.sceneRect()
            label.setPos(brect.center().x() - label.boundingRect().width() / 2,
                         8 + self._axis_font.pixelSize() * 2)
            label.setFlag(QGraphicsItem.ItemIgnoresTransformations)
            self.box_scene.addItem(label)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.attribute.is_continuous
        if self.show_stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(
                    float(np.sum(dist))
                    for dist in self.conts.array_with_unknowns)
            else:
                max_box = float(np.sum(self.dist.array_with_unknowns))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.show_stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts.array_with_unknowns, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(
                sum(dist) * scale_x + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.show_stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(attr.str_val(val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None or stat.q75 is not None:
            # if any of them is None it means that its value is equal to median
            box_from = stat.q25 or stat.median
            box_to = stat.q75 or stat.median
            mbox = FilterGraphicsRectItem(stat.data_range, box_from * scale_x,
                                          -height / 2,
                                          (box_to - box_from) * scale_x,
                                          height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2, stat.median,
                               height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = DiscDataRange(None, group_val)
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        missing_val_str = f"missing '{attr.name}'"
        values = attr.values + ("", )
        colors = attr.palette.qcolors_w_nan
        total = sum(dist)
        for freq, value, color in zip(dist, values, colors):
            if freq < 1e-6:
                continue
            v = freq
            if self.show_stretched:
                v /= ss
            v *= self.scale_x
            cond = DiscDataRange(value, group_val)
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(color))
            rect.setPen(QPen(Qt.NoPen))
            value = value or missing_val_str
            if self.show_stretched:
                tooltip = f"{value}: {100 * freq / total:.2f}%"
            else:
                tooltip = f"{value}: ({int(freq)})"
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(value)
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def on_selection_changed(self):
        self.selection = tuple(item.data_range
                               for item in self.box_scene.selectedItems()
                               if item.data_range)
        self.commit()

    def commit(self):
        conditions = self._gather_conditions()
        if conditions:
            selected = Values(conditions, conjunction=False)(self.dataset)
            selection = np.in1d(self.dataset.ids,
                                selected.ids,
                                assume_unique=True).nonzero()[0]
        else:
            selected, selection = None, []
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def _gather_conditions(self):
        conditions = []
        attr = self.attribute
        group_attr = self.group_var
        for data_range in self.selection:
            if attr.is_discrete:
                # If some value was removed from the data (in case settings are
                # loaded from a scheme), do not include the corresponding
                # filter; this is appropriate since data with such value does
                # not exist anyway
                if not data_range.value:
                    condition = IsDefined([attr], negate=True)
                elif data_range.value not in attr.values:
                    continue
                else:
                    condition = FilterDiscrete(attr, [data_range.value])
            else:
                condition = FilterContinuous(attr, FilterContinuous.Between,
                                             data_range.low, data_range.high)
            if data_range.group_value:
                if not data_range.group_value:
                    grp_filter = IsDefined([group_attr], negate=True)
                elif data_range.group_value not in group_attr.values:
                    continue
                else:
                    grp_filter = FilterDiscrete(group_attr,
                                                [data_range.group_value])
                condition = Values([condition, grp_filter], conjunction=True)
            conditions.append(condition)
        return conditions

    def _show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 0)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        return self.attribute.name if self.attribute else None

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)
Exemplo n.º 38
0
    def data(self, index, role):
        # type: (QModelIndex, Qt.ItemDataRole) -> Any
        # Text formatting for various data simply requires a lot of branches.
        # This is much better than overengineering various formatters...
        # pylint: disable=too-many-branches

        if not index.isValid():
            return None

        row, column = self.mapToSourceRows(index.row()), index.column()
        # Make sure we're not out of range
        if not 0 <= row <= self.n_attributes:
            return QVariant()

        attribute = self.variables[row]

        if role == Qt.BackgroundRole:
            if attribute in self.domain.attributes:
                return self.COLOR_FOR_ROLE[self.ATTRIBUTE]
            elif attribute in self.domain.metas:
                return self.COLOR_FOR_ROLE[self.META]
            elif attribute in self.domain.class_vars:
                return self.COLOR_FOR_ROLE[self.CLASS_VAR]

        elif role == Qt.TextAlignmentRole:
            if column == self.Columns.NAME:
                return Qt.AlignLeft | Qt.AlignVCenter
            return Qt.AlignRight | Qt.AlignVCenter

        output = None

        if column == self.Columns.ICON:
            if role == Qt.DecorationRole:
                return gui.attributeIconDict[attribute]
        elif column == self.Columns.NAME:
            if role == Qt.DisplayRole:
                output = attribute.name
        elif column == self.Columns.DISTRIBUTION:
            if role == Qt.DisplayRole:
                if isinstance(attribute, (DiscreteVariable, ContinuousVariable)):
                    if row not in self.__distributions_cache:
                        scene = QGraphicsScene(parent=self)
                        histogram = Histogram(
                            data=self.table,
                            variable=attribute,
                            color_attribute=self.target_var,
                            border=(0, 0, 2, 0),
                            border_color='#ccc',
                        )
                        scene.addItem(histogram)
                        self.__distributions_cache[row] = scene
                    return self.__distributions_cache[row]
        elif column == self.Columns.CENTER:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    output = self._center[row]
                    if not np.isnan(output):
                        output = attribute.str_val(self._center[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._center[row])
                else:
                    output = self._center[row]
        elif column == self.Columns.DISPERSION:
            if role == Qt.DisplayRole:
                if isinstance(attribute, TimeVariable):
                    output = format_time_diff(self._min[row], self._max[row])
                else:
                    output = self._dispersion[row]
        elif column == self.Columns.MIN:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    if attribute.ordered:
                        output = attribute.str_val(self._min[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._min[row])
                else:
                    output = self._min[row]
        elif column == self.Columns.MAX:
            if role == Qt.DisplayRole:
                if isinstance(attribute, DiscreteVariable):
                    if attribute.ordered:
                        output = attribute.str_val(self._max[row])
                elif isinstance(attribute, TimeVariable):
                    output = attribute.str_val(self._max[row])
                else:
                    output = self._max[row]
        elif column == self.Columns.MISSING:
            if role == Qt.DisplayRole:
                output = '%d (%d%%)' % (
                    self._missing[row],
                    100 * self._missing[row] / self.n_instances
                )

        # Consistently format the text inside the table cells
        # The easiest way to check for NaN is to compare with itself
        if output != output:  # pylint: disable=comparison-with-itself
            output = ''
        # Format ∞ properly
        elif output in (np.inf, -np.inf):
            output = '%s∞' % ['', '-'][output < 0]
        elif isinstance(output, int):
            output = locale.format('%d', output, grouping=True)
        elif isinstance(output, float):
            output = locale.format('%.2f', output, grouping=True)

        return output
Exemplo n.º 39
0
class OWVennDiagram(widget.OWWidget):
    name = "Venn Diagram"
    description = "A graphical visualization of the overlap of data instances " "from a collection of input data sets."
    icon = "icons/VennDiagram.svg"
    priority = 280

    inputs = [("Data", Orange.data.Table, "setData", widget.Multiple)]
    outputs = [("Selected Data", Orange.data.Table)]

    # Selected disjoint subset indices
    selection = settings.Setting([])
    #: Stored input set hints
    #: {(index, inputname, attributes): (selectedattrname, itemsettitle)}
    #: The 'selectedattrname' can be None
    inputhints = settings.Setting({})
    #: Use identifier columns for instance matching
    useidentifiers = settings.Setting(True)
    #: Output unique items (one output row for every unique instance `key`)
    #: or preserve all duplicates in the output.
    output_duplicates = settings.Setting(False)
    autocommit = settings.Setting(True)

    graph_name = "scene"

    def __init__(self):
        super().__init__()

        # Diagram update is in progress
        self._updating = False
        # Input update is in progress
        self._inputUpdate = False
        # All input tables have the same domain.
        self.samedomain = True
        # Input datasets in the order they were 'connected'.
        self.data = OrderedDict()
        # Extracted input item sets in the order they were 'connected'
        self.itemsets = OrderedDict()

        # GUI
        box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, "No data on input.\n")

        self.identifiersBox = gui.radioButtonsInBox(
            self.controlArea,
            self,
            "useidentifiers",
            [],
            box="Data Instance Identifiers",
            callback=self._on_useidentifiersChanged,
        )
        self.useequalityButton = gui.appendRadioButton(self.identifiersBox, "Use instance equality")
        self.useidentifiersButton = rb = gui.appendRadioButton(self.identifiersBox, "Use identifiers")
        self.inputsBox = gui.indentedBox(self.identifiersBox, sep=gui.checkButtonOffsetHint(rb))
        self.inputsBox.setEnabled(bool(self.useidentifiers))

        for i in range(5):
            box = gui.vBox(self.inputsBox, "Data set #%i" % (i + 1), addSpace=False)
            box.setFlat(True)
            model = itemmodels.VariableListModel(parent=self)
            cb = QComboBox(minimumContentsLength=12, sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon)
            cb.setModel(model)
            cb.activated[int].connect(self._on_inputAttrActivated)
            box.setEnabled(False)
            # Store the combo in the box for later use.
            box.combo_box = cb
            box.layout().addWidget(cb)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Output")
        gui.checkBox(box, self, "output_duplicates", "Output duplicates", callback=lambda: self.commit())
        gui.auto_commit(box, self, "autocommit", "Send Selection", "Send Automatically", box=False)

        # Main area view
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.setBackgroundRole(QPalette.Window)
        self.view.setFrameStyle(QGraphicsView.StyledPanel)

        self.mainArea.layout().addWidget(self.view)
        self.vennwidget = VennDiagram()
        self.vennwidget.resize(400, 400)
        self.vennwidget.itemTextEdited.connect(self._on_itemTextEdited)
        self.scene.selectionChanged.connect(self._on_selectionChanged)

        self.scene.addItem(self.vennwidget)

        self.resize(self.controlArea.sizeHint().width() + 550, max(self.controlArea.sizeHint().height(), 550))

        self._queue = []

    @check_sql_input
    def setData(self, data, key=None):
        self.error()
        if not self._inputUpdate:
            # Store hints only on the first setData call.
            self._storeHints()
            self._inputUpdate = True

        if key in self.data:
            if data is None:
                # Remove the input
                self._remove(key)
            else:
                # Update existing item
                self._update(key, data)
        elif data is not None:
            # TODO: Allow setting more them 5 inputs and let the user
            # select the 5 to display.
            if len(self.data) == 5:
                self.error("Venn diagram accepts at most five data sets.")
                return
            # Add a new input
            self._add(key, data)

    def handleNewSignals(self):
        self._inputUpdate = False

        # Check if all inputs are from the same domain.
        domains = [input.table.domain for input in self.data.values()]
        samedomain = all(domain_eq(d1, d2) for d1, d2 in pairwise(domains))

        self.samedomain = samedomain

        has_identifiers = all(source_attributes(input.table.domain) for input in self.data.values())
        has_any_identifiers = any(source_attributes(input.table.domain) for input in self.data.values())
        self.useequalityButton.setEnabled(samedomain)
        self.useidentifiersButton.setEnabled(has_any_identifiers or len(self.data) == 0)
        self.inputsBox.setEnabled(has_any_identifiers)

        if not samedomain and has_any_identifiers and not self.useidentifiers:
            self.useidentifiers = 1
        elif samedomain and not has_identifiers:
            self.useidentifiers = 0

        incremental = all(inc for _, inc in self._queue)

        if incremental:
            # Only received updated data on existing link.
            self._updateItemsets()
        else:
            # Links were removed and/or added.
            self._createItemsets()
            self._restoreHints()
            self._updateItemsets()

        del self._queue[:]

        self._createDiagram()
        if self.data:
            self.info.setText("{} data sets on input.\n".format(len(self.data)))
        else:
            self.info.setText("No data on input\n")

        self._updateInfo()
        super().handleNewSignals()

    def _invalidate(self, keys=None, incremental=True):
        """
        Invalidate input for a list of input keys.
        """
        if keys is None:
            keys = list(self.data.keys())

        self._queue.extend((key, incremental) for key in keys)

    def itemsetAttr(self, key):
        index = list(self.data.keys()).index(key)
        _, combo = self._controlAtIndex(index)
        model = combo.model()
        attr_index = combo.currentIndex()
        if attr_index >= 0:
            return model[attr_index]
        else:
            return None

    def _controlAtIndex(self, index):
        group_box = self.inputsBox.layout().itemAt(index).widget()
        combo = group_box.combo_box
        return group_box, combo

    def _setAttributes(self, index, attrs):
        box, combo = self._controlAtIndex(index)
        model = combo.model()

        if attrs is None:
            model[:] = []
            box.setEnabled(False)
        else:
            if model[:] != attrs:
                model[:] = attrs

            box.setEnabled(True)

    def _add(self, key, table):
        name = table.name
        index = len(self.data)
        attrs = source_attributes(table.domain)

        self.data[key] = _InputData(key, name, table)

        self._setAttributes(index, attrs)

        self._invalidate([key], incremental=False)

        item = self.inputsBox.layout().itemAt(index)
        box = item.widget()
        box.setTitle("Data set: {}".format(name))

    def _remove(self, key):
        index = list(self.data.keys()).index(key)

        # Clear possible warnings.
        self.warning()

        self._setAttributes(index, None)

        del self.data[key]

        layout = self.inputsBox.layout()
        item = layout.takeAt(index)
        layout.addItem(item)
        inputs = list(self.data.values())

        for i in range(5):
            box, _ = self._controlAtIndex(i)
            if i < len(inputs):
                title = "Data set: {}".format(inputs[i].name)
            else:
                title = "Data set #{}".format(i + 1)
            box.setTitle(title)

        self._invalidate([key], incremental=False)

    def _update(self, key, table):
        name = table.name
        index = list(self.data.keys()).index(key)
        attrs = source_attributes(table.domain)

        self.data[key] = self.data[key]._replace(name=name, table=table)

        self._setAttributes(index, attrs)
        self._invalidate([key])

        item = self.inputsBox.layout().itemAt(index)
        box = item.widget()
        box.setTitle("Data set: {}".format(name))

    def _itemsForInput(self, key):
        useidentifiers = self.useidentifiers or not self.samedomain

        def items_by_key(key, input):
            attr = self.itemsetAttr(key)
            if attr is not None:
                return [str(inst[attr]) for inst in input.table if not numpy.isnan(inst[attr])]
            else:
                return []

        def items_by_eq(key, input):
            return list(map(ComparableInstance, input.table))

        input = self.data[key]
        if useidentifiers:
            items = items_by_key(key, input)
        else:
            items = items_by_eq(key, input)
        return items

    def _updateItemsets(self):
        assert list(self.data.keys()) == list(self.itemsets.keys())
        for key, input in list(self.data.items()):
            items = self._itemsForInput(key)
            item = self.itemsets[key]
            item = item._replace(items=items)
            name = input.name
            if item.name != name:
                item = item._replace(name=name, title=name)
            self.itemsets[key] = item

    def _createItemsets(self):
        olditemsets = dict(self.itemsets)
        self.itemsets.clear()

        for key, input in self.data.items():
            items = self._itemsForInput(key)
            name = input.name
            if key in olditemsets and olditemsets[key].name == name:
                # Reuse the title (which might have been changed by the user)
                title = olditemsets[key].title
            else:
                title = name

            itemset = _ItemSet(key=key, name=name, title=title, items=items)
            self.itemsets[key] = itemset

    def _storeHints(self):
        if self.data:
            self.inputhints.clear()
            for i, (key, input) in enumerate(self.data.items()):
                attrs = source_attributes(input.table.domain)
                attrs = tuple(attr.name for attr in attrs)
                selected = self.itemsetAttr(key)
                if selected is not None:
                    attr_name = selected.name
                else:
                    attr_name = None
                itemset = self.itemsets[key]
                self.inputhints[(i, input.name, attrs)] = (attr_name, itemset.title)

    def _restoreHints(self):
        settings = []
        for i, (key, input) in enumerate(self.data.items()):
            attrs = source_attributes(input.table.domain)
            attrs = tuple(attr.name for attr in attrs)
            hint = self.inputhints.get((i, input.name, attrs), None)
            if hint is not None:
                attr, name = hint
                attr_ind = attrs.index(attr) if attr is not None else -1
                settings.append((attr_ind, name))
            else:
                return

        # all inputs match the stored hints
        for i, key in enumerate(self.itemsets):
            attr, itemtitle = settings[i]
            self.itemsets[key] = self.itemsets[key]._replace(title=itemtitle)
            _, cb = self._controlAtIndex(i)
            cb.setCurrentIndex(attr)

    def _createDiagram(self):
        self._updating = True

        oldselection = list(self.selection)

        self.vennwidget.clear()
        n = len(self.itemsets)
        self.disjoint = disjoint(set(s.items) for s in self.itemsets.values())

        vennitems = []
        colors = colorpalette.ColorPaletteHSV(n)

        for i, (key, item) in enumerate(self.itemsets.items()):
            count = len(set(item.items))
            count_all = len(item.items)
            if count != count_all:
                fmt = "{} <i>(all: {})</i>"
            else:
                fmt = "{}"
            counts = fmt.format(count, count_all)
            gr = VennSetItem(text=item.title, informativeText=counts)
            color = colors[i]
            color.setAlpha(100)
            gr.setBrush(QBrush(color))
            gr.setPen(QPen(Qt.NoPen))
            vennitems.append(gr)

        self.vennwidget.setItems(vennitems)

        for i, area in enumerate(self.vennwidget.vennareas()):
            area_items = list(map(str, list(self.disjoint[i])))
            if i:
                area.setText("{0}".format(len(area_items)))

            label = disjoint_set_label(i, n, simplify=False)
            head = "<h4>|{}| = {}</h4>".format(label, len(area_items))
            if len(area_items) > 32:
                items_str = ", ".join(map(escape, area_items[:32]))
                hidden = len(area_items) - 32
                tooltip = "{}<span>{}, ...</br>({} items not shown)<span>".format(head, items_str, hidden)
            elif area_items:
                tooltip = "{}<span>{}</span>".format(head, ", ".join(map(escape, area_items)))
            else:
                tooltip = head

            area.setToolTip(tooltip)

            area.setPen(QPen(QColor(10, 10, 10, 200), 1.5))
            area.setFlag(QGraphicsPathItem.ItemIsSelectable, True)
            area.setSelected(i in oldselection)

        self._updating = False
        self._on_selectionChanged()

    def _updateInfo(self):
        # Clear all warnings
        self.warning()

        if not len(self.data):
            self.info.setText("No data on input\n")
        else:
            self.info.setText("{0} data sets on input\n".format(len(self.data)))

        if self.useidentifiers:
            no_idx = [
                "#{}".format(i + 1)
                for i, key in enumerate(self.data)
                if not source_attributes(self.data[key].table.domain)
            ]
            if len(no_idx) == 1:
                self.warning("Data set {} has no suitable identifiers.".format(no_idx[0]))
            elif len(no_idx) > 1:
                self.warning(
                    "Data sets {} and {} have no suitable identifiers.".format(", ".join(no_idx[:-1]), no_idx[-1])
                )

    def _on_selectionChanged(self):
        if self._updating:
            return

        areas = self.vennwidget.vennareas()
        indices = [i for i, area in enumerate(areas) if area.isSelected()]

        self.selection = indices

        self.invalidateOutput()

    def _on_useidentifiersChanged(self):
        self.inputsBox.setEnabled(self.useidentifiers == 1)
        # Invalidate all itemsets
        self._invalidate()
        self._updateItemsets()
        self._createDiagram()

        self._updateInfo()

    def _on_inputAttrActivated(self, attr_index):
        combo = self.sender()
        # Find the input index to which the combo box belongs
        # (they are reordered when removing inputs).
        index = None
        inputs = list(self.data.items())
        for i in range(len(inputs)):
            _, c = self._controlAtIndex(i)
            if c is combo:
                index = i
                break

        assert index is not None

        key, _ = inputs[index]

        self._invalidate([key])
        self._updateItemsets()
        self._createDiagram()

    def _on_itemTextEdited(self, index, text):
        text = str(text)
        key = list(self.itemsets.keys())[index]
        self.itemsets[key] = self.itemsets[key]._replace(title=text)

    def invalidateOutput(self):
        self.commit()

    def commit(self):
        selected_subsets = []

        selected_items = reduce(set.union, [self.disjoint[index] for index in self.selection], set())

        def match(val):
            if numpy.isnan(val):
                return False
            else:
                return str(val) in selected_items

        source_var = Orange.data.StringVariable("source")
        item_id_var = Orange.data.StringVariable("item_id")

        names = [itemset.title.strip() for itemset in self.itemsets.values()]
        names = uniquify(names)

        for i, (key, input) in enumerate(self.data.items()):
            if self.useidentifiers:
                attr = self.itemsetAttr(key)
                if attr is not None:
                    mask = list(map(match, (inst[attr] for inst in input.table)))
                else:
                    mask = [False] * len(input.table)

                def instance_key(inst):
                    return str(inst[attr])

            else:
                mask = [ComparableInstance(inst) in selected_items for inst in input.table]
                _map = {item: str(i) for i, item in enumerate(selected_items)}

                def instance_key(inst):
                    return _map[ComparableInstance(inst)]

            mask = numpy.array(mask, dtype=bool)
            subset = input.table[mask]

            if len(subset) == 0:
                continue

            # add columns with source table id and set id

            if not self.output_duplicates:
                id_column = numpy.array([[instance_key(inst)] for inst in subset], dtype=object)
                source_names = numpy.array([[names[i]]] * len(subset), dtype=object)

                subset = append_column(subset, "M", source_var, source_names)
                subset = append_column(subset, "M", item_id_var, id_column)

            selected_subsets.append(subset)

        if selected_subsets and not self.output_duplicates:
            data = table_concat(selected_subsets)
            # Get all variables which are not constant between the same
            # item set
            varying = varying_between(data, [item_id_var])

            if source_var in varying:
                varying.remove(source_var)

            data = reshape_wide(data, varying, [item_id_var], [source_var])
            # remove the temporary item set id column
            data = drop_columns(data, [item_id_var])
        elif selected_subsets:
            data = table_concat(selected_subsets)
        else:
            data = None

        self.send("Selected Data", data)

    def getSettings(self, *args, **kwargs):
        self._storeHints()
        return super().getSettings(self, *args, **kwargs)

    def send_report(self):
        self.report_plot()
Exemplo n.º 40
0
class OWPieChart(widget.OWWidget):
    name = "Pie Chart"
    description = "Make fun of Pie Charts."
    icon = "icons/PieChart.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Orange.data.Table)

    settingsHandler = DomainContextHandler()
    attribute = ContextSetting(None)
    split_var = ContextSetting(None)
    explode = Setting(False)
    graph_name = "scene"

    def __init__(self):
        super().__init__()
        self.dataset = None

        self.attrs = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False)
        cb = gui.comboBox(
            self.controlArea, self, "attribute", box=True,
            model=self.attrs, callback=self.update_scene, contentsLength=12)
        grid = QGridLayout()
        self.legend = gui.widgetBox(gui.indentedBox(cb.box), orientation=grid)
        grid.setColumnStretch(1, 1)
        grid.setHorizontalSpacing(6)
        self.legend_items = []
        self.split_vars = DomainModel(
            valid_types=Orange.data.DiscreteVariable, separators=False,
            placeholder="None", )
        gui.comboBox(
            self.controlArea, self, "split_var", box="Split by",
            model=self.split_vars, callback=self.update_scene)
        gui.checkBox(
            self.controlArea, self, "explode", "Explode pies", box=True,
            callback=self.update_scene)
        gui.rubber(self.controlArea)
        gui.widgetLabel(
            gui.hBox(self.controlArea, box=True),
            "The aim of this widget is to\n"
            "demonstrate that pie charts are\n"
            "a terrible visualization. Please\n"
            "don't use it for any other purpose.")

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHints(
            QPainter.Antialiasing | QPainter.TextAntialiasing |
            QPainter.SmoothPixmapTransform)
        self.mainArea.layout().addWidget(self.view)
        self.mainArea.setMinimumWidth(600)

    def sizeHint(self):
        return QSize(200, 150)  # Horizontal size is regulated by mainArea

    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.attribute = None
        self.split_var = None
        domain = dataset.domain if dataset is not None else None
        self.attrs.set_domain(domain)
        self.split_vars.set_domain(domain)
        if dataset is not None:
            self.select_default_variables(domain)
            self.openContext(self.dataset)
        self.update_scene()

    def select_default_variables(self, domain):
        if len(self.attrs) > len(domain.class_vars):
            first_attr = self.split_vars[len(domain.class_vars)]
        else:
            first_attr = None
        if len(self.attrs):
            self.attribute, self.split_var = self.attrs[0], first_attr
        else:
            self.attribute, self.split_var = self.split_var, None

    def update_scene(self):
        self.scene.clear()
        if self.dataset is None or self.attribute is None:
            return
        dists, labels = self.compute_box_data()
        colors = self.attribute.colors
        for x, (dist, label) in enumerate(zip(dists, labels)):
            self.pie_chart(SCALE * x, 0, 0.8 * SCALE, dist, colors)
            self.pie_label(SCALE * x, 0, label)
        self.update_legend(
            [QColor(*col) for col in colors], self.attribute.values)
        self.view.centerOn(SCALE * len(dists) / 2, 0)

    def update_legend(self, colors, labels):
        layout = self.legend.layout()
        while self.legend_items:
            w = self.legend_items.pop()
            layout.removeWidget(w)
            w.deleteLater()
        for row, (color, label) in enumerate(zip(colors, labels)):
            icon = QLabel()
            p = QPixmap(12, 12)
            p.fill(color)
            icon.setPixmap(p)
            label = QLabel(label)
            layout.addWidget(icon, row, 0)
            layout.addWidget(label, row, 1, alignment=Qt.AlignLeft)
            self.legend_items += (icon, label)

    def pie_chart(self, x, y, r, dist, colors):
        start_angle = 0
        dist = np.asarray(dist)
        spans = dist / (float(np.sum(dist)) or 1) * 360 * 16
        for span, color in zip(spans, colors):
            if not span:
                continue
            if self.explode:
                mid_ang = (start_angle + span / 2) / 360 / 16 * 2 * pi
                dx = r / 30 * cos(mid_ang)
                dy = r / 30 * sin(mid_ang)
            else:
                dx = dy = 0
            ellipse = QGraphicsEllipseItem(x - r / 2 + dx, y - r / 2 - dy, r, r)
            if len(spans) > 1:
                ellipse.setStartAngle(start_angle)
                ellipse.setSpanAngle(span)
            ellipse.setBrush(QColor(*color))
            self.scene.addItem(ellipse)
            start_angle += span

    def pie_label(self, x, y, label):
        if not label:
            return
        text = QGraphicsSimpleTextItem(label)
        for cut in range(1, len(label)):
            if text.boundingRect().width() < 0.95 * SCALE:
                break
            text = QGraphicsSimpleTextItem(label[:-cut] + "...")
        text.setPos(x - text.boundingRect().width() / 2, y + 0.5 * SCALE)
        self.scene.addItem(text)

    def compute_box_data(self):
        if self.split_var:
            return (
                contingency.get_contingency(
                    self.dataset, self.attribute, self.split_var),
                self.split_var.values)
        else:
            return [
                distribution.get_distribution(
                    self.dataset, self.attribute)], [""]

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute is not None:
            text += "Box plot for '{}' ".format(self.attribute.name)
        if self.split_var is not None:
            text += "split by '{}'".format(self.split_var.name)
        if text:
            self.report_caption(text)
Exemplo n.º 41
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    size_log_scale = settings.Setting(2)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    CLASSIFICATION, REGRESSION = range(2)

    def __init__(self):
        super().__init__()
        # Instance variables
        self.forest_type = self.CLASSIFICATION
        self.model = None
        self.forest_adapter = None
        self.dataset = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x * self.size_log_scale)),
        ]

        self.REGRESSION_COLOR_CALC = [
            ('None', lambda _, __: QColor(255, 255, 255)),
            ('Class mean', self._color_class_mean),
            ('Standard deviation', self._color_stddev),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info, label='')

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
            callback=self.max_depth_changed)
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
            callback=self.target_colors_changed)
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
            callback=self.size_calc_changed)
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=20,
            maxValue=150, callback=self.zoom_changed, createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            if isinstance(model, RandomForestClassifier):
                self.forest_type = self.CLASSIFICATION
            elif isinstance(model, RandomForestRegressor):
                self.forest_type = self.REGRESSION
            else:
                raise RuntimeError('Invalid type of forest.')

            self.forest_adapter = self._get_forest_adapter(self.model)
            self.color_palette = self._type_specific('_get_color_palette')()
            self._draw_trees()

            self.dataset = model.instances
            # this bit is important for the regression classifier
            if self.dataset is not None and \
                    self.dataset.domain != model.domain:
                self.clf_dataset = Table.from_table(
                    self.model.domain, self.dataset)
            else:
                self.clf_dataset = self.dataset

            self._update_info_box()
            self._type_specific('_update_target_class_combo')()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    # CONTROL AREA CALLBACKS
    def max_depth_changed(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def target_colors_changed(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_has_changed()

    def size_calc_changed(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self.grid.clear()
            self._draw_trees()
            # Keep the selected item
            if self.selected_tree_index != -1:
                self.grid_items[self.selected_tree_index].setSelected(True)
            self.max_depth_changed()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    # MODEL CHANGED METHODS
    def _update_info_box(self):
        self.ui_info.setText(
            'Trees: {}'.format(len(self.forest_adapter.get_trees()))
        )

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    # MODEL CLEARED METHODS
    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    # HELPFUL METHODS
    def _get_max_depth(self):
        return max([tree.tree_adapter.max_depth for tree in self.ptrees])

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    def _draw_trees(self):
        self.ui_size_calc_combo.setEnabled(False)
        self.grid_items, self.ptrees = [], []

        with self.progressBar(len(self.forest_adapter.get_trees())) as prg:
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None, tree,
                    node_color_func=self._type_specific('_get_node_color'),
                    interactive=False, padding=100)
                self.grid_items.append(GridItem(
                    ptree, self.grid, max_size=self._calculate_zoom(self.zoom)
                ))
                self.ptrees.append(ptree)
                prg.advance()
        self.grid.set_items(self.grid_items)
        # This is necessary when adding items for the first time
        if self.grid:
            width = (self.view.width() -
                     self.view.verticalScrollBar().width())
            self.grid.reflow(width)
            self.grid.setPreferredWidth(width)
        self.ui_size_calc_combo.setEnabled(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if len(self.scene.selectedItems()) == 0:
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        obj = self.model.trees[self.selected_tree_index]
        obj.instances = self.dataset
        obj.meta_target_class_index = self.target_class_index
        obj.meta_size_calc_idx = self.size_calc_idx
        obj.meta_size_log_scale = self.size_log_scale
        obj.meta_depth_limit = self.depth_limit

        self.send('Tree', obj)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)

    def _type_specific(self, method):
        """A best effort method getter that somewhat separates logic specific
        to classification and regression trees.
        This relies on conventional naming of specific methods, e.g.
        a method name _get_tooltip would need to be defined like so:
        _classification_get_tooltip and _regression_get_tooltip, since they are
        both specific.

        Parameters
        ----------
        method : str
            Method name that we would like to call.

        Returns
        -------
        callable or None

        """
        if self.forest_type == self.CLASSIFICATION:
            return getattr(self, '_classification' + method)
        elif self.forest_type == self.REGRESSION:
            return getattr(self, '_regression' + method)
        else:
            return None

    # CLASSIFICATION FOREST SPECIFIC METHODS
    def _classification_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItem('None')
        values = [c.title() for c in
                  self.model.domain.class_vars[0].values]
        self.ui_target_class_combo.addItems(values)

    def _classification_get_color_palette(self):
        return [QColor(*c) for c in self.model.domain.class_var.colors]

    def _classification_get_node_color(self, adapter, tree_node):
        # this is taken almost directly from the existing classification tree
        # viewer
        colors = self.color_palette
        distribution = adapter.get_distribution(tree_node.label)[0]
        total = np.sum(distribution)

        if self.target_class_index:
            p = distribution[self.target_class_index - 1] / total
            color = colors[self.target_class_index - 1].lighter(200 - 100 * p)
        else:
            modus = np.argmax(distribution)
            p = distribution[modus] / (total or 1)
            color = colors[int(modus)].lighter(400 - 300 * p)
        return color

    # REGRESSION FOREST SPECIFIC METHODS
    def _regression_update_target_class_combo(self):
        self._clear_target_class_combo()
        self.ui_target_class_combo.addItems(
            list(zip(*self.REGRESSION_COLOR_CALC))[0])
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _regression_get_color_palette(self):
        return ContinuousPaletteGenerator(
            *self.forest_adapter.domain.class_var.colors)

    def _regression_get_node_color(self, adapter, tree_node):
        return self.REGRESSION_COLOR_CALC[self.target_class_index][1](
            adapter, tree_node
        )

    def _color_class_mean(self, adapter, tree_node):
        # calculate node colors relative to the mean of the node samples
        min_mean = np.min(self.clf_dataset.Y)
        max_mean = np.max(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        mean = np.mean(instances.Y)

        return self.color_palette[(mean - min_mean) / (max_mean - min_mean)]

    def _color_stddev(self, adapter, tree_node):
        # calculate node colors relative to the standard deviation in the node
        # samples
        min_mean, max_mean = 0, np.std(self.clf_dataset.Y)
        instances = adapter.get_instances_in_nodes(self.clf_dataset,
                                                   tree_node.label)
        std = np.std(instances.Y)

        return self.color_palette[(std - min_mean) / (max_mean - min_mean)]
Exemplo n.º 42
0
class OWQualityControl(widget.OWWidget):
    name = "Quality Control"
    description = "Experiment quality control"
    icon = "../widgets/icons/QualityControl.svg"
    priority = 5000

    inputs = [("Experiment Data", Orange.data.Table, "set_data")]
    outputs = []

    DISTANCE_FUNCTIONS = [("Distance from Pearson correlation",
                           dist_pcorr),
                          ("Euclidean distance",
                           dist_eucl),
                          ("Distance from Spearman correlation",
                           dist_spearman)]

    settingsHandler = SetContextHandler()

    split_by_labels = settings.ContextSetting({})
    sort_by_labels = settings.ContextSetting({})

    selected_distance_index = settings.Setting(0)

    def __init__(self, parent=None):
        super().__init__(parent)

        ## Attributes
        self.data = None
        self.distances = None
        self.groups = None
        self.unique_pos = None
        self.base_group_index = 0

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.info_box = gui.widgetLabel(box, "\n")

        ## Separate By box
        box = gui.widgetBox(self.controlArea, "Separate By")
        self.split_by_model = itemmodels.PyListModel(parent=self)
        self.split_by_view = QListView()
        self.split_by_view.setSelectionMode(QListView.ExtendedSelection)
        self.split_by_view.setModel(self.split_by_model)
        box.layout().addWidget(self.split_by_view)

        self.split_by_view.selectionModel().selectionChanged.connect(
            self.on_split_key_changed)

        ## Sort By box
        box = gui.widgetBox(self.controlArea, "Sort By")
        self.sort_by_model = itemmodels.PyListModel(parent=self)
        self.sort_by_view = QListView()
        self.sort_by_view.setSelectionMode(QListView.ExtendedSelection)
        self.sort_by_view.setModel(self.sort_by_model)
        box.layout().addWidget(self.sort_by_view)

        self.sort_by_view.selectionModel().selectionChanged.connect(
            self.on_sort_key_changed)

        ## Distance box
        box = gui.widgetBox(self.controlArea, "Distance Measure")
        gui.comboBox(box, self, "selected_distance_index",
                     items=[name for name, _ in self.DISTANCE_FUNCTIONS],
                     callback=self.on_distance_measure_changed)

        self.scene = QGraphicsScene()
        self.scene_view = QGraphicsView(self.scene)
        self.scene_view.setRenderHints(QPainter.Antialiasing)
        self.scene_view.setAlignment(Qt.AlignLeft | Qt.AlignVCenter)
        self.mainArea.layout().addWidget(self.scene_view)

        self.scene_view.installEventFilter(self)

        self._disable_updates = False
        self._cached_distances = {}
        self._base_index_hints = {}
        self.main_widget = None

        self.resize(800, 600)

    def clear(self):
        """Clear the widget state."""
        self.data = None
        self.distances = None
        self.groups = None
        self.unique_pos = None

        with disable_updates(self):
            self.split_by_model[:] = []
            self.sort_by_model[:] = []

        self.main_widget = None
        self.scene.clear()
        self.info_box.setText("\n")
        self._cached_distances = {}

    def set_data(self, data=None):
        """Set input experiment data."""
        self.closeContext()
        self.clear()

        self.error(0)
        self.warning(0)

        if data is not None:
            keys = self.get_suitable_keys(data)
            if not keys:
                self.error(0, "Data has no suitable feature labels.")
                data = None

        self.data = data
        if data is not None:
            self.on_new_data()

    def update_label_candidates(self):
        """Update the label candidates selection GUI 
        (Group/Sort By views).

        """
        keys = self.get_suitable_keys(self.data)
        with disable_updates(self):
            self.split_by_model[:] = keys
            self.sort_by_model[:] = keys

    def get_suitable_keys(self, data):
        """ Return suitable attr label keys from the data where
        the key has at least two unique values in the data.

        """
        attrs = [attr.attributes.items() for attr in data.domain.attributes]
        attrs = reduce(operator.iadd, attrs, [])
        # in case someone put non string values in attributes dict
        attrs = [(str(key), str(value)) for key, value in attrs]
        attrs = set(attrs)
        values = defaultdict(set)
        for key, value in attrs:
            values[key].add(value)
        keys = [key for key in values if len(values[key]) > 1]
        return keys

    def selected_split_by_labels(self):
        """Return the current selected split labels.
        """
        sel_m = self.split_by_view.selectionModel()
        indices = [r.row() for r in sel_m.selectedRows()]
        return [self.sort_by_model[i] for i in indices]

    def selected_sort_by_labels(self):
        """Return the current selected sort labels
        """
        sel_m = self.sort_by_view.selectionModel()
        indices = [r.row() for r in sel_m.selectedRows()]
        return [self.sort_by_model[i] for i in indices]

    def selected_distance(self):
        """Return the selected distance function.
        """
        return self.DISTANCE_FUNCTIONS[self.selected_distance_index][1]

    def selected_base_group_index(self):
        """Return the selected base group index
        """
        return self.base_group_index

    def selected_base_indices(self, base_group_index=None):
        indices = []
        for g, ind in self.groups:
            if base_group_index is None:
                label = group_label(self.selected_split_by_labels(), g)
                ind = [i for i in ind if i is not None]
                i = self._base_index_hints.get(label, ind[0] if ind else None)
            else:
                i = ind[base_group_index]
            indices.append(i)
        return indices

    def on_new_data(self):
        """We have new data and need to recompute all.
        """
        self.closeContext()

        self.update_label_candidates()
        self.info_box.setText(
            "%s genes \n%s experiments" %
            (len(self.data),  len(self.data.domain.attributes))
        )

        self.base_group_index = 0

        keys = self.get_suitable_keys(self.data)
        self.openContext(keys)

        ## Restore saved context settings (split/sort selection)
        split_by_labels = self.split_by_labels
        sort_by_labels = self.sort_by_labels

        def select(model, selection_model, selected_items):
            """Select items in a Qt item model view
            """
            all_items = list(model)
            try:
                indices = [all_items.index(item) for item in selected_items]
            except:
                indices = []
            for ind in indices:
                selection_model.select(model.index(ind),
                                       QItemSelectionModel.Select)

        with disable_updates(self):
            select(self.split_by_view.model(),
                   self.split_by_view.selectionModel(),
                   split_by_labels)

            select(self.sort_by_view.model(),
                   self.sort_by_view.selectionModel(),
                   sort_by_labels)

        with widget_disable(self):
            self.split_and_update()

    def on_split_key_changed(self, *args):
        """Split key has changed
        """
        with widget_disable(self):
            if not self._disable_updates:
                self.base_group_index = 0
                self.split_by_labels = self.selected_split_by_labels()
                self.split_and_update()

    def on_sort_key_changed(self, *args):
        """Sort key has changed
        """
        with widget_disable(self):
            if not self._disable_updates:
                self.base_group_index = 0
                self.sort_by_labels = self.selected_sort_by_labels()
                self.split_and_update()

    def on_distance_measure_changed(self):
        """Distance measure has changed
        """
        if self.data is not None:
            with widget_disable(self):
                self.update_distances()
                self.replot_experiments()

    def on_view_resize(self, size):
        """The view with the quality plot has changed
        """
        if self.main_widget:
            current = self.main_widget.size()
            self.main_widget.resize(size.width() - 6,
                                    current.height())

            self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def on_rug_item_clicked(self, item):
        """An ``item`` in the quality plot has been clicked.
        """
        update = False
        sort_by_labels = self.selected_sort_by_labels()
        if sort_by_labels and item.in_group:
            ## The item is part of the group
            if item.group_index != self.base_group_index:
                self.base_group_index = item.group_index
                update = True

        else:
            if sort_by_labels:
                # If the user clicked on an background item it
                # invalidates the sorted labels selection
                with disable_updates(self):
                    self.sort_by_view.selectionModel().clear()
                    update = True

            index = item.index
            group = item.group
            label = group_label(self.selected_split_by_labels(), group)

            if self._base_index_hints.get(label, 0) != index:
                self._base_index_hints[label] = index
                update = True

        if update:
            with widget_disable(self):
                self.split_and_update()

    def eventFilter(self, obj, event):
        if obj is self.scene_view and event.type() == QEvent.Resize:
            self.on_view_resize(event.size())
        return super().eventFilter(obj, event)

    def split_and_update(self):
        """
        Split the data based on the selected sort/split labels
        and update the quality plot.

        """
        split_labels = self.selected_split_by_labels()
        sort_labels = self.selected_sort_by_labels()

        self.warning(0)
        if not split_labels:
            self.warning(0, "No separate by label selected.")

        self.groups, self.unique_pos = \
                exp.separate_by(self.data, split_labels,
                                consider=sort_labels,
                                add_empty=True)

        self.groups = sorted(self.groups.items(),
                             key=lambda t: list(map(float_if_posible, t[0])))
        self.unique_pos = sorted(self.unique_pos.items(),
                                 key=lambda t: list(map(float_if_posible, t[0])))

        if self.groups:
            if sort_labels:
                group_base = self.selected_base_group_index()
                base_indices = self.selected_base_indices(group_base)
            else:
                base_indices = self.selected_base_indices()
            self.update_distances(base_indices)
            self.replot_experiments()

    def get_cached_distances(self, measure):
        if measure not in self._cached_distances:
            attrs = self.data.domain.attributes
            mat = numpy.zeros((len(attrs), len(attrs)))

            self._cached_distances[measure] = \
                (mat, set(zip(range(len(attrs)), range(len(attrs)))))

        return self._cached_distances[measure]

    def get_cached_distance(self, measure, i, j):
        matrix, computed = self.get_cached_distances(measure)
        key = (i, j) if i < j else (j, i)
        if key in computed:
            return matrix[i, j]
        else:
            return None

    def get_distance(self, measure, i, j):
        d = self.get_cached_distance(measure, i, j)
        if d is None:
            vec_i = take_columns(self.data, [i])
            vec_j = take_columns(self.data, [j])
            d = measure(vec_i, vec_j)

            mat, computed = self.get_cached_distances(measure)
            mat[i, j] = d
            key = key = (i, j) if i < j else (j, i)
            computed.add(key)
        return d

    def store_distance(self, measure, i, j, dist):
        matrix, computed = self.get_cached_distances(measure)
        key = (i, j) if i < j else (j, i)
        matrix[j, i] = matrix[i, j] = dist
        computed.add(key)

    def update_distances(self, base_indices=()):
        """Recompute the experiment distances.
        """
        distance = self.selected_distance()
        if base_indices == ():
            base_group_index = self.selected_base_group_index()
            base_indices = [ind[base_group_index] \
                            for _, ind in self.groups]

        assert(len(base_indices) == len(self.groups))

        base_distances = []
        attributes = self.data.domain.attributes
        pb = gui.ProgressBar(self, len(self.groups) * len(attributes))

        for (group, indices), base_index in zip(self.groups, base_indices):
            # Base column of the group
            if base_index is not None:
                base_vec = take_columns(self.data, [base_index])
                distances = []
                # Compute the distances between base column
                # and all the rest data columns.
                for i in range(len(attributes)):
                    if i == base_index:
                        distances.append(0.0)
                    elif self.get_cached_distance(distance, i, base_index) is not None:
                        distances.append(self.get_cached_distance(distance, i, base_index))
                    else:
                        vec_i = take_columns(self.data, [i])
                        dist = distance(base_vec, vec_i)
                        self.store_distance(distance, i, base_index, dist)
                        distances.append(dist)
                    pb.advance()

                base_distances.append(distances)
            else:
                base_distances.append(None)

        pb.finish()
        self.distances = base_distances

    def replot_experiments(self):
        """Replot the whole quality plot.
        """
        self.scene.clear()
        labels = []

        max_dist = numpy.nanmax(list(filter(None, self.distances)))
        rug_widgets = []

        group_pen = QPen(Qt.black)
        group_pen.setWidth(2)
        group_pen.setCapStyle(Qt.RoundCap)
        background_pen = QPen(QColor(0, 0, 250, 150))
        background_pen.setWidth(1)
        background_pen.setCapStyle(Qt.RoundCap)

        main_widget = QGraphicsWidget()
        layout = QGraphicsGridLayout()
        attributes = self.data.domain.attributes
        if self.data is not None:
            for (group, indices), dist_vec in zip(self.groups, self.distances):
                indices_set = set(indices)
                rug_items = []
                if dist_vec is not None:
                    for i, attr in enumerate(attributes):
                        # Is this a within group distance or background
                        in_group = i in indices_set
                        if in_group:
                            rug_item = ClickableRugItem(dist_vec[i] / max_dist,
                                           1.0, self.on_rug_item_clicked)
                            rug_item.setPen(group_pen)
                            tooltip = experiment_description(attr)
                            rug_item.setToolTip(tooltip)
                            rug_item.group_index = indices.index(i)
                            rug_item.setZValue(rug_item.zValue() + 1)
                        else:
                            rug_item = ClickableRugItem(dist_vec[i] / max_dist,
                                           0.85, self.on_rug_item_clicked)
                            rug_item.setPen(background_pen)
                            tooltip = experiment_description(attr)
                            rug_item.setToolTip(tooltip)

                        rug_item.group = group
                        rug_item.index = i
                        rug_item.in_group = in_group

                        rug_items.append(rug_item)

                rug_widget = RugGraphicsWidget(parent=main_widget)
                rug_widget.set_rug(rug_items)

                rug_widgets.append(rug_widget)

                label = group_label(self.selected_split_by_labels(), group)
                label_item = QGraphicsSimpleTextItem(label, main_widget)
                label_item = GraphicsSimpleTextLayoutItem(label_item, parent=layout)
                label_item.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
                labels.append(label_item)

        for i, (label, rug_w) in enumerate(zip(labels, rug_widgets)):
            layout.addItem(label, i, 0, Qt.AlignVCenter)
            layout.addItem(rug_w, i, 1)
            layout.setRowMaximumHeight(i, 30)

        main_widget.setLayout(layout)
        self.scene.addItem(main_widget)
        self.main_widget = main_widget
        self.rug_widgets = rug_widgets
        self.labels = labels
        self.on_view_resize(self.scene_view.size())
Exemplo n.º 43
0
class OWSOM(OWWidget):
    name = "Self-Organizing Map"
    description = "Computation of self-organizing map."
    icon = "icons/SOM.svg"
    keywords = ["SOM"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = DomainContextHandler()
    auto_dimension = Setting(True)
    size_x = Setting(10)
    size_y = Setting(10)
    hexagonal = Setting(1)
    initialization = Setting(0)

    attr_color = ContextSetting(None)
    size_by_instances = Setting(True)
    pie_charts = Setting(False)
    selection = Setting(None, schema_only=True)

    graph_name = "view"

    _grid_pen = QPen(QBrush(QColor(224, 224, 224)), 2)
    _grid_pen.setCosmetic(True)

    OptControls = namedtuple(
        "OptControls",
        ("shape", "auto_dim", "spin_x", "spin_y", "initialization", "start"))

    class Warning(OWWidget.Warning):
        ignoring_disc_variables = Msg("SOM ignores discrete variables.")
        missing_colors = \
            Msg("Some data instances have undefined value of '{}'.")
        missing_values = \
            Msg("{} data instance{} with undefined value(s) {} not shown.")
        single_attribute = Msg("Data contains a single numeric column.")

    class Error(OWWidget.Error):
        no_numeric_variables = Msg("Data contains no numeric columns.")
        no_defined_rows = Msg("All rows contain at least one undefined value.")

    def __init__(self):
        super().__init__()
        self.__pending_selection = self.selection
        self._optimizer = None
        self._optimizer_thread = None
        self.stop_optimization = False

        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.selection = None
        self.colors = self.thresholds = self.bin_labels = None

        self._set_input_summary(None)
        self._set_output_summary(None)

        box = gui.vBox(self.controlArea, box="SOM")
        shape = gui.comboBox(box,
                             self,
                             "",
                             items=("Hexagonal grid", "Square grid"))
        shape.setCurrentIndex(1 - self.hexagonal)

        box2 = gui.indentedBox(box, 10)
        auto_dim = gui.checkBox(box2,
                                self,
                                "auto_dimension",
                                "Set dimensions automatically",
                                callback=self.on_auto_dimension_changed)
        self.manual_box = box3 = gui.hBox(box2)
        spinargs = dict(value="",
                        widget=box3,
                        master=self,
                        minv=5,
                        maxv=100,
                        step=5,
                        alignment=Qt.AlignRight)
        spin_x = gui.spin(**spinargs)
        spin_x.setValue(self.size_x)
        gui.widgetLabel(box3, "×")
        spin_y = gui.spin(**spinargs)
        spin_y.setValue(self.size_y)
        gui.rubber(box3)
        self.manual_box.setEnabled(not self.auto_dimension)

        initialization = gui.comboBox(box,
                                      self,
                                      "initialization",
                                      items=("Initialize with PCA",
                                             "Random initialization",
                                             "Replicable random"))

        start = gui.button(box,
                           self,
                           "Restart",
                           callback=self.restart_som_pressed,
                           sizePolicy=(QSizePolicy.MinimumExpanding,
                                       QSizePolicy.Fixed))

        self.opt_controls = self.OptControls(shape, auto_dim, spin_x, spin_y,
                                             initialization, start)

        box = gui.vBox(self.controlArea, "Color")
        gui.comboBox(box,
                     self,
                     "attr_color",
                     searchable=True,
                     callback=self.on_attr_color_change,
                     model=DomainModel(placeholder="(Same color)",
                                       valid_types=DomainModel.PRIMITIVE))
        gui.checkBox(box,
                     self,
                     "pie_charts",
                     label="Show pie charts",
                     callback=self.on_pie_chart_change)
        gui.checkBox(box,
                     self,
                     "size_by_instances",
                     label="Size by number of instances",
                     callback=self.on_attr_size_change)

        gui.rubber(self.controlArea)

        self.scene = QGraphicsScene(self)

        self.view = SomView(self.scene)
        self.view.setMinimumWidth(400)
        self.view.setMinimumHeight(400)
        self.view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.view.setRenderHint(QPainter.Antialiasing)
        self.view.selection_changed.connect(self.on_selection_change)
        self.view.selection_moved.connect(self.on_selection_move)
        self.view.selection_mark_changed.connect(self.on_selection_mark_change)
        self.mainArea.layout().addWidget(self.view)

        self.elements = None
        self.grid = None
        self.grid_cells = None
        self.legend = None

    @Inputs.data
    def set_data(self, data):
        def prepare_data():
            if len(cont_attrs) < len(attrs):
                self.Warning.ignoring_disc_variables()
            if len(cont_attrs) == 1:
                self.Warning.single_attribute()
            x = Table.from_table(Domain(cont_attrs), data).X
            if sp.issparse(x):
                self.data = data
                self.cont_x = x.tocsr()
            else:
                mask = np.all(np.isfinite(x), axis=1)
                if not np.any(mask):
                    self.Error.no_defined_rows()
                else:
                    if np.all(mask):
                        self.data = data
                        self.cont_x = x.copy()
                    else:
                        self.data = data[mask]
                        self.cont_x = x[mask]
                    self.cont_x -= np.min(self.cont_x, axis=0)[None, :]
                    sums = np.sum(self.cont_x, axis=0)[None, :]
                    sums[sums == 0] = 1
                    self.cont_x /= sums

        def set_warnings():
            missing = len(data) - len(self.data)
            if missing == 1:
                self.Warning.missing_values(1, "", "is")
            elif missing > 1:
                self.Warning.missing_values(missing, "s", "are")

        self.stop_optimization_and_wait()

        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Warning.clear()

        if data is not None:
            attrs = data.domain.attributes
            cont_attrs = [var for var in attrs if var.is_continuous]
            if not cont_attrs:
                self.Error.no_numeric_variables()
            else:
                prepare_data()

        if self.data is not None:
            self.controls.attr_color.model().set_domain(data.domain)
            self.attr_color = data.domain.class_var
            set_warnings()

        self.openContext(self.data)
        self.set_color_bins()
        self.create_legend()
        self.recompute_dimensions()
        self._set_input_summary(data)
        self.start_som()

    def _set_input_summary(self, data):
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)

    def _set_output_summary(self, output):
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def clear(self):
        self.data = self.cont_x = None
        self.cells = self.member_data = None
        self.attr_color = None
        self.colors = self.thresholds = self.bin_labels = None
        if self.elements is not None:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.clear_selection()
        self.controls.attr_color.model().set_domain(None)
        self.Warning.clear()
        self.Error.clear()

    def recompute_dimensions(self):
        if not self.auto_dimension or self.cont_x is None:
            return
        dim = max(5, int(np.ceil(np.sqrt(5 * np.sqrt(self.cont_x.shape[0])))))
        self.opt_controls.spin_x.setValue(dim)
        self.opt_controls.spin_y.setValue(dim)

    def on_auto_dimension_changed(self):
        self.manual_box.setEnabled(not self.auto_dimension)
        if self.auto_dimension:
            self.recompute_dimensions()
        else:
            spin_x = self.opt_controls.spin_x
            spin_y = self.opt_controls.spin_y
            dimx = int(5 * np.round(spin_x.value() / 5))
            dimy = int(5 * np.round(spin_y.value() / 5))
            spin_x.setValue(dimx)
            spin_y.setValue(dimy)

    def on_attr_color_change(self):
        self.controls.pie_charts.setEnabled(self.attr_color is not None)
        self.set_color_bins()
        self.create_legend()
        self.rescale()
        self._redraw()

    def on_attr_size_change(self):
        self._redraw()

    def on_pie_chart_change(self):
        self._redraw()

    def clear_selection(self):
        self.selection = None
        self.redraw_selection()

    def on_selection_change(self, selection, action=SomView.SelectionSet):
        if self.data is None:  # clicks on empty canvas
            return
        if self.selection is None:
            self.selection = np.zeros(self.grid_cells.T.shape, dtype=np.int16)
        if action == SomView.SelectionSet:
            self.selection[:] = 0
            self.selection[selection] = 1
        elif action == SomView.SelectionAddToGroup:
            self.selection[selection] = max(1, np.max(self.selection))
        elif action == SomView.SelectionNewGroup:
            self.selection[selection] = 1 + np.max(self.selection)
        elif action & SomView.SelectionRemove:
            self.selection[selection] = 0
        self.redraw_selection()
        self.update_output()

    def on_selection_move(self, event: QKeyEvent):
        if self.selection is None or not np.any(self.selection):
            if event.key() in (Qt.Key_Right, Qt.Key_Down):
                x = y = 0
            else:
                x = self.size_x - 1
                y = self.size_y - 1
        else:
            x, y = np.nonzero(self.selection)
            if len(x) > 1:
                return
            if event.key() == Qt.Key_Up and y > 0:
                y -= 1
            if event.key() == Qt.Key_Down and y < self.size_y - 1:
                y += 1
            if event.key() == Qt.Key_Left and x:
                x -= 1
            if event.key() == Qt.Key_Right and x < self.size_x - 1:
                x += 1
            x -= self.hexagonal and x == self.size_x - 1 and y % 2

        if self.selection is not None and self.selection[x, y]:
            return
        selection = np.zeros(self.grid_cells.shape, dtype=bool)
        selection[x, y] = True
        self.on_selection_change(selection)

    def on_selection_mark_change(self, marks):
        self.redraw_selection(marks=marks)

    def redraw_selection(self, marks=None):
        if self.grid_cells is None:
            return

        sel_pen = QPen(QBrush(QColor(128, 128, 128)), 2)
        sel_pen.setCosmetic(True)
        mark_pen = QPen(QBrush(QColor(128, 128, 128)), 4)
        mark_pen.setCosmetic(True)
        pens = [self._grid_pen, sel_pen]

        mark_brush = QBrush(QColor(224, 255, 255))
        sels = self.selection is not None and np.max(self.selection)
        palette = LimitedDiscretePalette(number_of_colors=sels + 1)
        brushes = [QBrush(Qt.NoBrush)] + \
                  [QBrush(palette[i].lighter(165)) for i in range(sels)]

        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                cell = self.grid_cells[y, x]
                marked = marks is not None and marks[x, y]
                sel_group = self.selection is not None and self.selection[x, y]
                if marked:
                    cell.setBrush(mark_brush)
                    cell.setPen(mark_pen)
                else:
                    cell.setBrush(brushes[sel_group])
                    cell.setPen(pens[bool(sel_group)])
                cell.setZValue(marked or sel_group)

    def restart_som_pressed(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
            self._optimizer.stop_optimization = True
        else:
            self.start_som()

    def start_som(self):
        self.read_controls()
        self.update_layout()
        self.clear_selection()
        if self.cont_x is not None:
            self.enable_controls(False)
            self._recompute_som()
        else:
            self.update_output()

    def read_controls(self):
        c = self.opt_controls
        self.hexagonal = c.shape.currentIndex() == 0
        self.size_x = c.spin_x.value()
        self.size_y = c.spin_y.value()

    def enable_controls(self, enable):
        c = self.opt_controls
        c.shape.setEnabled(enable)
        c.auto_dim.setEnabled(enable)
        c.start.setText("Start" if enable else "Stop")

    def update_layout(self):
        self.set_legend_pos()
        if self.elements:  # Prevent having redrawn grid but with old elements
            self.scene.removeItem(self.elements)
            self.elements = None
        self.redraw_grid()
        self.rescale()

    def _redraw(self):
        self.Warning.missing_colors.clear()
        if self.elements:
            self.scene.removeItem(self.elements)
            self.elements = None
        self.view.set_dimensions(self.size_x, self.size_y, self.hexagonal)

        if self.cells is None:
            return
        sizes = self.cells[:, :, 1] - self.cells[:, :, 0]
        sizes = sizes.astype(float)
        if not self.size_by_instances:
            sizes[sizes != 0] = 0.8
        else:
            sizes *= 0.8 / np.max(sizes)

        self.elements = QGraphicsItemGroup()
        self.scene.addItem(self.elements)
        if self.attr_color is None:
            self._draw_same_color(sizes)
        elif self.pie_charts:
            self._draw_pie_charts(sizes)
        else:
            self._draw_colored_circles(sizes)

    @property
    def _grid_factors(self):
        return (0.5, sqrt3_2) if self.hexagonal else (0, 1)

    def _draw_same_color(self, sizes):
        fx, fy = self._grid_factors
        color = QColor(64, 64, 64)
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                n = len(self.get_member_indices(x, y))
                if not r:
                    continue
                ellipse = ColoredCircle(r / 2, color, 0)
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(f"{n} instances")
                self.elements.addToGroup(ellipse)

    def _get_color_column(self):
        color_column = \
            self.data.get_column_view(self.attr_color)[0].astype(float,
                                                                 copy=False)
        if self.attr_color.is_discrete:
            with np.errstate(invalid="ignore"):
                int_col = color_column.astype(int)
            int_col[np.isnan(color_column)] = len(self.colors)
        else:
            int_col = np.zeros(len(color_column), dtype=int)
            # The following line is unnecessary because rows with missing
            # numeric data are excluded. Uncomment it if you change SOM to
            # tolerate missing values.
            # int_col[np.isnan(color_column)] = len(self.colors)
            for i, thresh in enumerate(self.thresholds, start=1):
                int_col[color_column >= thresh] = i
        return int_col

    def _tooltip(self, colors, distribution):
        if self.attr_color.is_discrete:
            values = self.attr_color.values
        else:
            values = self._bin_names()
        tot = np.sum(distribution)
        nbhp = "\N{NON-BREAKING HYPHEN}"
        return '<table style="white-space: nowrap">' + "".join(f"""
            <tr>
                <td>
                    <font color={color.name()}>■</font>
                    <b>{escape(val).replace("-", nbhp)}</b>:
                </td>
                <td>
                    {n} ({n / tot * 100:.1f}&nbsp;%)
                </td>
            </tr>
            """ for color, val, n in zip(colors, values, distribution) if n) \
            + "</table>"

    def _draw_pie_charts(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        colors = self.colors.qcolors_w_nan
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    self.grid_cells[y, x].setToolTip("")
                    continue
                members = self.get_member_indices(x, y)
                color_dist = np.bincount(color_column[members],
                                         minlength=len(colors))
                rel_color_dist = color_dist.astype(float) / len(members)
                pie = PieChart(rel_color_dist, r / 2, colors)
                pie.setToolTip(self._tooltip(colors, color_dist))
                self.elements.addToGroup(pie)
                pie.setPos(x + (y % 2) * fx, y * fy)

    def _draw_colored_circles(self, sizes):
        fx, fy = self._grid_factors
        color_column = self._get_color_column()
        qcolors = self.colors.qcolors_w_nan
        for y in range(self.size_y):
            for x in range(self.size_x - self.hexagonal * (y % 2)):
                r = sizes[x, y]
                if not r:
                    continue
                members = self.get_member_indices(x, y)
                color_dist = color_column[members]
                color_dist = color_dist[color_dist < len(self.colors)]
                if len(color_dist) != len(members):
                    self.Warning.missing_colors(self.attr_color.name)
                bc = np.bincount(color_dist, minlength=len(self.colors))
                color = qcolors[np.argmax(bc)]
                ellipse = ColoredCircle(r / 2, color,
                                        np.max(bc) / len(members))
                ellipse.setPos(x + (y % 2) * fx, y * fy)
                ellipse.setToolTip(self._tooltip(qcolors, bc))
                self.elements.addToGroup(ellipse)

    def redraw_grid(self):
        if self.grid is not None:
            self.scene.removeItem(self.grid)
        self.grid = QGraphicsItemGroup()
        self.grid.setZValue(-200)
        self.grid_cells = np.full((self.size_y, self.size_x), None)
        for y in range(self.size_y):
            for x in range(self.size_x - (y % 2) * self.hexagonal):
                if self.hexagonal:
                    cell = QGraphicsPathItem(_hexagon_path)
                    cell.setPos(x + (y % 2) / 2, y * sqrt3_2)
                else:
                    cell = QGraphicsRectItem(x - 0.5, y - 0.5, 1, 1)
                self.grid_cells[y, x] = cell
                cell.setPen(self._grid_pen)
                self.grid.addToGroup(cell)
        self.scene.addItem(self.grid)

    def get_member_indices(self, x, y):
        i, j = self.cells[x, y]
        return self.member_data[i:j]

    def _recompute_som(self):
        if self.cont_x is None:
            return

        som = SOM(self.size_x,
                  self.size_y,
                  hexagonal=self.hexagonal,
                  pca_init=self.initialization == 0,
                  random_seed=0 if self.initialization == 2 else None)

        class Optimizer(QObject):
            update = Signal(float, np.ndarray, np.ndarray)
            done = Signal(SOM)
            stopped = Signal()
            stop_optimization = False

            def __init__(self, data, som):
                super().__init__()
                self.som = som
                self.data = data

            def callback(self, progress):
                self.update.emit(progress, self.som.weights.copy(),
                                 self.som.ssum_weights.copy())
                return not self.stop_optimization

            def run(self):
                try:
                    self.som.fit(self.data,
                                 N_ITERATIONS,
                                 callback=self.callback)
                    # Report an exception, but still remove the thread
                finally:
                    self.done.emit(self.som)
                    self.stopped.emit()

        def thread_finished():
            self._optimizer = None
            self._optimizer_thread = None

        self.progressBarInit()

        self._optimizer = Optimizer(self.cont_x, som)
        self._optimizer_thread = QThread()
        self._optimizer_thread.setStackSize(5 * 2**20)
        self._optimizer.update.connect(self.__update)
        self._optimizer.done.connect(self.__done)
        self._optimizer.stopped.connect(self._optimizer_thread.quit)
        self._optimizer.moveToThread(self._optimizer_thread)
        self._optimizer_thread.started.connect(self._optimizer.run)
        self._optimizer_thread.finished.connect(thread_finished)
        self.stop_optimization = False
        self._optimizer_thread.start()

    @Slot(float, object, object)
    def __update(self, _progress, weights, ssum_weights):
        self.progressBarSet(_progress)
        self._assign_instances(weights, ssum_weights)
        self._redraw()

    @Slot(object)
    def __done(self, som):
        self.enable_controls(True)
        self.progressBarFinished()
        self._assign_instances(som.weights, som.ssum_weights)
        self._redraw()
        # This is the first time we know what was selected (assuming that
        # initialization is not set to random)
        if self.__pending_selection is not None:
            self.on_selection_change(self.__pending_selection)
            self.__pending_selection = None
        self.update_output()

    def stop_optimization_and_wait(self):
        if self._optimizer_thread is not None:
            self.stop_optimization = True
            self._optimizer.stop_optimization = True
            self._optimizer_thread.quit()
            self._optimizer_thread.wait()
            self._optimizer_thread = None

    def onDeleteWidget(self):
        self.stop_optimization_and_wait()
        self.clear()
        super().onDeleteWidget()

    def _assign_instances(self, weights, ssum_weights):
        if self.cont_x is None:
            return  # the widget is shutting down while signals still processed
        assignments = SOM.winner_from_weights(self.cont_x, weights,
                                              ssum_weights, self.hexagonal)
        members = defaultdict(list)
        for i, (x, y) in enumerate(assignments):
            members[(x, y)].append(i)
        members.pop(None, None)
        self.cells = np.empty((self.size_x, self.size_y, 2), dtype=int)
        self.member_data = np.empty(self.cont_x.shape[0], dtype=int)
        index = 0
        for x in range(self.size_x):
            for y in range(self.size_y):
                nmembers = len(members[(x, y)])
                self.member_data[index:index + nmembers] = members[(x, y)]
                self.cells[x, y] = [index, index + nmembers]
                index += nmembers

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.create_legend()  # re-wrap lines if necessary
        self.rescale()

    def rescale(self):
        if self.legend:
            leg_height = self.legend.boundingRect().height()
            leg_extra = 1.5
        else:
            leg_height = 0
            leg_extra = 1

        vw, vh = self.view.width(), self.view.height() - leg_height
        scale = min(vw / (self.size_x + 1),
                    vh / ((self.size_y + leg_extra) * self._grid_factors[1]))
        self.view.setTransform(QTransform.fromScale(scale, scale))
        if self.hexagonal:
            self.view.setSceneRect(0, -1, self.size_x - 1,
                                   (self.size_y + leg_extra) * sqrt3_2 +
                                   leg_height / scale)
        else:
            self.view.setSceneRect(-0.25, -0.25, self.size_x - 0.5,
                                   self.size_y - 0.5 + leg_height / scale)

    def update_output(self):
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self._set_output_summary(None)
            return

        indices = np.zeros(len(self.data), dtype=int)
        if self.selection is not None and np.any(self.selection):
            for y in range(self.size_y):
                for x in range(self.size_x):
                    rows = self.get_member_indices(x, y)
                    indices[rows] = self.selection[x, y]

        if np.any(indices):
            sel_data = create_groups_table(self.data, indices, False, "Group")
            self.Outputs.selected_data.send(sel_data)
            self._set_output_summary(sel_data)
        else:
            self.Outputs.selected_data.send(None)
            self._set_output_summary(None)

        if np.max(indices) > 1:
            annotated = create_groups_table(self.data, indices)
        else:
            annotated = create_annotated_table(self.data,
                                               np.flatnonzero(indices))
        self.Outputs.annotated_data.send(annotated)

    def set_color_bins(self):
        if self.attr_color is None:
            self.thresholds = self.bin_labels = self.colors = None
        elif self.attr_color.is_discrete:
            self.thresholds = self.bin_labels = None
            self.colors = self.attr_color.palette
        else:
            col = self.data.get_column_view(self.attr_color)[0].astype(float)
            if self.attr_color.is_time:
                binning = time_binnings(col, min_bins=4)[-1]
            else:
                binning = decimal_binnings(col, min_bins=4)[-1]
            self.thresholds = binning.thresholds[1:-1]
            self.bin_labels = (binning.labels[1:-1],
                               binning.short_labels[1:-1])
            palette = BinnedContinuousPalette.from_palette(
                self.attr_color.palette, binning.thresholds)
            self.colors = palette

    def create_legend(self):
        if self.legend is not None:
            self.scene.removeItem(self.legend)
            self.legend = None
        if self.attr_color is None:
            return

        if self.attr_color.is_discrete:
            names = self.attr_color.values
        else:
            names = self._bin_names()

        items = []
        size = 8
        for name, color in zip(names, self.colors.qcolors):
            item = QGraphicsItemGroup()
            item.addToGroup(
                CanvasRectangle(None, -size / 2, -size / 2, size, size,
                                Qt.gray, color))
            item.addToGroup(CanvasText(None, name, size, 0, Qt.AlignVCenter))
            items.append(item)

        self.legend = wrap_legend_items(items,
                                        hspacing=20,
                                        vspacing=16 + size,
                                        max_width=self.view.width() - 25)
        self.legend.setFlags(self.legend.ItemIgnoresTransformations)
        self.legend.setTransform(
            QTransform.fromTranslate(-self.legend.boundingRect().width() / 2,
                                     0))
        self.scene.addItem(self.legend)
        self.set_legend_pos()

    def _bin_names(self):
        labels, short_labels = self.bin_labels
        return \
            [f"< {labels[0]}"] \
            + [f"{x} - {y}" for x, y in zip(labels, short_labels[1:])] \
            + [f"≥ {labels[-1]}"]

    def set_legend_pos(self):
        if self.legend is None:
            return
        self.legend.setPos(self.size_x / 2,
                           (self.size_y + 0.2 + 0.3 * self.hexagonal) *
                           self._grid_factors[1])

    def send_report(self):
        self.report_plot()
        if self.attr_color:
            self.report_caption(
                f"Self-organizing map colored by '{self.attr_color.name}'")
Exemplo n.º 44
0
class OWSieveDiagram(OWWidget):
    name = "Sieve Diagram"
    description = "Visualize the observed and expected frequencies " \
                  "for a combination of values."
    icon = "icons/SieveDiagram.svg"
    priority = 200

    class Inputs:
        data = Input("Data", Table, default=True)
        features = Input("Features", AttributeList)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    graph_name = "canvas"

    want_control_area = False

    settings_version = 1
    settingsHandler = DomainContextHandler()
    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = ContextSetting(set())

    def __init__(self):
        # pylint: disable=missing-docstring
        super().__init__()

        self.data = self.discrete_data = None
        self.attrs = []
        self.input_features = None
        self.areas = []
        self.selection = set()

        self.attr_box = gui.hBox(self.mainArea)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE)
        combo_args = dict(
            widget=self.attr_box, master=self, contentsLength=12,
            callback=self.update_attr, sendSelectedValue=True, valueType=str,
            model=self.domain_model)
        fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed)
        gui.comboBox(value="attr_x", **combo_args)
        gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size)
        gui.comboBox(value="attr_y", **combo_args)
        self.vizrank, self.vizrank_button = SieveRank.add_vizrank(
            self.attr_box, self, "Score Combinations", self.set_attr)
        self.vizrank_button.setSizePolicy(*fixed_size)

        self.canvas = QGraphicsScene()
        self.canvasView = ViewWithPress(
            self.canvas, self.mainArea, handler=self.reset_selection)
        self.mainArea.layout().addWidget(self.canvasView)
        self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)

    def sizeHint(self):
        return QSize(450, 550)

    def resizeEvent(self, event):
        super().resizeEvent(event)
        self.update_graph()

    def showEvent(self, event):
        super().showEvent(event)
        self.update_graph()

    @classmethod
    def migrate_context(cls, context, version):
        if not version:
            settings.rename_setting(context, "attrX", "attr_x")
            settings.rename_setting(context, "attrY", "attr_y")
            settings.migrate_str_to_variable(context)

    @Inputs.data
    def set_data(self, data):
        """
        Discretize continuous attributes, and put all attributes and discrete
        metas into self.attrs.

        Select the first two attributes unless context overrides this.
        Method `resolve_shown_attributes` is called to use the attributes from
        the input, if it exists and matches the attributes in the data.

        Remove selection; again let the context override this.
        Initialize the vizrank dialog, but don't show it.

        Args:
            data (Table): input data
        """
        if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.areas = []
        self.selection = set()
        if self.data is None:
            self.attrs[:] = []
            self.domain_model.set_domain(None)
            self.discrete_data = None
        else:
            self.domain_model.set_domain(data.domain)
        self.attrs = [x for x in self.domain_model if isinstance(x, Variable)]
        if self.attrs:
            self.attr_x = self.attrs[0]
            self.attr_y = self.attrs[len(self.attrs) > 1]
        else:
            self.attr_x = self.attr_y = None
            self.areas = []
            self.selection = set()
        self.openContext(self.data)
        if self.data:
            self.discrete_data = self.sparse_to_dense(data, True)
        self.resolve_shown_attributes()
        self.update_graph()
        self.update_selection()

        self.vizrank.initialize()
        self.vizrank_button.setEnabled(
            self.data is not None and len(self.data) > 1 and
            len(self.data.domain.attributes) > 1 and not self.data.is_sparse())

    def set_attr(self, attr_x, attr_y):
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def update_attr(self):
        """Update the graph and selection."""
        self.selection = set()
        self.discrete_data = self.sparse_to_dense(self.data)
        self.update_graph()
        self.update_selection()

    def sparse_to_dense(self, data, init=False):
        """
        Extracts two selected columns from sparse matrix.
        GH-2260
        """
        def discretizer(data):
            if any(attr.is_continuous for attr in chain(data.domain.variables, data.domain.metas)):
                discretize = Discretize(
                    method=EqualFreq(n=4), remove_const=False,
                    discretize_classes=True, discretize_metas=True)
                return discretize(data).to_dense()
            return data

        if not data.is_sparse() and not init:
            return self.discrete_data
        if data.is_sparse():
            attrs = {self.attr_x,
                     self.attr_y}
            new_domain = data.domain.select_columns(attrs)
            data = Table.from_table(new_domain, data)
        return discretizer(data)

    @Inputs.features
    def set_input_features(self, attr_list):
        """
        Handler for the Features signal.

        The method stores the attributes and calls `resolve_shown_attributes`

        Args:
            attr_list (AttributeList): data from the signal
        """
        self.input_features = attr_list
        self.resolve_shown_attributes()
        self.update_selection()

    def resolve_shown_attributes(self):
        """
        Use the attributes from the input signal if the signal is present
        and at least two attributes appear in the domain. If there are
        multiple, use the first two. Combos are disabled if inputs are used.
        """
        self.warning()
        self.attr_box.setEnabled(True)
        if not self.input_features:  # None or empty
            return
        features = [f for f in self.input_features if f in self.domain_model]
        if not features:
            self.warning(
                "Features from the input signal are not present in the data")
            return
        old_attrs = self.attr_x, self.attr_y
        self.attr_x, self.attr_y = [f for f in (features * 2)[:2]]
        self.attr_box.setEnabled(False)
        if (self.attr_x, self.attr_y) != old_attrs:
            self.selection = set()
            self.update_graph()

    def reset_selection(self):
        self.selection = set()
        self.update_selection()

    def select_area(self, area, event):
        """
        Add or remove the clicked area from the selection

        Args:
            area (QRect): the area that is clicked
            event (QEvent): event description
        """
        if event.button() != Qt.LeftButton:
            return
        index = self.areas.index(area)
        if event.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection()

    def update_selection(self):
        """
        Update the graph (pen width) to show the current selection.
        Filter and output the data.
        """
        if self.areas is None or not self.selection:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(create_annotated_table(self.data, []))
            return

        filts = []
        for i, area in enumerate(self.areas):
            if i in self.selection:
                width = 4
                val_x, val_y = area.value_pair
                filts.append(
                    filter.Values([
                        filter.FilterDiscrete(self.attr_x.name, [val_x]),
                        filter.FilterDiscrete(self.attr_y.name, [val_y])
                    ]))
            else:
                width = 1
            pen = area.pen()
            pen.setWidth(width)
            area.setPen(pen)
        if len(filts) == 1:
            filts = filts[0]
        else:
            filts = filter.Values(filts, conjunction=False)
        selection = filts(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.Outputs.selected_data.send(selection)
        self.Outputs.annotated_data.send(create_annotated_table(self.data, sel_idx))

    def update_graph(self):
        # Function uses weird names like r, g, b, but it does it with utmost
        # caution, hence
        # pylint: disable=invalid-name
        """Update the graph."""

        def text(txt, *args, **kwargs):
            return CanvasText(self.canvas, "", html_text=to_html(txt),
                              *args, **kwargs)

        def width(txt):
            return text(txt, 0, 0, show=False).boundingRect().width()

        def fmt(val):
            return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val)

        def show_pearson(rect, pearson, pen_width):
            """
            Color the given rectangle according to its corresponding
            standardized Pearson residual.

            Args:
                rect (QRect): the rectangle being drawn
                pearson (float): signed standardized pearson residual
                pen_width (int): pen width (bolder pen is used for selection)
            """
            r = rect.rect()
            x, y, w, h = r.x(), r.y(), r.width(), r.height()
            if w == 0 or h == 0:
                return

            r = b = 255
            if pearson > 0:
                r = g = max(255 - 20 * pearson, 55)
            elif pearson < 0:
                b = g = max(255 + 20 * pearson, 55)
            else:
                r = g = b = 224
            rect.setBrush(QBrush(QColor(r, g, b)))
            pen_color = QColor(255 * (r == 255), 255 * (g == 255),
                               255 * (b == 255))
            pen = QPen(pen_color, pen_width)
            rect.setPen(pen)
            if pearson > 0:
                pearson = min(pearson, 10)
                dist = 20 - 1.6 * pearson
            else:
                pearson = max(pearson, -10)
                dist = 20 - 8 * pearson
            pen.setWidth(1)

            def _offseted_line(ax, ay):
                r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w),
                                      y + (ay or h))
                self.canvas.addItem(r)
                r.setPen(pen)

            ax = dist
            while ax < w:
                _offseted_line(ax, 0)
                ax += dist

            ay = dist
            while ay < h:
                _offseted_line(0, ay)
                ay += dist

        def make_tooltip():
            """Create the tooltip. The function uses local variables from
            the enclosing scope."""
            # pylint: disable=undefined-loop-variable
            def _oper(attr, txt):
                if self.data.domain[attr.name] is ddomain[attr.name]:
                    return "="
                return " " if txt[0] in "<≥" else " in "

            return (
                "<b>{attr_x}{xeq}{xval_name}</b>: {obs_x}/{n} ({p_x:.0f} %)".
                format(attr_x=to_html(attr_x.name),
                       xeq=_oper(attr_x, xval_name),
                       xval_name=to_html(xval_name),
                       obs_x=fmt(chi.probs_x[x] * n),
                       n=int(n),
                       p_x=100 * chi.probs_x[x]) +
                "<br/>" +
                "<b>{attr_y}{yeq}{yval_name}</b>: {obs_y}/{n} ({p_y:.0f} %)".
                format(attr_y=to_html(attr_y.name),
                       yeq=_oper(attr_y, yval_name),
                       yval_name=to_html(yval_name),
                       obs_y=fmt(chi.probs_y[y] * n),
                       n=int(n),
                       p_y=100 * chi.probs_y[y]) +
                "<hr/>" +
                """<b>combination of values: </b><br/>
                   &nbsp;&nbsp;&nbsp;expected {exp} ({p_exp:.0f} %)<br/>
                   &nbsp;&nbsp;&nbsp;observed {obs} ({p_obs:.0f} %)""".
                format(exp=fmt(chi.expected[y, x]),
                       p_exp=100 * chi.expected[y, x] / n,
                       obs=fmt(chi.observed[y, x]),
                       p_obs=100 * chi.observed[y, x] / n))

        for item in self.canvas.items():
            self.canvas.removeItem(item)
        if self.data is None or len(self.data) == 0 or \
                self.attr_x is None or self.attr_y is None:
            return

        ddomain = self.discrete_data.domain
        attr_x, attr_y = self.attr_x, self.attr_y
        disc_x, disc_y = ddomain[attr_x.name], ddomain[attr_y.name]
        view = self.canvasView

        chi = ChiSqStats(self.discrete_data, disc_x, disc_y)
        max_ylabel_w = max((width(val) for val in disc_y.values), default=0)
        max_ylabel_w = min(max_ylabel_w, 200)
        x_off = width(attr_x.name) + max_ylabel_w
        y_off = 15
        square_size = min(view.width() - x_off - 35, view.height() - y_off - 80)
        square_size = max(square_size, 10)
        self.canvasView.setSceneRect(0, 0, view.width(), view.height())
        if not disc_x.values or not disc_y.values:
            text_ = "Features {} and {} have no values".format(disc_x, disc_y) \
                if not disc_x.values and \
                   not disc_y.values and \
                          disc_x != disc_y \
                else \
                    "Feature {} has no values".format(
                        disc_x if not disc_x.values else disc_y)
            text(text_, view.width() / 2 + 70, view.height() / 2,
                 Qt.AlignRight | Qt.AlignVCenter)
            return
        n = chi.n
        curr_x = x_off
        max_xlabel_h = 0
        self.areas = []
        for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)):
            if px == 0:
                continue
            width = square_size * px

            curr_y = y_off
            for y in range(len(chi.probs_y) - 1, -1, -1):  # bottom-up order
                py = chi.probs_y[y]
                yval_name = disc_y.values[y]
                if py == 0:
                    continue
                height = square_size * py

                selected = len(self.areas) in self.selection
                rect = CanvasRectangle(
                    self.canvas, curr_x + 2, curr_y + 2, width - 4, height - 4,
                    z=-10, onclick=self.select_area)
                rect.value_pair = x, y
                self.areas.append(rect)
                show_pearson(rect, chi.residuals[y, x], 3 * selected)
                rect.setToolTip(make_tooltip())

                if x == 0:
                    text(yval_name, x_off, curr_y + height / 2,
                         Qt.AlignRight | Qt.AlignVCenter)
                curr_y += height

            xl = text(xval_name, curr_x + width / 2, y_off + square_size,
                      Qt.AlignHCenter | Qt.AlignTop)
            max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h)
            curr_x += width

        bottom = y_off + square_size + max_xlabel_h
        text(attr_y.name, 0, y_off + square_size / 2,
             Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True)
        text(attr_x.name, x_off + square_size / 2, bottom,
             Qt.AlignHCenter | Qt.AlignTop, bold=True)
        bottom += 30
        xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p),
                  0, bottom)
        # Assume similar height for both lines
        text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height())

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)

    def send_report(self):
        self.report_plot()
Exemplo n.º 45
0
class OWExplainPrediction(OWWidget, ConcurrentWidgetMixin):
    name = "Explain Prediction"
    description = "Prediction explanation widget."
    icon = "icons/ExplainPred.svg"
    priority = 110

    class Inputs:
        model = Input("Model", Model)
        background_data = Input("Background Data", Table)
        data = Input("Data", Table)

    class Outputs:
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("{}")

    class Information(OWWidget.Information):
        multiple_instances = Msg("Explaining prediction for the first "
                                 "instance in 'Data'.")

    settingsHandler = ClassValuesContextHandler()
    target_index = ContextSetting(0)
    stripe_len = Setting(10)

    graph_name = "scene"

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.__results = None  # type: Optional[Results]
        self.model = None  # type: Optional[Model]
        self.background_data = None  # type: Optional[Table]
        self.data = None  # type: Optional[Table]
        self._stripe_plot = None  # type: Optional[StripePlot]
        self.mo_info = ""
        self.bv_info = ""
        self.setup_gui()

    def setup_gui(self):
        self._add_controls()
        self._add_plot()
        self.info.set_input_summary(self.info.NoInput)

    def _add_plot(self):
        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignVCenter | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def _add_controls(self):
        box = gui.vBox(self.controlArea, "Target class")
        self._target_combo = gui.comboBox(box,
                                          self,
                                          "target_index",
                                          callback=self.__target_combo_changed,
                                          contentsLength=12)

        box = gui.hBox(self.controlArea, "Zoom")
        gui.hSlider(box,
                    self,
                    "stripe_len",
                    None,
                    minValue=1,
                    maxValue=500,
                    createLabel=False,
                    callback=self.__size_slider_changed)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Prediction info")
        gui.label(box, self, "%(mo_info)s")  # type: QLabel
        bv_label = gui.label(box, self, "%(bv_info)s")  # type: QLabel
        bv_label.setToolTip("The average prediction for selected class.")

    def __target_combo_changed(self):
        self.update_scene()

    def __size_slider_changed(self):
        if self._stripe_plot is not None:
            self._stripe_plot.set_height(self.stripe_len)

    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        self.data = data

    @Inputs.background_data
    @check_sql_input
    def set_background_data(self, data: Optional[Table]):
        self.background_data = data

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        self.closeContext()
        self.model = model
        self.setup_controls()
        self.openContext(self.model.domain.class_var if self.model else None)

    def setup_controls(self):
        self._target_combo.clear()
        self._target_combo.setEnabled(True)
        if self.model is not None:
            if self.model.domain.has_discrete_class:
                self._target_combo.addItems(self.model.domain.class_var.values)
                self.target_index = 0
            elif self.model.domain.has_continuous_class:
                self.target_index = -1
                self._target_combo.setEnabled(False)
            else:
                raise NotImplementedError

    def handleNewSignals(self):
        self.clear()
        self.check_inputs()
        data = self.data and self.data[:1]
        self.start(run, data, self.background_data, self.model)

    def clear(self):
        self.mo_info = ""
        self.bv_info = ""
        self.__results = None
        self.cancel()
        self.clear_scene()
        self.clear_messages()

    def check_inputs(self):
        if self.data and len(self.data) > 1:
            self.Information.multiple_instances()

        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.background_data:
            n_data = len(self.data) if self.data else 0
            n_background_data = len(self.background_data) \
                if self.background_data else 0
            summary = f"{self.info.format_number(n_background_data)}, " \
                      f"{self.info.format_number(n_data)}"
            kwargs = {"format": Qt.RichText}
            details = format_multiple_summaries([("Background data",
                                                  self.background_data),
                                                 ("Data", self.data)])
        self.info.set_input_summary(summary, details, **kwargs)

    def clear_scene(self):
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self._stripe_plot = None

    def update_scene(self):
        self.clear_scene()
        self.mo_info = ""
        self.bv_info = ""
        scores = None
        if self.__results is not None:
            data = self.__results.transformed_data
            pred = self.__results.predictions
            base = self.__results.base_value
            values, _, labels, ranges = prepare_force_plot_data(
                self.__results.values, data, pred, self.target_index)

            index = 0
            HIGH, LOW = 0, 1
            plot_data = PlotData(high_values=values[index][HIGH],
                                 low_values=values[index][LOW][::-1],
                                 high_labels=labels[index][HIGH],
                                 low_labels=labels[index][LOW][::-1],
                                 value_range=ranges[index],
                                 model_output=pred[index][self.target_index],
                                 base_value=base[self.target_index])
            self.setup_plot(plot_data)

            self.mo_info = f"Model prediction: {_str(plot_data.model_output)}"
            self.bv_info = f"Base value: {_str(plot_data.base_value)}"

            assert isinstance(self.__results.values, list)
            scores = self.__results.values[self.target_index][0, :]
            names = [a.name for a in data.domain.attributes]
            scores = self.create_scores_table(scores, names)
        self.Outputs.scores.send(scores)

    def setup_plot(self, plot_data: PlotData):
        self._stripe_plot = StripePlot()
        self._stripe_plot.set_data(plot_data, self.stripe_len)
        self._stripe_plot.layout().activate()
        self._stripe_plot.geometryChanged.connect(self.update_scene_rect)
        self.scene.addItem(self._stripe_plot)
        self.update_scene_rect()

    def update_scene_rect(self):
        geom = self._stripe_plot.geometry()
        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

    @staticmethod
    def create_scores_table(scores: np.ndarray, names: List[str]) -> Table:
        domain = Domain([ContinuousVariable("Score")],
                        metas=[StringVariable("Feature")])
        scores_table = Table(domain,
                             scores[:, None],
                             metas=np.array(names)[:, None])
        scores_table.name = "Feature Scores"
        return scores_table

    def on_partial_result(self, _):
        pass

    def on_done(self, results: Optional[RunnerResults]):
        self.__results = results
        self.update_scene()

    def on_exception(self, ex: Exception):
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def sizeHint(self) -> QSizeF:
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(700, 700))

    def send_report(self):
        if not self.data or not self.background_data or not self.model:
            return
        items = {"Target class": "None"}
        if self.model.domain.has_discrete_class:
            class_var = self.model.domain.class_var
            items["Target class"] = class_var.values[self.target_index]
        self.report_items(items)
        self.report_plot()
Exemplo n.º 46
0
class OWMosaicDisplay(OWWidget):
    name = "Mosaic Display"
    description = "Display data in a mosaic plot."
    icon = "icons/MosaicDisplay.svg"
    priority = 220

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data")]
    outputs = [("Selected Data", Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table)]

    settingsHandler = DomainContextHandler()
    use_boxes = Setting(True)
    variable1 = ContextSetting("", exclude_metas=False)
    variable2 = ContextSetting("", exclude_metas=False)
    variable3 = ContextSetting("", exclude_metas=False)
    variable4 = ContextSetting("", exclude_metas=False)
    selection = ContextSetting(set())
    # interior_coloring is context setting to properly reset it
    # if the widget switches to regression and back (set setData)
    interior_coloring = ContextSetting(1)

    PEARSON, CLASS_DISTRIBUTION = 0, 1
    interior_coloring_opts = ["Pearson residuals",
                              "Class distribution"]
    BAR_WIDTH = 5
    SPACING = 4
    ATTR_NAME_OFFSET = 20
    ATTR_VAL_OFFSET = 3
    BLUE_COLORS = [QColor(255, 255, 255), QColor(210, 210, 255),
                   QColor(110, 110, 255), QColor(0, 0, 255)]
    RED_COLORS = [QColor(255, 255, 255), QColor(255, 200, 200),
                  QColor(255, 100, 100), QColor(255, 0, 0)]

    graph_name = "canvas"

    class Warning(OWWidget.Warning):
        incompatible_subset = Msg("Data subset is incompatible with Data")
        no_valid_data = Msg("No valid data")
        no_cont_selection_sql = \
            Msg("Selection of continuous variables on SQL is not supported")

    def __init__(self):
        super().__init__()

        self.data = None
        self.discrete_data = None
        self.unprocessed_subset_data = None
        self.subset_data = None

        self.areas = []

        self.canvas = QGraphicsScene()
        self.canvas_view = ViewWithPress(self.canvas,
                                         handler=self.clear_selection)
        self.mainArea.layout().addWidget(self.canvas_view)
        self.canvas_view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.canvas_view.setRenderHint(QPainter.Antialiasing)

        box = gui.vBox(self.controlArea, box=True)
        self.attr_combos = [
            gui.comboBox(
                    box, self, value="variable{}".format(i),
                    orientation=Qt.Horizontal, contentsLength=12,
                    callback=self.reset_graph,
                    sendSelectedValue=True, valueType=str)
            for i in range(1, 5)]
        self.rb_colors = gui.radioButtonsInBox(
                self.controlArea, self, "interior_coloring",
                self.interior_coloring_opts, box="Interior Coloring",
                callback=self.update_graph)
        self.bar_button = gui.checkBox(
                gui.indentedBox(self.rb_colors),
                self, 'use_boxes', label='Compare with total',
                callback=self._compare_with_total)
        gui.rubber(self.controlArea)

    def sizeHint(self):
        return QSize(530, 720)

    def _compare_with_total(self):
        if self.data and self.data.domain.has_discrete_class:
            self.interior_coloring = 1
            self.update_graph()

    def init_combos(self, data):
        for combo in self.attr_combos:
            combo.clear()
        if data is None:
            return
        for combo in self.attr_combos[1:]:
            combo.addItem("(None)")

        icons = gui.attributeIconDict
        for attr in chain(data.domain, data.domain.metas):
            if attr.is_discrete or attr.is_continuous:
                for combo in self.attr_combos:
                    combo.addItem(icons[attr], attr.name)

        if self.attr_combos[0].count() > 0:
            self.variable1 = self.attr_combos[0].itemText(0)
            self.variable2 = self.attr_combos[1].itemText(
                    2 * (self.attr_combos[1].count() > 2))
        self.variable3 = self.attr_combos[2].itemText(0)
        self.variable4 = self.attr_combos[3].itemText(0)

    def get_attr_list(self):
        return [
            a for a in [self.variable1, self.variable2,
                        self.variable3, self.variable4]
            if a and a != "(None)"]

    def resizeEvent(self, e):
        OWWidget.resizeEvent(self, e)
        self.update_graph()

    def showEvent(self, ev):
        OWWidget.showEvent(self, ev)
        self.update_graph()

    def set_data(self, data):
        if type(data) == SqlTable and data.approx_len() > LARGE_TABLE:
            data = data.sample_time(DEFAULT_SAMPLE_TIME)

        self.closeContext()
        self.data = data
        self.init_combos(self.data)
        if not self.data:
            self.discrete_data = None
            return
        if any(attr.is_continuous for attr in data.domain):
            self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
        else:
            self.discrete_data = self.data

        if self.data.domain.class_var is None:
            self.rb_colors.setDisabled(True)
            disc_class = False
        else:
            self.rb_colors.setDisabled(False)
            disc_class = self.data.domain.has_discrete_class
            self.rb_colors.group.button(2).setDisabled(not disc_class)
            self.bar_button.setDisabled(not disc_class)
        self.interior_coloring = bool(disc_class)
        self.openContext(self.data)

        # if we first received subset we now call setSubsetData to process it
        if self.unprocessed_subset_data:
            self.set_subset_data(self.unprocessed_subset_data)
            self.unprocessed_subset_data = None

    def set_subset_data(self, data):
        self.Warning.incompatible_subset.clear()
        if self.data is None:
            self.unprocessed_subset_data = data
            return
        try:
            self.subset_data = data.from_table(self.data.domain, data)
        except:
            self.subset_data = None
            self.Warning.incompatible_subset(shown=data is not None)

    # this is called by widget after setData and setSubsetData are called.
    # this way the graph is updated only once
    def handleNewSignals(self):
        self.reset_graph()

    def clear_selection(self):
        self.selection = set()
        self.update_selection_rects()
        self.send_selection()

    def reset_graph(self):
        self.clear_selection()
        self.update_graph()

    def update_selection_rects(self):
        for i, (attr, vals, area) in enumerate(self.areas):
            if i in self.selection:
                area.setPen(QPen(Qt.black, 3, Qt.DotLine))
            else:
                area.setPen(QPen())

    def select_area(self, index, ev):
        if ev.button() != Qt.LeftButton:
            return
        if ev.modifiers() & Qt.ControlModifier:
            self.selection ^= {index}
        else:
            self.selection = {index}
        self.update_selection_rects()
        self.send_selection()

    def send_selection(self):
        if not self.selection or self.data is None:
            self.send("Selected Data", None)
            self.send(ANNOTATED_DATA_SIGNAL_NAME,
                      create_annotated_table(self.data, []))
            return
        filters = []
        self.Warning.no_cont_selection_sql.clear()
        if self.discrete_data is not self.data:
            if isinstance(self.data, SqlTable):
                self.Warning.no_cont_selection_sql()
        for i in self.selection:
            cols, vals, area = self.areas[i]
            filters.append(
                filter.Values(
                    filter.FilterDiscrete(col, [val])
                    for col, val in zip(cols, vals)))
        if len(filters) > 1:
            filters = filter.Values(filters, conjunction=False)
        else:
            filters = filters[0]
        selection = filters(self.discrete_data)
        idset = set(selection.ids)
        sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset]
        if self.discrete_data is not self.data:
            selection = self.data[sel_idx]
        self.send("Selected Data", selection)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(self.data, sel_idx))

    def send_report(self):
        self.report_plot(self.canvas)

    def update_graph(self):
        spacing = self.SPACING
        bar_width = self.BAR_WIDTH

        def draw_data(attr_list, x0_x1, y0_y1, side, condition,
                      total_attrs, used_attrs=[], used_vals=[],
                      attr_vals=""):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if conditionaldict[attr_vals] == 0:
                add_rect(x0, x1, y0, y1, "",
                         used_attrs, used_vals, attr_vals=attr_vals)
                # store coordinates for later drawing of labels
                draw_text(side, attr_list[0], (x0, x1), (y0, y1), total_attrs,
                          used_attrs, used_vals, attr_vals)
                return

            attr = attr_list[0]
            # how much smaller rectangles do we draw
            edge = len(attr_list) * spacing
            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]  # reverse names if necessary

            if side % 2 == 0:  # we are drawing on the x axis
                # remove the space needed for separating different attr. values
                whole = max(0, (x1 - x0) - edge * (
                    len(values) - 1))
                if whole == 0:
                    edge = (x1 - x0) / float(len(values) - 1)
            else:  # we are drawing on the y axis
                whole = max(0, (y1 - y0) - edge * (len(values) - 1))
                if whole == 0:
                    edge = (y1 - y0) / float(len(values) - 1)

            if attr_vals == "":
                counts = [conditionaldict[val] for val in values]
            else:
                counts = [conditionaldict[attr_vals + "-" + val]
                          for val in values]
            total = sum(counts)

            # if we are visualizing the third attribute and the first attribute
            # has the last value, we have to reverse the order in which the
            # boxes will be drawn otherwise, if the last cell, nearest to the
            # labels of the fourth attribute, is empty, we wouldn't be able to
            # position the labels
            valrange = list(range(len(values)))
            if len(attr_list + used_attrs) == 4 and len(used_attrs) == 2:
                attr1values = get_variable_values_sorted(
                        data.domain[used_attrs[0]])
                if used_vals[0] == attr1values[-1]:
                    valrange = valrange[::-1]

            for i in valrange:
                start = i * edge + whole * float(sum(counts[:i]) / total)
                end = i * edge + whole * float(sum(counts[:i + 1]) / total)
                val = values[i]
                htmlval = to_html(val)
                if attr_vals != "":
                    newattrvals = attr_vals + "-" + val
                else:
                    newattrvals = val

                tooltip = condition + 4 * "&nbsp;" + attr + \
                    ": <b>" + htmlval + "</b><br>"
                attrs = used_attrs + [attr]
                vals = used_vals + [val]
                common_args = attrs, vals, newattrvals
                if side % 2 == 0:  # if we are moving horizontally
                    if len(attr_list) == 1:
                        add_rect(x0 + start, x0 + end, y0, y1,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0 + start, x0 + end),
                                  (y0, y1), side + 1,
                                  tooltip, total_attrs, *common_args)
                else:
                    if len(attr_list) == 1:
                        add_rect(x0, x1, y0 + start, y0 + end,
                                 tooltip, *common_args)
                    else:
                        draw_data(attr_list[1:], (x0, x1),
                                  (y0 + start, y0 + end), side + 1,
                                  tooltip, total_attrs, *common_args)

            draw_text(side, attr_list[0], (x0, x1), (y0, y1),
                      total_attrs, used_attrs, used_vals, attr_vals)

        def draw_text(side, attr, x0_x1, y0_y1,
                      total_attrs, used_attrs, used_vals, attr_vals):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if side in drawn_sides:
                return

            # the text on the right will be drawn when we are processing
            # visualization of the last value of the first attribute
            if side == 3:
                attr1values = \
                    get_variable_values_sorted(data.domain[used_attrs[0]])
                if used_vals[0] != attr1values[-1]:
                    return

            if not conditionaldict[attr_vals]:
                if side not in draw_positions:
                    draw_positions[side] = (x0, x1, y0, y1)
                return
            else:
                if side in draw_positions:
                    # restore the positions of attribute values and name
                    (x0, x1, y0, y1) = draw_positions[side]

            drawn_sides.add(side)

            values = get_variable_values_sorted(data.domain[attr])
            if side % 2:
                values = values[::-1]

            spaces = spacing * (total_attrs - side) * (len(values) - 1)
            width = x1 - x0 - spaces * (side % 2 == 0)
            height = y1 - y0 - spaces * (side % 2 == 1)

            # calculate position of first attribute
            currpos = 0

            if attr_vals == "":
                counts = [conditionaldict.get(val, 1) for val in values]
            else:
                counts = [conditionaldict.get(attr_vals + "-" + val, 1)
                          for val in values]
            total = sum(counts)
            if total == 0:
                counts = [1] * len(values)
                total = sum(counts)

            aligns = [Qt.AlignTop | Qt.AlignHCenter,
                      Qt.AlignRight | Qt.AlignVCenter,
                      Qt.AlignBottom | Qt.AlignHCenter,
                      Qt.AlignLeft | Qt.AlignVCenter]
            align = aligns[side]
            for i in range(len(values)):
                val = values[i]
                perc = counts[i] / float(total)
                if distributiondict[val] != 0:
                    if side == 0:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * 0.5 * perc,
                                   y1 + self.ATTR_VAL_OFFSET, align)
                    elif side == 1:
                        CanvasText(self.canvas, str(val),
                                   x0 - self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)
                    elif side == 2:
                        CanvasText(self.canvas, str(val),
                                   x0 + currpos + width * perc * 0.5,
                                   y0 - self.ATTR_VAL_OFFSET, align)
                    else:
                        CanvasText(self.canvas, str(val),
                                   x1 + self.ATTR_VAL_OFFSET,
                                   y0 + currpos + height * 0.5 * perc, align)

                if side % 2 == 0:
                    currpos += perc * width + spacing * (total_attrs - side)
                else:
                    currpos += perc * height + spacing * (total_attrs - side)

            if side == 0:
                CanvasText(
                        self.canvas, attr,
                        x0 + (x1 - x0) / 2,
                        y1 + self.ATTR_VAL_OFFSET +
                        self.ATTR_NAME_OFFSET,
                        align, bold=1)
            elif side == 1:
                CanvasText(
                        self.canvas, attr,
                        x0 - max_ylabel_w1 - self.ATTR_VAL_OFFSET,
                        y0 + (y1 - y0) / 2,
                        align, bold=1, vertical=True)
            elif side == 2:
                CanvasText(
                        self.canvas, attr,
                        x0 + (x1 - x0) / 2,
                        y0 - self.ATTR_VAL_OFFSET -
                        self.ATTR_NAME_OFFSET,
                        align, bold=1)
            else:
                CanvasText(
                        self.canvas, attr,
                        x1 + max_ylabel_w2 + self.ATTR_VAL_OFFSET,
                        y0 + (y1 - y0) / 2,
                        align, bold=1, vertical=True)

        def add_rect(x0, x1, y0, y1, condition="",
                     used_attrs=[], used_vals=[], attr_vals=""):
            area_index = len(self.areas)
            if x0 == x1:
                x1 += 1
            if y0 == y1:
                y1 += 1

            # rectangles of width and height 1 are not shown - increase
            if x1 - x0 + y1 - y0 == 2:
                y1 += 1

            if class_var and class_var.is_discrete:
                colors = [QColor(*col) for col in class_var.colors]
            else:
                colors = None

            def select_area(_, ev):
                self.select_area(area_index, ev)

            def rect(x, y, w, h, z, pen_color=None, brush_color=None, **args):
                if pen_color is None:
                    return CanvasRectangle(
                            self.canvas, x, y, w, h, z=z, onclick=select_area,
                            **args)
                if brush_color is None:
                    brush_color = pen_color
                return CanvasRectangle(
                        self.canvas, x, y, w, h, pen_color, brush_color, z=z,
                        onclick=select_area, **args)

            def line(x1, y1, x2, y2):
                r = QGraphicsLineItem(x1, y1, x2, y2, None)
                self.canvas.addItem(r)
                r.setPen(QPen(Qt.white, 2))
                r.setZValue(30)

            outer_rect = rect(x0, y0, x1 - x0, y1 - y0, 30)
            self.areas.append((used_attrs, used_vals, outer_rect))
            if not conditionaldict[attr_vals]:
                return

            if self.interior_coloring == self.PEARSON:
                s = sum(apriori_dists[0])
                expected = s * reduce(
                        mul,
                        (apriori_dists[i][used_vals[i]] / float(s)
                         for i in range(len(used_vals))))
                actual = conditionaldict[attr_vals]
                pearson = (actual - expected) / sqrt(expected)
                if pearson == 0:
                    ind = 0
                else:
                    ind = max(0, min(int(log(abs(pearson), 2)), 3))
                color = [self.RED_COLORS, self.BLUE_COLORS][pearson > 0][ind]
                rect(x0, y0, x1 - x0, y1 - y0, -20, color)
                outer_rect.setToolTip(
                        condition + "<hr/>" +
                        "Expected instances: %.1f<br>"
                        "Actual instances: %d<br>"
                        "Standardized (Pearson) residual: %.1f" %
                        (expected, conditionaldict[attr_vals], pearson))
            else:
                cls_values = get_variable_values_sorted(class_var)
                prior = get_distribution(data, class_var.name)
                total = 0
                for i, value in enumerate(cls_values):
                    val = conditionaldict[attr_vals + "-" + value]
                    if val == 0:
                        continue
                    if i == len(cls_values) - 1:
                        v = y1 - y0 - total
                    else:
                        v = ((y1 - y0) * val) / conditionaldict[attr_vals]
                    rect(x0, y0 + total, x1 - x0, v, -20, colors[i])
                    total += v

                if self.use_boxes and \
                        abs(x1 - x0) > bar_width and \
                        abs(y1 - y0) > bar_width:
                    total = 0
                    line(x0 + bar_width, y0, x0 + bar_width, y1)
                    n = sum(prior)
                    for i, (val, color) in enumerate(zip(prior, colors)):
                        if i == len(prior) - 1:
                            h = y1 - y0 - total
                        else:
                            h = (y1 - y0) * val / n
                        rect(x0, y0 + total, bar_width, h, 20, color)
                        total += h

                if conditionalsubsetdict:
                    if conditionalsubsetdict[attr_vals]:
                        counts = [conditionalsubsetdict[attr_vals + "-" + val]
                                  for val in cls_values]
                        if sum(counts) == 1:
                            rect(x0 - 2, y0 - 2, x1 - x0 + 5, y1 - y0 + 5, -550,
                                 colors[counts.index(1)], Qt.white,
                                 penWidth=2, penStyle=Qt.DashLine)
                        if self.subset_data is not None:
                            line(x1 - bar_width, y0, x1 - bar_width, y1)
                            total = 0
                            n = conditionalsubsetdict[attr_vals]
                            if n:
                                for i, (cls, color) in \
                                        enumerate(zip(cls_values, colors)):
                                    val = conditionalsubsetdict[
                                        attr_vals + "-" + cls]
                                    if val == 0:
                                        continue
                                    if i == len(prior) - 1:
                                        v = y1 - y0 - total
                                    else:
                                        v = ((y1 - y0) * val) / n
                                    rect(x1 - bar_width, y0 + total,
                                         bar_width, v, 15, color)
                                    total += v

                actual = [conditionaldict[attr_vals + "-" + cls_values[i]]
                          for i in range(len(prior))]
                n_actual = sum(actual)
                if n_actual > 0:
                    apriori = [prior[key] for key in cls_values]
                    n_apriori = sum(apriori)
                    text = "<br/>".join(
                            "<b>%s</b>: %d / %.1f%% (Expected %.1f / %.1f%%)" %
                            (cls, act, 100.0 * act / n_actual,
                             apr / n_apriori * n_actual, 100.0 * apr / n_apriori
                             )
                            for cls, act, apr in zip(cls_values, actual, apriori
                                                     ))
                else:
                    text = ""
                outer_rect.setToolTip(
                        "{}<hr>Instances: {}<br><br>{}".format(
                                condition, n_actual, text[:-4]))

        def draw_legend(x0_x1, y0_y1):
            x0, x1 = x0_x1
            y0, y1 = y0_y1
            if self.interior_coloring == self.PEARSON:
                names = ["<-8", "-8:-4", "-4:-2", "-2:2", "2:4", "4:8", ">8",
                         "Residuals:"]
                colors = self.RED_COLORS[::-1] + self.BLUE_COLORS[1:]
            else:
                names = get_variable_values_sorted(class_var) + \
                        [class_var.name + ":"]
                colors = [QColor(*col) for col in class_var.colors]

            names = [CanvasText(self.canvas, name, alignment=Qt.AlignVCenter)
                     for name in names]
            totalwidth = sum(text.boundingRect().width() for text in names)

            # compute the x position of the center of the legend
            y = y1 + self.ATTR_NAME_OFFSET + self.ATTR_VAL_OFFSET + 35
            distance = 30
            startx = (x0 + x1) / 2 - (totalwidth + (len(names)) * distance) / 2

            names[-1].setPos(startx + 15, y)
            names[-1].show()
            xoffset = names[-1].boundingRect().width() + distance

            size = 8

            for i in range(len(names) - 1):
                if self.interior_coloring == self.PEARSON:
                    edgecolor = Qt.black
                else:
                    edgecolor = colors[i]

                CanvasRectangle(self.canvas, startx + xoffset, y - size / 2,
                                size, size, edgecolor, colors[i])
                names[i].setPos(startx + xoffset + 10, y)
                xoffset += distance + names[i].boundingRect().width()

        self.canvas.clear()
        self.areas = []

        data = self.discrete_data
        if data is None:
            return
        subset = self.subset_data
        attr_list = self.get_attr_list()
        class_var = data.domain.class_var
        if class_var:
            sql = type(data) == SqlTable
            name = not sql and data.name
            # save class_var because it is removed in the next line
            data = data[:, attr_list + [class_var]]
            data.domain.class_var = class_var
            if not sql:
                data.name = name
        else:
            data = data[:, attr_list]
        # TODO: check this
        # data = Preprocessor_dropMissing(data)
        if len(data) == 0:
            self.Warning.no_valid_data()
            return
        else:
            self.Warning.no_valid_data.clear()

        if self.interior_coloring == self.PEARSON:
            apriori_dists = [get_distribution(data, attr) for attr in attr_list]
        else:
            apriori_dists = []

        def get_max_label_width(attr):
            values = get_variable_values_sorted(data.domain[attr])
            maxw = 0
            for val in values:
                t = CanvasText(self.canvas, val, 0, 0, bold=0, show=False)
                maxw = max(int(t.boundingRect().width()), maxw)
            return maxw

        # get the maximum width of rectangle
        xoff = 20
        width = 20
        if len(attr_list) > 1:
            text = CanvasText(self.canvas, attr_list[1], bold=1, show=0)
            max_ylabel_w1 = min(get_max_label_width(attr_list[1]), 150)
            width = 5 + text.boundingRect().height() + \
                self.ATTR_VAL_OFFSET + max_ylabel_w1
            xoff = width
            if len(attr_list) == 4:
                text = CanvasText(self.canvas, attr_list[3], bold=1, show=0)
                max_ylabel_w2 = min(get_max_label_width(attr_list[3]), 150)
                width += text.boundingRect().height() + \
                    self.ATTR_VAL_OFFSET + max_ylabel_w2 - 10

        # get the maximum height of rectangle
        height = 100
        yoff = 45
        square_size = min(self.canvas_view.width() - width - 20,
                          self.canvas_view.height() - height - 20)

        if square_size < 0:
            return  # canvas is too small to draw rectangles
        self.canvas_view.setSceneRect(
                0, 0, self.canvas_view.width(), self.canvas_view.height())

        drawn_sides = set()
        draw_positions = {}

        conditionaldict, distributiondict = \
            get_conditional_distribution(data, attr_list)
        conditionalsubsetdict = None
        if subset:
            conditionalsubsetdict, _ = \
                get_conditional_distribution(subset, attr_list)

        # draw rectangles
        draw_data(
            attr_list, (xoff, xoff + square_size), (yoff, yoff + square_size),
            0, "", len(attr_list))
        draw_legend((xoff, xoff + square_size), (yoff, yoff + square_size))
        self.update_selection_rects()
Exemplo n.º 47
0
class OWPythagoreanForest(OWWidget):
    name = 'Pythagorean Forest'
    description = 'Pythagorean forest for visualising random forests.'
    icon = 'icons/PythagoreanForest.svg'

    priority = 1001

    inputs = [('Random forest', RandomForestModel, 'set_rf')]
    outputs = [('Tree', TreeModel)]

    # Enable the save as feature
    graph_name = 'scene'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(50)
    selected_tree_index = settings.ContextSetting(-1)

    def __init__(self):
        super().__init__()
        self.model = None
        self.forest_adapter = None
        self.instances = None
        self.clf_dataset = None
        # We need to store refernces to the trees and grid items
        self.grid_items, self.ptrees = [], []
        # In some rare cases, we need to prevent commiting, the only one
        # that this currently helps is that when changing the size calculation
        # the trees are all recomputed, but we don't want to output a new tree
        # to keep things consistent with other ui controls.
        self.__prevent_commit = False

        self.color_palette = None

        # Different methods to calculate the size of squares
        self.SIZE_CALCULATION = [
            ('Normal', lambda x: x),
            ('Square root', lambda x: sqrt(x)),
            ('Logarithmic', lambda x: log(x + 1)),
        ]

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(box_display,
                                           self,
                                           'depth_limit',
                                           label='Depth',
                                           ticks=False,
                                           callback=self.update_depth)
        self.ui_target_class_combo = gui.comboBox(box_display,
                                                  self,
                                                  'target_class_index',
                                                  label='Target class',
                                                  orientation=Qt.Horizontal,
                                                  items=[],
                                                  contentsLength=8,
                                                  callback=self.update_colors)
        self.ui_size_calc_combo = gui.comboBox(
            box_display,
            self,
            'size_calc_idx',
            label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0],
            contentsLength=8,
            callback=self.update_size_calc)
        self.ui_zoom_slider = gui.hSlider(box_display,
                                          self,
                                          'zoom',
                                          label='Zoom',
                                          ticks=False,
                                          minValue=20,
                                          maxValue=150,
                                          callback=self.zoom_changed,
                                          createLabel=False)

        # Stretch to fit the rest of the unsused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(QSizePolicy.Preferred,
                                       QSizePolicy.Expanding)

        # MAIN AREA
        self.scene = QGraphicsScene(self)
        self.scene.selectionChanged.connect(self.commit)
        self.grid = OWGrid()
        self.grid.geometryChanged.connect(self._update_scene_rect)
        self.scene.addItem(self.grid)

        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.mainArea.layout().addWidget(self.view)

        self.resize(800, 500)

        self.clear()

    def set_rf(self, model=None):
        """When a different forest is given."""
        self.clear()
        self.model = model

        if model is not None:
            self.forest_adapter = self._get_forest_adapter(self.model)
            self._draw_trees()
            self.color_palette = self.forest_adapter.get_trees()[0]

            self.instances = model.instances
            # this bit is important for the regression classifier
            if self.instances is not None and self.instances.domain != model.domain:
                self.clf_dataset = Table.from_table(self.model.domain,
                                                    self.instances)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

            self.selected_tree_index = -1

    def clear(self):
        """Clear all relevant data from the widget."""
        self.model = None
        self.forest_adapter = None
        self.ptrees = []
        self.grid_items = []
        self.grid.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def update_depth(self):
        """When the max depth slider is changed."""
        for tree in self.ptrees:
            tree.set_depth_limit(self.depth_limit)

    def update_colors(self):
        """When the target class or coloring method is changed."""
        for tree in self.ptrees:
            tree.target_class_changed(self.target_class_index)

    def update_size_calc(self):
        """When the size calculation of the trees is changed."""
        if self.model is not None:
            with self._prevent_commit():
                self.grid.clear()
                self._draw_trees()
                # Keep the selected item
                if self.selected_tree_index != -1:
                    self.grid_items[self.selected_tree_index].setSelected(True)
                self.update_depth()

    def zoom_changed(self):
        """When we update the "Zoom" slider."""
        for item in self.grid_items:
            item.set_max_size(self._calculate_zoom(self.zoom))

        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

    @contextmanager
    def _prevent_commit(self):
        try:
            self.__prevent_commit = True
            yield
        finally:
            self.__prevent_commit = False

    def _update_info_box(self):
        self.ui_info.setText('Trees: {}'.format(
            len(self.forest_adapter.get_trees())))

    def _update_depth_slider(self):
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _clear_info_box(self):
        self.ui_info.setText('No forest on input.')

    def _clear_target_class_combo(self):
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        return max(tree.tree_adapter.max_depth for tree in self.ptrees)

    def _get_forest_adapter(self, model):
        return SklRandomForestAdapter(model)

    @contextmanager
    def disable_ui(self):
        """Temporarly disable the UI while trees may be redrawn."""
        try:
            self.ui_size_calc_combo.setEnabled(False)
            self.ui_depth_slider.setEnabled(False)
            self.ui_target_class_combo.setEnabled(False)
            self.ui_zoom_slider.setEnabled(False)
            yield
        finally:
            self.ui_size_calc_combo.setEnabled(True)
            self.ui_depth_slider.setEnabled(True)
            self.ui_target_class_combo.setEnabled(True)
            self.ui_zoom_slider.setEnabled(True)

    def _draw_trees(self):
        self.grid_items, self.ptrees = [], []

        num_trees = len(self.forest_adapter.get_trees())
        with self.progressBar(num_trees) as prg, self.disable_ui():
            for tree in self.forest_adapter.get_trees():
                ptree = PythagorasTreeViewer(
                    None,
                    tree,
                    interactive=False,
                    padding=100,
                    target_class_index=self.target_class_index,
                    weight_adjustment=self.SIZE_CALCULATION[
                        self.size_calc_idx][1])
                grid_item = GridItem(ptree,
                                     self.grid,
                                     max_size=self._calculate_zoom(self.zoom))
                # We don't want to show flickering while the trees are being
                grid_item.setVisible(False)

                self.grid_items.append(grid_item)
                self.ptrees.append(ptree)
                prg.advance()

            self.grid.set_items(self.grid_items)
            # This is necessary when adding items for the first time
            if self.grid:
                width = (self.view.width() -
                         self.view.verticalScrollBar().width())
                self.grid.reflow(width)
                self.grid.setPreferredWidth(width)
                # After drawing is complete, we show the trees
                for grid_item in self.grid_items:
                    grid_item.setVisible(True)

    @staticmethod
    def _calculate_zoom(zoom_level):
        """Calculate the max size for grid items from zoom level setting."""
        return zoom_level * 5

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self):
        """Commit the selected tree to output."""
        if self.__prevent_commit:
            return

        if not self.scene.selectedItems():
            self.send('Tree', None)
            # The selected tree index should only reset when model changes
            if self.model is None:
                self.selected_tree_index = -1
            return

        selected_item = self.scene.selectedItems()[0]
        self.selected_tree_index = self.grid_items.index(selected_item)
        tree = self.model.trees[self.selected_tree_index]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.send('Tree', tree)

    def send_report(self):
        """Send report."""
        self.report_plot()

    def _update_scene_rect(self):
        self.scene.setSceneRect(self.scene.itemsBoundingRect())

    def _update_target_class_combo(self):
        self._clear_target_class_combo()
        label = [
            x for x in self.ui_target_class_combo.parent().children()
            if isinstance(x, QLabel)
        ][0]

        if self.instances.domain.has_discrete_class:
            label_text = 'Target class'
            values = [
                c.title() for c in self.instances.domain.class_vars[0].values
            ]
            values.insert(0, 'None')
        else:
            label_text = 'Node color'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def resizeEvent(self, ev):
        width = (self.view.width() - self.view.verticalScrollBar().width())
        self.grid.reflow(width)
        self.grid.setPreferredWidth(width)

        super().resizeEvent(ev)
Exemplo n.º 48
0
        return QRectF(0, 0, self.width, self.height)

    def sizeHint(self, which, constraint):
        return QSizeF(self.width, self.height)

    def sizePolicy(self):
        return QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)


if __name__ == '__main__':
    import sys
    from Orange.data.table import Table
    from AnyQt.QtWidgets import (  # pylint: disable=ungrouped-imports
        QGraphicsView, QGraphicsScene, QApplication, QWidget
    )

    app = QApplication(sys.argv)
    widget = QWidget()
    widget.resize(500, 300)
    scene = QGraphicsScene(widget)
    view = QGraphicsView(scene, widget)
    dataset = Table(sys.argv[1] if len(sys.argv) > 1 else 'iris')
    histogram = Histogram(
        dataset, variable=0, height=300, width=500, n_bins=20, bar_spacing=2,
        border=(0, 0, 5, 0), border_color='#000', color_attribute='iris',
    )
    scene.addItem(histogram)

    widget.show()
    app.exec_()