Exemplo n.º 1
0
        def settings(data):
            """Open the context for ``data`` and return the variable lists.

            The current contents of ``self.model_selected`` and
            ``self.model_other`` serve as the default state; entries found in
            ``self.variable_state`` after ``openContext`` override them.

            Args:
                data: input table; only ``data.domain`` is used here.

            Returns:
                tuple: ``(selected, other)`` as decoded by
                ``VariablesSelection.decode_var_state``.
            """
            # get the default encoded state, replacing the position with Inf
            defaults = VariablesSelection.encode_var_state(
                [list(self.model_selected),
                 list(self.model_other)])
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in defaults.items()
            }

            self.openContext(data.domain)

            # A selection remembered from stored settings is applied once
            # and then dropped.
            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore,
                                           dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected),
                        list(self.model_other)])
            return selected, other
Exemplo n.º 2
0
 def _add_controls_variables(self):
     """Create the variables-selection control and add the VizRank button.

     The control is built over the two variable models and placed in the
     control area; the button goes into the layout of its ``add_remove``
     widget.
     """
     selection_control = VariablesSelection(
         self,
         self.model_selected,
         self.model_other,
         self.controlArea,
     )
     self.variables_selection = selection_control
     buttons_layout = self.variables_selection.add_remove.layout()
     buttons_layout.addWidget(self.btn_vizrank)
Exemplo n.º 3
0
        def settings(data):
            """Open the context for ``data`` and return the variable lists.

            Defaults come from the current contents of the two variable
            models; whatever ``self.variable_state`` holds after
            ``openContext`` takes precedence over them.

            Args:
                data: input table; only ``data.domain`` is used here.

            Returns:
                tuple: ``(selected, other)`` as decoded by
                ``VariablesSelection.decode_var_state``.
            """
            # get the default encoded state, replacing the position with Inf
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in VariablesSelection.encode_var_state(
                    [list(self.model_selected), list(self.model_other)]
                ).items()
            }

            self.openContext(data.domain)

            # Apply a remembered selection exactly once.
            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore, dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other
Exemplo n.º 4
0
    def _load_settings(self):
        """Fill both variable models with defaults and decode the saved state.

        The first five primitive attributes/metas become the selected
        variables; the remaining ones plus the class variables go to the
        other list. The encoded default state (positions set to Inf) is then
        updated with ``self.variable_state`` and decoded.

        Returns:
            The result of ``VariablesSelection.decode_var_state``.
        """
        domain = self.data.domain
        primitive = [
            var for var in domain.attributes + domain.metas
            if var.is_primitive()
        ]
        self.model_selected[:] = primitive[:5]
        self.model_other[:] = primitive[5:] + list(domain.class_vars)

        def model_lists():
            # Fresh copies of both models' contents on every call.
            return [list(self.model_selected), list(self.model_other)]

        encoded = VariablesSelection.encode_var_state(model_lists())
        state = {}
        for key, (ind, _) in encoded.items():
            state[key] = (ind, np.inf)
        state.update(self.variable_state)
        return VariablesSelection.decode_var_state(state, model_lists())
Exemplo n.º 5
0
 def _add_controls(self):
     """Add the variables-selection control, then the inherited controls.

     After the base-class controls are added, the control-area stretch
     widget is taken out of the layout and detached from its parent.
     """
     self.variables_selection = VariablesSelection(
         self,
         self.model_selected,
         self.model_other,
         self.controlArea,
     )
     buttons_layout = self.variables_selection.add_remove.layout()
     buttons_layout.addWidget(self.btn_vizrank)

     super()._add_controls()

     control_layout = self.controlArea.layout()
     control_layout.removeWidget(self.control_area_stretch)
     self.control_area_stretch.setParent(None)
Exemplo n.º 6
0
    def __init__(self):
        """Initialise widget state and build the plot and control widgets."""
        super().__init__()

        # Input data and values derived from it.
        self.data = None
        self.subset_data = None
        self.subset_indices = None
        self._embedding_coords = None
        self._rand_indices = None

        self.__replot_requested = False

        # Variables holding the two projected coordinates.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        plot_box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, plot_box)
        plot_box.layout().addWidget(self.graph.plot_widget)

        # Selected / other variable lists and the control that edits them.
        self.variables_selection = VariablesSelection()
        selected_model = VariableListModel(enable_dnd=True)
        self.model_selected = selected_model
        other_model = VariableListModel(enable_dnd=True)
        self.model_other = other_model
        self.variables_selection(
            self, selected_model, other_model, self.controlArea)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.vizrank_set_attrs)
        # TODO: this button introduces some margin at the bottom?!
        buttons_layout = self.variables_selection.add_remove.layout()
        buttons_layout.addWidget(self.btn_vizrank)

        graph_gui = self.graph.gui
        graph_gui.point_properties_box(self.controlArea)
        graph_gui.effects_box(self.controlArea)
        graph_gui.plot_properties_box(self.controlArea)

        self.graph.box_zoom_select(self.controlArea)

        gui.auto_commit(
            self.controlArea, self, "auto_commit",
            "Send Selection", "Send Automatically")

        # Hook the view box's started/moved/finished signals to the
        # corresponding handlers.
        view_box = self.graph.view_box
        view_box.started.connect(self._randomize_indices)
        view_box.moved.connect(self._manual_move)
        view_box.finished.connect(self._finish_manual_move)
Exemplo n.º 7
0
class OWRadviz(widget.OWWidget):
    """Radviz projection widget.

    Accepts a data table and an optional data subset, lets the user choose
    which variables take part in the projection, and outputs the selected
    rows, the annotated data and a table describing the projection points.
    """
    name = "Radviz"
    description = "Radviz"

    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    # Encoded state of the selected/other variable lists, stored per context.
    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    # Custom event type posted by ``invalidate_plot``.
    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg("At least 3 numeric or categorical variables are required")
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Initialise widget state and build the plot and control widgets."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs)
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")

        self.graph.zoom_actions(self)

        # Circle spanning [-1, 1] x [-1, 1], added to the plot on each redraw.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1., -1., 2., 2.))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        """Let the base class handle the resize, then re-place the labels."""
        # Chain to the base class like the other event handlers do, so its
        # own resize handling is not skipped.
        super().resizeEvent(event)
        self._update_points_labels()

    def keyPressEvent(self, event):
        """Forward the key press and refresh the tooltip for the modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward the key release and refresh the tooltip for the modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        """Make ``attrs`` the selected variables; no-op when ``attrs`` is empty."""
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        """Reset ``self.plotdata`` to an empty namespace."""
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        """Refresh the VizRank button and the class-density checkbox state."""
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        """Return the preferred widget size (800 x 500)."""
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        """Drop the cached plot data and clear the plot widget."""
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        # Post at most one request; the flag is reset when it is handled.
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10)

    def init_attr_values(self):
        """Pass the current data to the graph via ``set_domain``."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """Enable or disable the VizRank button and re-initialise VizRank.

        The button is enabled only for non-sparse data with more than one
        row and more than three listed variables, and only when a color
        attribute is set whose column is not entirely NaN.
        """
        attr_color = self.graph.attr_color
        is_enabled = self.data is not None and not self.data.is_sparse() and \
                     (len(self.model_other) + len(self.model_selected)) > 3 and len(self.data) > 1
        self.btn_vizrank.setEnabled(
            is_enabled and attr_color is not None
            and not np.isnan(self.data.get_column_view(attr_color)[0].astype(float)).all())
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            # Small SQL tables are downloaded whole; larger ones are sampled.
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {key: (source_ind, np.inf) for key, (source_ind, _) in state.items()}

            self.openContext(data.domain)

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            primitive_vars = [
                var for var in chain(domain.class_vars, domain.metas, domain.attributes)
                if var.is_primitive()]
            if len(primitive_vars) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        # Run the checks in order; the first one that rejects the data
        # (returns None) stops the chain.
        for f in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = f(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            candidates = [v for v in chain(domain.metas, domain.attributes)
                          if v.is_primitive()]
            # Defaults: first five variables selected, the rest (plus class
            # variables) in the other list; then apply the saved state.
            self.model_selected[:] = candidates[:5]
            self.model_other[:] = candidates[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = settings(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Rebuild the plot for the current inputs and send the outputs."""
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                # Mark the rows of ``data`` whose ids occur in the subset.
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                self._subset_mask = np.isin(
                    dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        """Handle a pending ``ReplotRequest``; defer other events to the base."""
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        """Store the encoded variable lists, then close the context."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        """Run ``radviz`` for ``variables`` and cache its three results."""
        ec, points, valid_mask = radviz(self.data, variables, self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def setup_plot(self, reset_view=True):
        """Compute the projection for the selected variables and draw it.

        Does nothing without data; shows a warning and an empty graph when
        fewer than two variables are selected.
        """
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        # Extend the domain with the two coordinate variables as metas.
        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes,
                        class_vars=domain.class_vars,
                        metas=new_metas)
        mask = self.plotdata.valid_mask
        # Rows outside the valid mask keep zero coordinates.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = data[self._subset_mask & mask]\
            if self._subset_mask is not None and len(self._subset_mask) else None
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0],
            y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def randomize_indices(self):
        """Pick ``MAX_POINTS`` random row indices when there are more rows.

        ``self.plotdata.rand`` is set to None when the embedding has at most
        ``MAX_POINTS`` rows.
        """
        ec = self.plotdata.embedding_coords
        self.plotdata.rand = np.random.choice(len(ec), MAX_POINTS, replace=False) \
            if len(ec) > MAX_POINTS else None

    def manual_move(self):
        """Redraw the plot from the current points.

        When ``self.plotdata.rand`` is set, only that sample of the valid
        rows is drawn.
        """
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(data, list(self.model_selected), self.plotdata.points)
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        # Here the coordinates are prepended to the attributes (not metas).
        attributes = (self.variable_x, self.variable_y) + self.data.domain.attributes
        domain = Domain(attributes=attributes,
                        class_vars=self.data.domain.class_vars,
                        metas=self.data.domain.metas)
        data = Table.from_numpy(domain, X=np.hstack((ec, data_x)), Y=data_y, metas=data_metas)
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=True)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1])
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def _update_points_labels(self):
        """Replace the text labels shown next to the projection points."""
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        # Each row is read as (x, y, variable).
        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width())/2. - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            width = metrics.width(name) if metrics.width(name) < br.width() else br.width()
            width = sx * (width + 5)
            height = sy * br.height()
            # Shift left by the label width for x < 0 and up by its height
            # for y > 0.
            ti.setPos(row[0] - (row[0] < 0) * width, row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        """Schedule a replot after a jitter change."""
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw, if there is data."""
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        """Clear the zoom stack and redraw the graph if it holds data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)

    def update_density(self):
        """Redraw the graph with the view reset."""
        self._update_graph(reset_view=True)

    def selection_changed(self):
        """Copy the graph selection into ``self._selection`` and commit."""
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        """Intentionally does nothing."""
        pass

    def commit(self):
        """Send the selected data, the annotated data and the components.

        All three outputs are None when no plot data is available.
        """
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            # Spread the graph selection (defined over valid rows only) back
            # over all rows; without a selection every row gets 0.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None \
                else 0
            selection = np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            # Values above 1 mean several selection groups are in use.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            # Third column of ``points`` supplies the variables of the
            # components domain.
            comp_domain = Domain(
                self.plotdata.points[:, 2],
                metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(np.array(self.plotdata.points[:, 1].T, dtype=float),
                               np.array(self.plotdata.points[:, 0].T, dtype=float))
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((self.plotdata.points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption of its visual settings to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)
Exemplo n.º 8
0
 def closeContext(self):
     """Store the encoded state of both variable lists, then close the context."""
     model_lists = [list(self.model_selected), list(self.model_other)]
     self.variable_state = VariablesSelection.encode_var_state(model_lists)
     super().closeContext()
Exemplo n.º 9
0
    def __init__(self):
        """Initialise widget state and build the plot and control widgets."""
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        # Keep the stored selection aside for a later restore, then reset
        # the attribute itself.
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        plot_box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(
            self, plot_box, "Plot", view_box=LinProjInteractiveViewBox)
        plot_box.layout().addWidget(self.graph.plot_widget)
        plot_widget = self.graph.plot_widget

        size_policy = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Selected / other variable lists and the control that edits them.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        buttons_layout = self.variables_selection.add_remove.layout()
        buttons_layout.addWidget(self.btn_vizrank)

        placement_box = gui.widgetBox(
            self.controlArea, "Placement", sizePolicy=size_policy)
        placement_labels = [
            "Circular Placement",
            "Linear Discriminant Analysis",
            "Principal Component Analysis",
            "Use input projection",
        ]
        self.radio_placement = gui.radioButtonsInBox(
            placement_box, self, "placement",
            btnLabels=placement_labels,
            callback=self._change_placement)

        self.viewbox = plot_widget.getViewBox()
        self.replot = None

        graph_gui = self.graph.gui
        point_box = graph_gui.point_properties_box(self.controlArea)
        self.models = graph_gui.points_models
        graph_gui.add_widget(graph_gui.JitterSizeSlider, point_box)
        point_box.setSizePolicy(*size_policy)

        radius_box = gui.widgetBox(
            self.controlArea, "Hide axes", sizePolicy=size_policy)
        self.rslider = gui.hSlider(
            radius_box, self, "radius",
            minValue=0, maxValue=100, step=5,
            label="Radius", createLabel=False, ticks=True,
            callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        properties_box = gui.vBox(self.controlArea, "Plot Properties")
        properties_box.setSizePolicy(*size_policy)

        property_widgets = [
            graph_gui.ShowLegend,
            graph_gui.ToolTipShowsAll,
            graph_gui.ClassDensity,
            graph_gui.LabelOnlySelected,
        ]
        graph_gui.add_widgets(property_widgets, properties_box)

        zoom_box = self.graph.box_zoom_select(self.controlArea)
        zoom_box.setSizePolicy(*size_policy)

        self.icons = gui.attributeIconDict

        palette = self.graph.plot_widget.palette()
        self.graph.set_palette(palette)
        gui.auto_commit(
            self.controlArea, self, "auto_commit", "Send Selection",
            auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True
Exemplo n.º 10
0
class OWLinearProjection(widget.OWWidget):
    """
    Widget that plots data as a multi-axis linear projection onto a plane.

    The projection axes are chosen by ``placement``: circular placement,
    LDA, PCA, or the table received on the "Projection" input.
    """
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    # Saved point selection (schema_only setting). __init__ stashes the
    # loaded value and set_data restores it once data has arrived.
    selection_indices = settings.Setting(None, schema_only=True)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        projection = Input("Projection", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    # Integer-valued enum: the values are also used as indices into the
    # placement radio buttons (_check_possible_opt) and into the label tuple
    # in send_report, so the numbering must match the button order.
    Placement = Enum("Placement",
                     dict(Circular=0, LDA=1, PCA=2, Projection=3),
                     type=int,
                     qualname="OWLinearProjection.Placement")

    # Prefix of the row labels in the "Components" output (see commit).
    # There is no entry for Projection: commit forwards the input table.
    Component_name = {
        Placement.Circular: "C",
        Placement.LDA: "LD",
        Placement.PCA: "PC"
    }
    # Prefix of the generated "<prefix>-x" / "<prefix>-y" variable names
    # (see _setup_plot).
    Variable_name = {
        Placement.Circular: "circular",
        Placement.LDA: "lda",
        Placement.PCA: "pca",
        Placement.Projection: "projection"
    }

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    # Version handled by migrate_settings / migrate_context below.
    settings_version = 3
    settingsHandler = settings.DomainContextHandler()

    # Encoded selected/other variable lists; written in closeContext and
    # merged back in set_data.
    variable_state = settings.ContextSetting({})
    placement = settings.Setting(Placement.Circular)
    # Value of the "Radius" slider (0-100); used by _get_min_radius as a
    # percentage of the longest axis.
    radius = settings.Setting(0)
    auto_commit = settings.Setting(True)

    resolution = 256

    graph = settings.SettingProvider(OWLinProjGraph)
    # Custom event type posted by invalidate_plot, handled in customEvent.
    ReplotRequest = QEvent.registerEventType()
    vizrank = settings.SettingProvider(LinearProjectionVizRank)
    graph_name = "graph.plot_widget.plotItem"

    class Warning(widget.OWWidget.Warning):
        no_cont_features = widget.Msg("Plotting requires numeric features")
        not_enough_components = widget.Msg(
            "Input projection has less than 2 components")
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        proj_and_domain_match = widget.Msg(
            "Projection and Data domains do not match")
        no_valid_data = widget.Msg("No projection due to invalid data")

    def __init__(self):
        """
        Initialise the widget state and build the plot and control-area GUI.

        The statements are order-sensitive: controls are added to the control
        area in the order they are created here.
        """
        super().__init__()

        # Input signals and state derived from them.
        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        # Output variables for the projected coordinates; created in
        # _setup_plot once data is available.
        self.variable_x = None
        self.variable_y = None

        # Main area: the projection plot.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self,
                                    box,
                                    "Plot",
                                    view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Selected / other variable lists. VariablesSelection is created
        # first and then called with the owner and both models.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        # "Suggest Features" (VizRank) button, placed into the add/remove
        # box of the variable selection.
        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        # Placement radio buttons; the label order must match the integer
        # values of the Placement enum.
        box = gui.widgetBox(self.controlArea,
                            "Placement",
                            sizePolicy=SIZE_POLICY)
        self.radio_placement = gui.radioButtonsInBox(
            box,
            self,
            "placement",
            btnLabels=[
                "Circular Placement", "Linear Discriminant Analysis",
                "Principal Component Analysis", "Use input projection"
            ],
            callback=self._change_placement)

        self.viewbox = plot.getViewBox()
        self.replot = None

        # Point properties plus the jitter slider.
        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        # Radius slider: axes shorter than the chosen radius are hidden
        # (see update_radius).
        box = gui.widgetBox(self.controlArea,
                            "Hide axes",
                            sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(box,
                                   self,
                                   "radius",
                                   minValue=0,
                                   maxValue=100,
                                   step=5,
                                   label="Radius",
                                   createLabel=False,
                                   ticks=True,
                                   callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([
            g.ShowLegend, g.ToolTipShowsAll, g.ClassDensity,
            g.LabelOnlySelected
        ], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)
        gui.auto_commit(self.controlArea,
                        self,
                        "auto_commit",
                        "Send Selection",
                        auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        # plotdata must exist before _change_placement, which clears and
        # rebuilds the plot and then commits.
        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True

    def reset_graph_data(self):
        """Rescale the graph data and redraw with a reset view, if data is set."""
        if self.data is None:
            return
        self.graph.rescale_data()
        self._update_graph(reset_view=True)

    def keyPressEvent(self, event):
        """Let the base class handle the key press, then refresh the tooltip."""
        super().keyPressEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    def keyReleaseEvent(self, event):
        """Let the base class handle the key release, then refresh the tooltip."""
        super().keyReleaseEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    def _vizrank(self, attrs):
        """
        Apply a VizRank suggestion.

        ``attrs`` become the selected variables; any of them still present in
        the "other" list are removed from it.
        """
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        remaining = []
        for candidate in self.model_other:
            if candidate not in attrs:
                remaining.append(candidate)
        self.model_other[:] = remaining

    def _change_placement(self):
        """
        React to a change of the placement option.

        Variable selection is only enabled for the Circular and LDA
        placements, and the radius slider for everything but Circular. The
        plot is then rebuilt and the outputs are committed.
        """
        current = self.placement
        kinds = self.Placement
        user_picks_variables = current in [kinds.Circular, kinds.LDA]
        self.variables_selection.set_enabled(user_picks_variables)
        self._vizrank_color_change()
        self.rslider.setEnabled(current != kinds.Circular)
        self._setup_plot()
        self.commit()

    def _get_min_radius(self):
        """
        Return the length below which an axis is hidden.

        This is ``radius`` percent of the longest axis in ``plotdata.axes``,
        plus a small epsilon.
        """
        axis_lengths = np.linalg.norm(self.plotdata.axes, axis=1)
        longest = np.max(axis_lengths)
        return self.radius * longest / 100 + 1e-5

    def update_radius(self):
        """
        Update axis visibility and the hide-circle after the radius changed.

        Does nothing when the plot has no hide-circle.
        """
        plotdata = self.plotdata
        assert plotdata is not None
        circle = plotdata.hidecircle
        if circle is None:
            return
        threshold = self._get_min_radius()
        for axis_vector, axis_item in zip(plotdata.axes, plotdata.axisitems):
            is_long_enough = np.linalg.norm(axis_vector) > threshold
            axis_item.setVisible(is_long_enough)
        diameter = 2 * threshold
        circle.setRect(QRectF(-threshold, -threshold, diameter, diameter))

    def _new_plotdata(self):
        """Replace ``self.plotdata`` with a fresh, empty plot-state namespace."""
        empty_state = {
            "valid_mask": None,
            "embedding_coords": None,
            "axisitems": [],
            "axes": [],
            "variables": [],
            "data": None,
            "hidecircle": None,
        }
        self.plotdata = namespace(**empty_state)

    def _anchor_circle(self, variables):
        """
        Add one anchor item per projection axis and, unless the placement is
        Circular, the circle marking the hidden-axis radius.

        Args:
            variables: variables matching ``self.plotdata.axes`` one-to-one;
                their names label the anchors.
        """
        # Axes no longer than this threshold start out hidden.
        threshold = self._get_min_radius()
        anchors = []
        for axis_vector, variable in zip(self.plotdata.axes, variables):
            item = AnchorItem(
                line=QLineF(0, 0, *axis_vector),
                text=variable.name,
            )
            item.setVisible(np.linalg.norm(axis_vector) > threshold)
            item.setPen(pg.mkPen((100, 100, 100)))
            item.setArrowVisible(True)
            self.viewbox.addItem(item)
            anchors.append(item)
        self.plotdata.axisitems = anchors

        if self.placement != self.Placement.Circular:
            diameter = 2 * threshold
            circle = QGraphicsEllipseItem()
            circle.setRect(QRectF(-threshold, -threshold, diameter, diameter))

            outline = QPen(Qt.lightGray, 1)
            outline.setCosmetic(True)
            circle.setPen(outline)

            self.viewbox.addItem(circle)
            self.plotdata.hidecircle = circle

    def update_colors(self):
        """Re-evaluate the "Suggest Features" button state."""
        self._vizrank_color_change()

    def clear(self):
        """
        Reset the widget state before new data is set.

        Drops the data, empties both variable lists, clears the plot and
        forgets the selection. Both the persisted ``selection_indices`` and
        the in-memory ``_selection`` are reset: ``_plot`` indexes
        ``_selection`` with the new data's ``valid_mask``, so a mask kept
        from a previous dataset of a different length would fail there.
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()
        self._selection = None
        self.selection_indices = None

    def _clear_plot(self):
        """
        Remove the anchor items and hide-circle from the view box and reset
        ``plotdata`` to its empty state.
        """
        self.Warning.trivial_components.clear()
        stale = self.plotdata
        for anchor_item in stale.axisitems:
            self.viewbox.removeItem(anchor_item)
        if stale.hidecircle:
            self.viewbox.removeItem(stale.hidecircle)
        self._new_plotdata()
        self.graph.hide_axes()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.

        At most one ReplotRequest event is pending at a time; the flag is
        cleared again when the event is handled in customEvent.
        """
        if self.__replot_requested:
            return
        self.__replot_requested = True
        request = QEvent(self.ReplotRequest)
        QApplication.postEvent(self, request, Qt.LowEventPriority - 10)

    def init_attr_values(self):
        """Pass the current data (possibly None) to the graph's set_domain."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """
        Refresh the state and tooltip of the "Suggest Features" button.

        Without data only the tooltip is updated. Otherwise the number of
        candidate variables is stored in ``n_cont_var`` and the button is
        enabled only when the placement is Circular or LDA, a colour
        variable is chosen (and is not continuous under LDA), and at least
        three candidates exist. VizRank is stopped and reset in either case.
        """
        if self.data is None:
            self.btn_vizrank.setToolTip("There is no data.")
            return

        domain = self.data.domain
        # ``is_primitive`` is a method in Orange's Variable API and must be
        # called. The bare attribute is a bound method and always truthy,
        # which let non-primitive metas be counted as candidates.
        candidates = [
            var for var in chain(domain.variables, domain.metas)
            if var.is_primitive() and var is not self.graph.attr_color
        ]
        self.n_cont_var = len(candidates)

        is_enabled = False
        if self.placement not in [self.Placement.Circular, self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.graph.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.graph.attr_color.is_continuous \
                and self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear Discriminant Analysis Projection " \
                  "when continuous color variable is selected."
        elif len(candidates) < 3:
            msg = "Not enough available continuous variables"
        else:
            is_enabled = True
            msg = ""
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        self.vizrank.stop_and_reset(is_enabled)

    @Inputs.projection
    def set_projection(self, projection):
        """
        Set the input projection table.

        A non-empty table with fewer than two rows is rejected with a
        warning. Any accepted (non-None) table switches the placement to
        Projection.
        """
        self.Warning.not_enough_components.clear()
        has_too_few_rows = projection and len(projection) < 2
        if has_too_few_rows:
            self.Warning.not_enough_components()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection = projection

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset.

        The previous context is closed and the widget cleared first. Data
        without any continuous variable (attributes, class variables or
        metas) is rejected with a warning. For non-empty data the variable
        lists are initialised from the domain and then overridden by the
        state saved in the matching context.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            # Small SQL tables are downloaded whole; larger ones are sampled.
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.information("Data has been sampled")
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def restore_variable_state(data):
            # Default encoded state, with every position replaced by Inf so
            # that saved positions always sort before the defaults.
            defaults = VariablesSelection.encode_var_state(
                [list(self.model_selected),
                 list(self.model_other)])
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in defaults.items()
            }

            self.openContext(data.domain)

            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore,
                                           dtype=int)
                self.__pending_selection_restore = None

            # The encoded state must contain all variables in the input
            # domain; saved entries take precedence over the defaults.
            state.update(self.variable_state)
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected),
                        list(self.model_other)])
            return selected, other

        self.closeContext()
        self.clear()
        self.Warning.no_cont_features.clear()
        self.information()
        data = sql(data)
        if data is not None:
            domain = data.domain
            has_continuous = any(
                var.is_continuous
                for var in chain(domain.variables, domain.metas))
            if not has_continuous:
                self.Warning.no_cont_features()
                data = None
        self.data = data
        self.init_attr_values()
        if data is not None and len(data):
            self._initialize(data)
            self.model_selected[:], self.model_other[:] = \
                restore_variable_state(data)
            self.vizrank.stop_and_reset()
            self.vizrank.attrs = data.domain.attributes

    def _check_possible_opt(self):
        """
        Enable only the placement options that are possible for the inputs.

        LDA needs a discrete class with at least two values, and "Use input
        projection" needs a projection table. If the current placement is
        unavailable it falls back to Circular. Without data all placement
        controls are disabled. The outputs are committed at the end.
        """
        def toggle_controls(enabled):
            for button in self.radio_placement.buttons:
                button.setEnabled(enabled)
            self.variables_selection.set_enabled(enabled)

        kinds = self.Placement
        if not self.data:
            self.graph.new_data(None)
            self.rslider.setEnabled(False)
            toggle_controls(False)
        else:
            toggle_controls(True)
            domain = self.data.domain
            lda_possible = domain.has_discrete_class and \
                len(domain.class_var.values) >= 2
            projection_given = bool(self.projection)
            availability = ((kinds.LDA, lda_possible),
                            (kinds.Projection, projection_given))
            for option, usable in availability:
                if usable:
                    continue
                self.radio_placement.buttons[option].setEnabled(False)
                if self.placement == option:
                    self.placement = kinds.Circular
            self._setup_plot()
        self.commit()

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        The cached subset mask is dropped (handleNewSignals recomputes it)
        and the alpha-value control is enabled only when no subset is given.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        no_subset = subset is None
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(no_subset)

    def handleNewSignals(self):
        """
        Finish processing after all input signals were delivered.

        Recomputes the subset mask when both data and subset are present,
        then re-checks the placement options, rebuilds the plot and commits.
        """
        data, subset = self.data, self.subset_data
        if data is not None and subset is not None:
            # Mark the rows of the data whose ids appear in the subset.
            row_ids = data.ids.ravel()
            subset_ids = np.unique(subset.ids)
            self._subset_mask = np.in1d(row_ids, subset_ids,
                                        assume_unique=True)
        self._check_possible_opt()
        self._change_placement()
        self.commit()

    def customEvent(self, event):
        """
        Handle the delayed ReplotRequest posted by invalidate_plot.

        All other custom events are passed to the base class.
        """
        if event.type() != OWLinearProjection.ReplotRequest:
            super().customEvent(event)
            return
        self.__replot_requested = False
        self._setup_plot()
        self.commit()

    def closeContext(self):
        """Encode the two variable lists into ``variable_state``, then close
        the context."""
        current_lists = [list(self.model_selected), list(self.model_other)]
        self.variable_state = VariablesSelection.encode_var_state(
            current_lists)
        super().closeContext()

    def _initialize(self, data):
        """
        Fill the variable lists from the data's domain.

        The continuous variables (metas first, then attributes) are split
        so that the first three are selected and the rest go to "other".
        """
        domain = data.domain
        continuous = []
        for variable in chain(domain.metas, domain.attributes):
            if variable.is_continuous:
                continuous.append(variable)
        self.model_other[:] = continuous[3:]
        self.model_selected[:] = continuous[:3]

    def prepare_plot_data(self, variables):
        """
        Project the data onto the axes defined by the current placement.

        Args:
            variables: the variables to project; must not be empty.

        Returns:
            ``(valid_mask, coords, axes)``: ``valid_mask`` flags the rows
            without NaN in any of ``variables``, ``coords`` holds the x/y
            coordinates of the valid rows (one row per point) and ``axes``
            holds one 2D vector per variable. Returns
            ``(None, None, None)`` when no axes can be obtained for the
            current placement.
        """
        def projection(variables):
            # Match the input projection's attributes to the variables,
            # first by identity/equality and then by name.
            if set(self.projection.domain.attributes).issuperset(variables):
                axes = self.projection[:2, variables].X
            elif set(f.name
                     for f in self.projection.domain.attributes).issuperset(
                         f.name for f in variables):
                axes = self.projection[:2, [f.name for f in variables]].X
            else:
                self.Error.proj_and_domain_match()
                axes = None
            return axes

        def get_axes(variables):
            self.Error.proj_and_domain_match.clear()
            axes = None
            if self.placement == self.Placement.Circular:
                axes = LinProj.defaultaxes(len(variables))
            elif self.placement == self.Placement.LDA:
                axes = self._get_lda(self.data, variables)
            elif self.placement == self.Placement.Projection and self.projection:
                axes = projection(variables)
            return axes

        coords = [
            column_data(self.data, var, dtype=float) for var in variables
        ]
        coords = np.vstack(coords)
        p, N = coords.shape
        # Both conditions must hold. Written as ``assert a, b`` the second
        # comparison would only be the assertion message and never checked.
        assert N == len(self.data) and p == len(variables)

        axes = get_axes(variables)
        if axes is None:
            return None, None, None
        assert axes.shape == (2, p)

        # Drop the rows with a missing value in any projected variable.
        valid_mask = ~np.isnan(coords).any(axis=0)
        coords = coords[:, valid_mask]

        X, Y = np.dot(axes, coords)
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)

        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def _setup_plot(self):
        """
        Rebuild the whole plot for the current data and placement.

        Clears the old plot, creates the x/y output variables with names
        unique within the data's domain, computes the projection and stores
        it in ``plotdata``, then draws the anchors and the points. Stops
        early when there is no data, no variable to project, no projection,
        or no valid row.
        """
        self._clear_plot()
        if self.data is None:
            return
        self.__replot_requested = False

        kinds = self.Placement
        domain = self.data.domain

        taken_names = [
            var.name for var in chain(domain.variables, domain.metas)
        ]
        proposed = [
            "{}-x".format(self.Variable_name[self.placement]),
            "{}-y".format(self.Variable_name[self.placement]),
        ]
        names = get_unique_names(taken_names, proposed)
        self.variable_x = ContinuousVariable(names[0])
        self.variable_y = ContinuousVariable(names[1])

        if self.placement in [kinds.Circular, kinds.LDA]:
            variables = list(self.model_selected)
        elif self.placement == kinds.Projection:
            variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == kinds.PCA:
            variables = [
                var for var in domain.attributes if var.is_continuous
            ]
        if not variables:
            self.graph.new_data(None)
            return

        if self.placement == kinds.PCA:
            valid_mask, coords, axes = self._get_pca()
            variables = self._pca.orig_domain.attributes
        else:
            valid_mask, coords, axes = self.prepare_plot_data(variables)

        plotdata = self.plotdata
        plotdata.variables = variables
        plotdata.valid_mask = valid_mask
        plotdata.embedding_coords = coords
        plotdata.axes = axes
        if valid_mask is None or coords is None or axes is None:
            return

        if not sum(valid_mask):
            self.Error.no_valid_data()
            self.graph.new_data(None, None)
            return
        self.Error.no_valid_data.clear()

        self._anchor_circle(variables=variables)
        self._plot()

    def _plot(self):
        """
        Draw the projected points.

        Extends the data's domain with the two projection variables as
        metas, stores the projected coordinates there (rows outside
        ``valid_mask`` get zeros), passes the valid rows and the highlighted
        subset to the graph, and restores the remembered selection.
        """
        source_domain = self.data.domain
        new_metas = source_domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=source_domain.attributes,
                        class_vars=source_domain.class_vars,
                        metas=new_metas)
        valid_mask = self.plotdata.valid_mask
        # The ``np.float`` alias was removed from NumPy; the builtin
        # ``float`` selects the same float64 dtype.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[valid_mask] = self.plotdata.embedding_coords
        self.plotdata.data = data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = data[self._subset_mask & valid_mask]\
            if self._subset_mask is not None and len(self._subset_mask) else None
        self.graph.new_data(data[valid_mask], subset_data)
        if self._selection is not None:
            # The selection is stored per data row; the graph only knows
            # the valid rows.
            self.graph.selection = self._selection[valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, False)

    def _get_lda(self, data, variables):
        """
        Fit LDA on ``variables`` against the data's class variables and
        return the first two scaling vectors, transposed.

        A degenerate (1, 1) result is replaced by the fixed axes
        ``[[1.], [0.]]``.
        """
        lda_domain = Domain(attributes=variables,
                            class_vars=data.domain.class_vars)
        table = data.transform(lda_domain)
        model = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        model.fit(table.X, table.Y)
        axes = model.scalings_[:, :2].T
        if axes.shape == (1, 1):
            return np.array([[1.], [0.]])
        return axes

    def _get_pca(self):
        """
        Fit a two-component PCA (with an added Normalize preprocessor) on
        the data and store the fitted model in ``self._pca``.

        Returns:
            ``(valid_mask, coords, axes)``: ``valid_mask`` flags the rows of
            the transformed data without NaN, ``coords`` are the transformed
            coordinates and ``axes`` the transposed components rescaled to
            the extent of the projected points.
        """
        data = self.data
        n_components = 2
        projector = PCA(n_components=n_components)
        projector.component = n_components
        projector.preprocessors = PCA.preprocessors + [Normalize()]

        pca = projector(data)
        cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

        self._pca = pca
        # A non-finite total explained variance triggers the
        # trivial-components warning.
        if not np.isfinite(cumulative_variance[-1]):
            self.Warning.trivial_components()

        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        lower_extent = np.abs(np.min(coords, axis=0))
        upper_extent = np.max(coords, axis=0)
        max_radius = np.min([lower_extent, upper_extent])
        axes = pca.components_.T.copy()
        longest_axis = np.max(np.linalg.norm(axes, axis=1))
        axes *= max_radius / longest_axis
        return valid_mask, coords, axes

    def _update_graph(self, reset_view=False):
        """Empty the zoom stack and, if the graph has data, redraw it."""
        graph = self.graph
        graph.zoomStack = []
        if graph.data is not None:
            graph.update_data(self.variable_x, self.variable_y, reset_view)

    def update_density(self):
        """Redraw the graph without resetting the view."""
        self._update_graph(reset_view=False)

    def selection_changed(self):
        """
        Store the graph's selection and commit.

        The graph's selection covers only the valid rows; it is expanded to
        one entry per data row (zero for invalid rows) and also saved as a
        plain list in ``selection_indices``.
        """
        if self.graph.selection is None:
            self._selection = self.selection_indices = None
        else:
            self._selection = per_row = np.zeros(len(self.data),
                                                 dtype=np.uint8)
            per_row[self.plotdata.valid_mask] = self.graph.selection
            self.selection_indices = per_row.tolist()
        self.commit()

    def prepare_data(self):
        """Do nothing; intentionally empty."""
        pass

    def commit(self):
        """
        Send the selected data, the annotated data and the components.

        All three outputs are None unless both the input data and the
        plotted data exist.
        """
        def prepare_components():
            # Build the "Components" table: one row per projection axis and
            # one column per projected variable. For the Projection
            # placement the input projection is forwarded as it is.
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                attrs = list(self.model_selected[:])
                axes = self.plotdata.axes
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = list(self._pca.orig_domain.attributes)
            if self.placement == self.Placement.Projection:
                return self.projection

            domain = Domain([
                ContinuousVariable(a.name, compute_value=lambda _: None)
                for a in attrs
            ],
                            metas=[StringVariable(name='component')])
            metas = np.array([[
                "{}{}".format(self.Component_name[self.placement], i + 1)
                for i in range(axes.shape[1])
            ]],
                             dtype=object).T
            components = Table(domain, axes.T, metas=metas)
            components.name = 'components'
            return components

        selected = annotated = components = None
        if self.data is not None and self.plotdata.data is not None:
            components = prepare_components()

            graph = self.graph
            # Expand the graph's selection (valid rows only) to one entry
            # per data row; rows that are invalid or unselected stay 0.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = \
                graph.selection if graph.selection is not None else 0

            selection = np.flatnonzero(mask)
            name = self.data.name
            data = self.plotdata.data
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes

            # Selection values above 1 mean several selection groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """
        Add the plot to the report, with a caption listing the projection
        and the graph's colour, label, shape, size and jittering settings.
        Does nothing without data.
        """
        if self.data is None:
            return

        placement_labels = ("Circular Placement",
                            "Linear Discriminant Analysis",
                            "Principal Component Analysis",
                            "Input projection")

        def var_name(var):
            return var and var.name

        graph = self.graph
        jittering = graph.jitter_size != 0 \
            and "{} %".format(graph.jitter_size)
        items = (
            ("Projection", placement_labels[self.placement]),
            ("Color", var_name(graph.attr_color)),
            ("Label", var_name(graph.attr_label)),
            ("Shape", var_name(graph.attr_shape)),
            ("Size", var_name(graph.attr_size)),
            ("Jittering", jittering),
        )
        caption = report.render_items_vert(items)
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """
        Upgrade saved settings in place.

        Before version 2 ``point_size`` is copied to ``point_width``.
        Before version 3 the graph-related values are gathered into a
        nested ``graph`` dict.
        """
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_["graph"] = {
                "jitter_size": settings_["jitter_value"],
                "point_width": settings_["point_width"],
                "alpha_value": settings_["alpha_value"],
                "class_density": settings_["class_density"],
            }

    @classmethod
    def migrate_context(cls, context, version):
        """
        Upgrade a saved context in place.

        Before version 2 the index-based ``*_index`` values are converted
        to ``(name, type + 100)`` pairs looked up in the context's ordered
        domain, or to None when the index is out of range. Before version 3
        the three ``attr_*`` values are gathered into a nested ``graph``
        dict.
        """
        if version < 2:
            all_vars = context.ordered_domain
            continuous = [t for t in context.ordered_domain if t[1] == 2]
            discrete = [t for t in context.ordered_domain if t[1] == 1]
            conversions = (
                (all_vars, "color_index", "attr_color"),
                (discrete, "shape_index", "attr_shape"),
                (continuous, "size_index", "attr_size"),
            )
            for pool, old_key, new_key in conversions:
                # The stored index is 1-based.
                position = context.values[old_key][0] - 1
                if 0 <= position < len(pool):
                    entry = (pool[position][0], pool[position][1] + 100)
                else:
                    entry = None
                context.values[new_key] = entry
        if version < 3:
            graph_values = {}
            for key in ("attr_color", "attr_shape", "attr_size"):
                graph_values[key] = context.values[key]
            context.values["graph"] = graph_values
Exemplo n.º 11
0
    def __init__(self):
        """
        Initialise the widget state and build the plot and control-area GUI.

        The statements are order-sensitive: controls are added to the control
        area in the order they are created here.
        """
        super().__init__()

        # Input signals and state derived from them.
        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        # Variables for the projected x/y coordinates.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        # Main area: the plot, with its axes hidden.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Selected / other variable lists. VariablesSelection is created
        # first and then called with the owner and both models.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        # "Suggest features" (VizRank) button, placed into the add/remove
        # box of the variable selection.
        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs
        )
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(
            self.controlArea,
            self,
            "auto_commit",
            "Send Selection",
            auto_label="Send Automatically",
        )

        self.graph.zoom_actions(self)

        # Ellipse item spanning (-1, -1) to (1, 1), drawn with a black pen of
        # width 2. It is only created here, not added to the view box.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1.0, -1.0, 2.0, 2.0))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))
Exemplo n.º 12
0
class OWRadviz(OWProjectionWidget):
    """Widget that displays a Radviz projection of the input data."""
    # Widget metadata.
    name = "Radviz"
    description = "Display Radviz projection"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    # Input signals; handled by `set_data` and `set_subset_data`.
    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    # Output signals; all three are sent from `commit`.
    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    # NOTE(review): `migrate_context` gates its migration on `version < 3`
    # while the declared version is 2 -- confirm which number is intended.
    settings_version = 2
    settingsHandler = settings.DomainContextHandler()

    # Encoded selected/other variable lists; written in `closeContext` and
    # merged over the defaults in `_load_settings`.
    variable_state = settings.ContextSetting({})
    auto_commit = settings.Setting(True)

    vizrank = settings.SettingProvider(RadvizVizRank)
    graph = settings.SettingProvider(OWRadvizGraph)
    graph_name = "graph.plot_widget.plotItem"

    # Custom event type posted by `invalidate_plot` and handled in
    # `customEvent` to perform a delayed replot.
    ReplotRequest = QEvent.registerEventType()

    class Information(OWProjectionWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    # Raised from `setup_plot`.
    class Warning(OWProjectionWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")
        invalid_embedding = widget.Msg("No projection for selected features")

    # Raised from `_check_data`; each one also discards the input data.
    class Error(OWProjectionWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required")
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Initialise widget state, build the GUI and connect view-box signals."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self.subset_indices = None
        self._embedding_coords = None
        # Indices of the random subset drawn while anchors are dragged; set
        # in `_randomize_indices`.
        self._rand_indices = None

        # True while a ReplotRequest event is pending (see `invalidate_plot`).
        self.__replot_requested = False

        # Meta variables that carry the projected coordinates in `commit`.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        # Main area: the plot itself.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box)
        box.layout().addWidget(self.graph.plot_widget)

        # Control area: lists of selected and remaining variables.
        self.variables_selection = VariablesSelection()
        self.model_selected = selected = VariableListModel(enable_dnd=True)
        self.model_other = other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, selected, other, self.controlArea)

        # The VizRank button is created without a parent box and then placed
        # into the add/remove button layout of the variable selection.
        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.vizrank_set_attrs)
        # Todo: this button introduces some margin at the bottom?!
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        # Standard plot control boxes provided by the graph's gui helper.
        g = self.graph.gui
        g.point_properties_box(self.controlArea)
        g.effects_box(self.controlArea)
        g.plot_properties_box(self.controlArea)

        self.graph.box_zoom_select(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        # View-box signals drive the manual-move handlers below.
        self.graph.view_box.started.connect(self._randomize_indices)
        self.graph.view_box.moved.connect(self._manual_move)
        self.graph.view_box.finished.connect(self._finish_manual_move)

    def vizrank_set_attrs(self, attrs):
        """
        Make `attrs` the selected variables.

        Every other variable currently held by the widget (in either list)
        ends up in the "other" list. Nothing happens when `attrs` is empty.

        Args:
            attrs: sequence of variables to select
        """
        if not attrs:
            return
        self.variables_selection.display_none()
        # Gather all known variables *before* overwriting the models, so that
        # previously selected variables that are not in `attrs` are moved to
        # the "other" list instead of disappearing from both lists.
        known = list(self.model_selected) + list(self.model_other)
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in known if v not in attrs]

    def update_colors(self):
        """Re-evaluate VizRank availability and the class-density checkbox."""
        self._vizrank_color_change()
        density_possible = self.can_draw_density()
        self.cb_class_density.setEnabled(density_possible)

    def invalidate_plot(self):
        """
        Schedule a delayed replot.

        A low-priority ReplotRequest event is posted to the widget unless one
        is already pending.
        """
        if self.__replot_requested:
            return
        self.__replot_requested = True
        request = QEvent(self.ReplotRequest)
        QApplication.postEvent(self, request, Qt.LowEventPriority - 10)

    def _vizrank_color_change(self):
        """Enable or disable the VizRank button and re-initialise VizRank."""
        data = self.data
        # Data must be present and dense, the two variable lists must hold
        # more than 3 variables, more than one valid row must exist and no
        # column of X may have zero (or undefined) standard deviation.
        is_enabled = (
            data is not None
            and not data.is_sparse()
            and len(self.model_other) + len(self.model_selected) > 3
            and len(data[self.valid_data]) > 1
            and np.all(np.nan_to_num(np.nanstd(data.X, 0)) != 0)
        )
        # Additionally a colour variable with at least one known value is
        # required.
        can_rank = (
            is_enabled
            and self.attr_color is not None
            and not np.isnan(
                data.get_column_view(self.attr_color)[0].astype(float)
            ).all()
        )
        self.btn_vizrank.setEnabled(can_rank)
        self.vizrank.initialize()

    def clear(self):
        """Reset the data-dependent state, both variable lists and the graph."""
        self.data = self.valid_data = None
        self._embedding_coords = self._rand_indices = None
        for model in (self.model_selected, self.model_other):
            model.clear()

        graph = self.graph
        graph.set_attributes(())
        graph.set_points(None)
        graph.update_coordinates()
        graph.clear()

    @Inputs.data
    def set_data(self, data):
        """
        Handle the "Data" input.

        The previous context is closed and the widget cleared; the new data
        is validated by `_check_data` (which may discard it), and when data
        remains the variable lists are filled from `_load_settings`.
        """
        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.data = data
        self._check_data()
        self.init_attr_values()
        self.openContext(self.data)
        if self.data is None:
            return
        selected, other = self._load_settings()
        self.model_selected[:] = selected
        self.model_other[:] = other

    def _check_data(self):
        """
        Validate `self.data`, replacing it with None when it is unusable.

        Sparse data is rejected. An SqlTable is downloaded into a Table
        (sampled when its approximate length is 4000 or more). The resulting
        data must have at least 2 instances and at least 3 primitive
        variables among its variables and metas; otherwise the matching
        error is shown and the data is discarded.
        """
        if self.data is None:
            return
        if self.data.is_sparse():
            self.Error.sparse_data()
            self.data = None
            return

        if isinstance(self.data, SqlTable):
            if self.data.approx_len() < 4000:
                self.data = Table(self.data)
            else:
                self.Information.sql_sampled_data()
                data_sample = self.data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                self.data = Table(data_sample)

        # These checks used to hang off the same `elif` chain as the SQL
        # branch, so they were never applied to data that arrived as an
        # SqlTable. They now run on the materialised table as well.
        domain = self.data.domain
        if len(self.data) < 2:
            self.Error.no_instances()
            self.data = None
        elif sum(1 for v in domain.variables + domain.metas
                 if v.is_primitive()) < 3:
            self.Error.no_features()
            self.data = None

    def _load_settings(self):
        """
        Fill the variable lists with defaults and merge in the saved state.

        The first five primitive attributes/metas become the default
        selection; the rest, followed by the class variables, go to the
        "other" list. The result of decoding the merged state is returned.
        """
        domain = self.data.domain
        primitives = []
        for var in domain.attributes + domain.metas:
            if var.is_primitive():
                primitives.append(var)
        self.model_selected[:] = primitives[:5]
        self.model_other[:] = primitives[5:] + list(domain.class_vars)

        defaults = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)])
        # Default entries get position Inf; entries from the saved state
        # overwrite them in the update below.
        state = {}
        for key, (ind, _) in defaults.items():
            state[key] = (ind, np.inf)
        state.update(self.variable_state)

        models = [list(self.model_selected), list(self.model_other)]
        return VariablesSelection.decode_var_state(state, models)

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Handle the "Data Subset" input.

        Stores the subset and the set of its instance ids. The alpha-value
        control is enabled only while no subset is present.
        """
        self.subset_data = subset
        # Use an empty *set* (not `{}`, which is a dict) so the attribute has
        # the same type whether or not a subset is given.
        self.subset_indices = {e.id for e in subset} \
            if subset is not None else set()
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Rebuild the plot, refresh VizRank availability and commit outputs."""
        self.setup_plot()
        self._vizrank_color_change()
        self.commit()

    def get_coordinates_data(self):
        """
        Return the two embedding columns as an ``(x, y)`` pair.

        ``(None, None)`` is returned when there is no embedding or when it
        contains any NaN.
        """
        coords = self._embedding_coords
        if coords is not None and not np.any(np.isnan(coords)):
            return coords[:, 0], coords[:, 1]
        return None, None

    def get_subset_mask(self):
        """
        Return a boolean array marking which valid rows belong to the subset.

        None is returned when there are no subset indices.
        """
        if not self.subset_indices:
            return None
        valid_rows = self.data[self.valid_data]
        return np.array([row.id in self.subset_indices for row in valid_rows])

    def customEvent(self, event):
        """Replot on a ReplotRequest event; defer other events to the base."""
        if event.type() != OWRadviz.ReplotRequest:
            super().customEvent(event)
            return
        self.__replot_requested = False
        self.setup_plot()

    def closeContext(self):
        """Encode the current variable lists into `variable_state`, then close."""
        models = [list(self.model_selected), list(self.model_other)]
        self.variable_state = VariablesSelection.encode_var_state(models)
        super().closeContext()

    def setup_plot(self):
        """
        Compute the Radviz projection for the selected variables and plot it.

        Does nothing without data. With fewer than two selected variables a
        warning is shown and the graph is cleared. A warning is also shown
        when the resulting embedding is missing or contains NaN.
        """
        if self.data is None:
            return
        self.__replot_requested = False

        self.clear_messages()
        if len(self.model_selected) < 2:
            self.Warning.no_features()
            self.graph.clear()
            return

        result = radviz(self.data, self.model_selected)
        self._embedding_coords = result[0]
        self.graph.set_points(result[1])
        self.valid_data = result[2]

        coords = self._embedding_coords
        if coords is None or np.any(np.isnan(coords)):
            self.Warning.invalid_embedding()
        self.graph.reset_graph()

    def _randomize_indices(self):
        """
        Choose the random subset of points drawn during a manual move.

        When the embedding has more than MAX_POINTS rows, a sorted random
        sample of MAX_POINTS row indices is stored in `_rand_indices`;
        otherwise `_rand_indices` is None.
        """
        # Always start from a clean state: a subset picked for an earlier,
        # larger embedding must not be reused once the number of points has
        # dropped to MAX_POINTS or below (its indices may be out of range).
        self._rand_indices = None
        if self._embedding_coords is None:
            # Nothing is plotted, hence nothing to subsample.
            return
        n = len(self._embedding_coords)
        if n > MAX_POINTS:
            self._rand_indices = sorted(
                np.random.choice(n, MAX_POINTS, replace=False))

    def _manual_move(self):
        """
        Update the plot while the view box reports a move.

        The projection is recomputed with the graph's current points. If a
        random subset was chosen in `_randomize_indices`, only that subset is
        plotted and the full widget state is restored afterwards; otherwise
        the graph coordinates are updated directly.
        """
        self.__replot_requested = False

        res = radviz(self.data, self.model_selected, self.graph.get_points())
        self._embedding_coords = res[0]
        if self._rand_indices is not None:
            # save widget state
            # (`__plot_random_subset` rebinds data, valid_data and the
            # embedding to their subsampled versions)
            selection = self.graph.selection
            valid_data = self.valid_data.copy()
            data = self.data.copy()
            ec = self._embedding_coords.copy()

            # plot subset
            self.__plot_random_subset(selection)

            # restore widget state
            self.graph.selection = selection
            self.valid_data = valid_data
            self.data = data
            self._embedding_coords = ec
        else:
            self.graph.update_coordinates()

    def __plot_random_subset(self, selection):
        """
        Plot only the rows listed in `_rand_indices`.

        The embedding, data and valid-data mask are replaced by their
        subsampled versions; the caller is expected to restore them. When a
        selection is given, its subsampled version is applied to the graph.
        """
        self._embedding_coords = self._embedding_coords[self._rand_indices]
        self.data = self.data[self._rand_indices]
        self.valid_data = self.valid_data[self._rand_indices]
        self.graph.reset_graph()
        if selection is None:
            return
        self.graph.selection = selection[self._rand_indices]
        self.graph.update_selection_colors()

    def _finish_manual_move(self):
        """Redraw the graph after a subsampled move, keeping the selection."""
        if self._rand_indices is None:
            return
        previous = self.graph.selection
        self.graph.reset_graph()
        if previous is None:
            return
        self.graph.selection = previous
        self.graph.select_by_index(self.graph.get_selection())

    def selection_changed(self):
        """Commit the outputs after the selection has changed."""
        self.commit()

    def commit(self):
        """
        Send the selected data, the annotated data and the components table.

        All three outputs are None unless data is present and at least one
        row is valid. The output data carries the projected coordinates in
        two extra meta columns (`variable_x`, `variable_y`); rows that are
        not valid get zeros there. The components table holds the anchors'
        x and y positions and their angle.
        """
        selected = annotated = components = None
        if self.data is not None and np.sum(self.valid_data):
            name = self.data.name
            domain = self.data.domain
            metas = domain.metas + (self.variable_x, self.variable_y)
            domain = Domain(domain.attributes, domain.class_vars, metas)
            # `np.float` was a plain alias of the builtin `float` and has
            # been removed from NumPy; the builtin yields the same dtype.
            embedding_coords = np.zeros((len(self.data), 2), dtype=float)
            embedding_coords[self.valid_data] = self._embedding_coords

            data = self.data.transform(domain)
            data[:, self.variable_x] = embedding_coords[:, 0][:, None]
            data[:, self.variable_y] = embedding_coords[:, 1][:, None]

            selection = self.graph.get_selection()
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            # More than one selection group: annotate with group labels.
            if self.graph.selection is not None and \
                    np.max(self.graph.selection) > 1:
                annotated = create_groups_table(data, self.graph.selection)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            # Third column of the points array is used as the components
            # table's attributes; the first two columns are the coordinates.
            points = self.graph.get_points()
            comp_domain = Domain(points[:, 2],
                                 metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(np.array(points[:, 1].T, dtype=float),
                               np.array(points[:, 0].T, dtype=float))
            # `np.vstack` replaces the deprecated alias `np.row_stack`.
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption listing the point properties to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        jitter = self.graph.jitter_size
        items = (
            ("Color", name(self.attr_color)),
            ("Label", name(self.attr_label)),
            ("Shape", name(self.attr_shape)),
            ("Size", name(self.attr_size)),
            # False (when there is no jittering) or the jitter size in percent.
            ("Jittering", jitter != 0 and "{} %".format(jitter)),
        )
        caption = report.render_items_vert(items)
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_context(cls, context, version):
        """
        Move the point-property settings out of the nested "graph" values.

        For contexts older than version 3, `attr_color`, `attr_size`,
        `attr_shape` and `attr_label` are copied from ``values["graph"]`` to
        the top level of the context values.

        Args:
            context: context whose `values` mapping is updated in place
            version: settings version the context was saved with
        """
        if version >= 3:
            return
        values = context.values
        # NOTE(review): the class declares settings_version = 2, so contexts
        # written by this very version also reach this point and need not
        # contain the legacy entries. Copy only what is actually present
        # instead of raising KeyError.
        graph_values = values.get("graph") or {}
        for key in ("attr_color", "attr_size", "attr_shape", "attr_label"):
            if key in graph_values:
                values[key] = graph_values[key]
Exemplo n.º 13
0
    def __init__(self):
        """Initialise widget state and build the plot and control-area GUI."""
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        # Main area: the projection plot.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self, box, "Plot", view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Size policy shared by the control-area boxes created below.
        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Lists of selected and remaining variables.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        # The VizRank button is placed into the add/remove button layout.
        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        # Radio buttons bound to the "placement" setting.
        box = gui.widgetBox(
            self.controlArea, "Placement", sizePolicy=SIZE_POLICY)
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=["Circular Placement",
                       "Linear Discriminant Analysis",
                       "Principal Component Analysis",
                       "Use input projection"],
            callback=self._change_placement
        )

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        # Slider bound to the "radius" setting (0-100, step 5).
        box = gui.widgetBox(self.controlArea, "Hide axes", sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(
            box, self, "radius", minValue=0, maxValue=100,
            step=5, label="Radius", createLabel=False, ticks=True,
            callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([g.ShowLegend,
                       g.ToolTipShowsAll,
                       g.ClassDensity,
                       g.LabelOnlySelected], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        # The graph uses the plot widget's own palette.
        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        # Start with empty plot data, then apply the stored placement.
        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True
Exemplo n.º 14
0
class OWLinearProjection(widget.OWWidget):
    """Widget showing a multi-axis projection of data onto a 2-D plane."""
    # Widget metadata.
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240

    # Saved selection; `__init__` moves it into a pending-restore attribute
    # and resets this setting to None.
    selection_indices = settings.Setting(None, schema_only=True)

    # Input signals; handled by `set_data`, `set_subset_data` and
    # `set_projection`.
    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        projection = Input("Projection", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    # Available axis placements; the integer values are also used to index
    # the placement radio buttons (see `_check_possible_opt`).
    Placement = Enum("Placement",
                     dict(Circular=0,
                          LDA=1,
                          PCA=2,
                          Projection=3),
                     type=int,
                     qualname="OWLinearProjection.Placement")

    # Per-placement name fragments (no component name for Projection).
    Component_name = {Placement.Circular: "C", Placement.LDA: "LD", Placement.PCA: "PC"}
    Variable_name = {Placement.Circular: "circular",
                     Placement.LDA: "lda",
                     Placement.PCA: "pca",
                     Placement.Projection: "projection"}

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    settings_version = 3
    settingsHandler = settings.DomainContextHandler()

    # Encoded selected/other variable lists; written in `closeContext` and
    # merged over the defaults in `set_data`.
    variable_state = settings.ContextSetting({})
    placement = settings.Setting(Placement.Circular)
    # Bound to the "Radius" slider (0-100); used by `_get_min_radius`.
    radius = settings.Setting(0)
    auto_commit = settings.Setting(True)

    resolution = 256

    graph = settings.SettingProvider(OWLinProjGraph)
    # Custom event type posted by `invalidate_plot` and handled in
    # `customEvent` to perform a delayed replot.
    ReplotRequest = QEvent.registerEventType()
    vizrank = settings.SettingProvider(LinearProjectionVizRank)
    graph_name = "graph.plot_widget.plotItem"

    class Warning(widget.OWWidget.Warning):
        no_cont_features = widget.Msg("Plotting requires numeric features")
        not_enough_components = widget.Msg("Input projection has less than 2 components")
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        proj_and_domain_match = widget.Msg("Projection and Data domains do not match")
        no_valid_data = widget.Msg("No projection due to invalid data")

    def __init__(self):
        """Initialise widget state and build the plot and control-area GUI."""
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        # Main area: the projection plot.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self, box, "Plot", view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Size policy shared by the control-area boxes created below.
        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Lists of selected and remaining variables.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        # The VizRank button is placed into the add/remove button layout.
        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        # Radio buttons bound to the "placement" setting.
        box = gui.widgetBox(
            self.controlArea, "Placement", sizePolicy=SIZE_POLICY)
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=["Circular Placement",
                       "Linear Discriminant Analysis",
                       "Principal Component Analysis",
                       "Use input projection"],
            callback=self._change_placement
        )

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        # Slider bound to the "radius" setting (0-100, step 5).
        box = gui.widgetBox(self.controlArea, "Hide axes", sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(
            box, self, "radius", minValue=0, maxValue=100,
            step=5, label="Radius", createLabel=False, ticks=True,
            callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([g.ShowLegend,
                       g.ToolTipShowsAll,
                       g.ClassDensity,
                       g.LabelOnlySelected], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        # The graph uses the plot widget's own palette.
        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        # Start with empty plot data, then apply the stored placement.
        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True

    def reset_graph_data(self):
        """Rescale the graph data and redraw with a reset view, if data exists."""
        if self.data is None:
            return
        self.graph.rescale_data()
        self._update_graph(reset_view=True)

    def keyPressEvent(self, event):
        """Forward the event to the base class, then refresh the tooltip."""
        super().keyPressEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    def keyReleaseEvent(self, event):
        """Forward the event to the base class, then refresh the tooltip."""
        super().keyReleaseEvent(event)
        modifiers = event.modifiers()
        self.graph.update_tooltip(modifiers)

    def _vizrank(self, attrs):
        """
        Make `attrs` the selected variables.

        Every other variable currently held by the widget (in either list)
        ends up in the "other" list. Nothing happens when `attrs` is empty.

        Args:
            attrs: sequence of variables to select
        """
        # Same guard as OWRadviz.vizrank_set_attrs: an empty suggestion must
        # not wipe the current selection.
        if not attrs:
            return
        self.variables_selection.display_none()
        # Gather all known variables *before* overwriting the models, so that
        # previously selected variables that are not in `attrs` are moved to
        # the "other" list instead of disappearing from both lists.
        known = list(self.model_selected) + list(self.model_other)
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in known if var not in attrs]

    def _change_placement(self):
        """
        React to a change of the placement setting.

        Variable selection is enabled only for the Circular and LDA
        placements, the radius slider only for non-Circular ones; the plot is
        then rebuilt and the outputs committed.
        """
        current = self.placement
        circular = self.Placement.Circular
        lda = self.Placement.LDA
        selectable = current in [circular, lda]
        self.variables_selection.set_enabled(selectable)
        self._vizrank_color_change()
        self.rslider.setEnabled(current != circular)
        self._setup_plot()
        self.commit()

    def _get_min_radius(self):
        """
        Return the minimum visible anchor radius.

        It is the `radius` setting taken as a percentage of the longest axis
        in the plot data, plus a small epsilon.
        """
        longest = np.max(np.linalg.norm(self.plotdata.axes, axis=1))
        return self.radius * longest / 100 + 1e-5

    def update_radius(self):
        """Update anchor visibility and the hide-circle for the current radius."""
        plotdata = self.plotdata
        assert plotdata is not None
        circle = plotdata.hidecircle
        if circle is None:
            return
        limit = self._get_min_radius()
        # Only anchors longer than the limit remain visible.
        for anchor, item in zip(plotdata.axes, plotdata.axisitems):
            item.setVisible(np.linalg.norm(anchor) > limit)
        circle.setRect(QRectF(-limit, -limit, 2 * limit, 2 * limit))

    def _new_plotdata(self):
        """Replace `self.plotdata` with a fresh, empty namespace."""
        empty_state = dict(
            valid_mask=None,
            embedding_coords=None,
            axisitems=[],
            axes=[],
            variables=[],
            data=None,
            hidecircle=None,
        )
        self.plotdata = namespace(**empty_state)

    def _anchor_circle(self, variables):
        """
        Add one anchor item per axis and, unless placement is Circular, the
        hide-circle.

        Args:
            variables: variables paired with `self.plotdata.axes`; their
                names label the anchors
        """
        # minimum visible anchor radius (radius)
        threshold = self._get_min_radius()
        anchors = []
        for axis, variable in zip(self.plotdata.axes, variables[:]):
            anchor_item = AnchorItem(line=QLineF(0, 0, *axis), text=variable.name)
            anchor_item.setVisible(np.linalg.norm(axis) > threshold)
            anchor_item.setPen(pg.mkPen((100, 100, 100)))
            anchor_item.setArrowVisible(True)
            self.viewbox.addItem(anchor_item)
            anchors.append(anchor_item)
        self.plotdata.axisitems = anchors

        if self.placement == self.Placement.Circular:
            return

        outline = QPen(Qt.lightGray, 1)
        outline.setCosmetic(True)

        circle = QGraphicsEllipseItem()
        circle.setRect(QRectF(-threshold, -threshold, 2 * threshold, 2 * threshold))
        circle.setPen(outline)

        self.viewbox.addItem(circle)
        self.plotdata.hidecircle = circle

    def update_colors(self):
        """Re-evaluate whether VizRank can be used."""
        self._vizrank_color_change()

    def clear(self):
        """Reset the widget: drop data, empty both lists, clear the plot."""
        self.data = None
        for model in (self.model_selected, self.model_other):
            model.clear()
        self._clear_plot()
        self.selection_indices = None

    def _clear_plot(self):
        """Remove anchors and the hide-circle from the view and reset plot data."""
        self.Warning.trivial_components.clear()
        plotdata = self.plotdata
        for item in plotdata.axisitems:
            self.viewbox.removeItem(item)
        if plotdata.hidecircle:
            self.viewbox.removeItem(plotdata.hidecircle)
        self._new_plotdata()
        self.graph.hide_axes()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.

        A low-priority ReplotRequest event is posted to the widget unless one
        is already pending.
        """
        if self.__replot_requested:
            return
        self.__replot_requested = True
        request = QEvent(self.ReplotRequest)
        QApplication.postEvent(self, request, Qt.LowEventPriority - 10)

    def init_attr_values(self):
        """Pass the current data to the graph via `set_domain`."""
        current = self.data
        self.graph.set_domain(current)

    def _vizrank_color_change(self):
        """
        Enable or disable the "Suggest Features" button.

        The button's tooltip explains why it is disabled. `n_cont_var` is
        updated with the number of continuous variables (other than the
        colour variable) found among the data's variables and metas.
        """
        is_enabled = False
        if self.data is None:
            self.btn_vizrank.setToolTip("There is no data.")
            return
        domain = self.data.domain
        # The filter used to read `v.is_primitive` without calling it; a
        # bound method is always truthy, so every variable was counted. The
        # count is meant to be of continuous variables (see `n_cont_var`, the
        # tooltip below and `_initialize`).
        candidates = [v for v in chain(domain.variables, domain.metas)
                      if v.is_continuous and v is not self.graph.attr_color]
        self.n_cont_var = len(candidates)
        if self.placement not in [self.Placement.Circular, self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.graph.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.graph.attr_color.is_continuous and self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear Discriminant Analysis Projection " \
                  "when continuous color variable is selected."
        elif len(candidates) < 3:
            msg = "Not enough available continuous variables"
        else:
            is_enabled = True
            msg = ""
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        self.vizrank.stop_and_reset(is_enabled)

    @Inputs.projection
    def set_projection(self, projection):
        """
        Handle the "Projection" input.

        A projection with fewer than two rows is rejected with a warning.
        An accepted projection switches the placement to
        `Placement.Projection`.

        Args:
            projection: table with the input projection, or None
        """
        self.Warning.not_enough_components.clear()
        # Compare with None explicitly: relying on truthiness let an empty
        # table (zero rows) slip past the check without a warning.
        if projection is not None and len(projection) < 2:
            self.Warning.not_enough_components()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection = projection

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset.

        The previous context is closed and the widget cleared. An SqlTable is
        downloaded (sampled when large). Data without any continuous variable
        is rejected with a warning. For non-empty data the variable lists are
        initialised and the saved variable state is restored.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            # Materialise SQL-backed data; large tables are sampled.
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.information("Data has been sampled")
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def restore_variable_state(data):
            # get the default encoded state, replacing the position with Inf
            defaults = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {key: (source_ind, np.inf)
                     for key, (source_ind, _) in defaults.items()}

            self.openContext(data.domain)

            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore, dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            return VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])

        self.closeContext()
        self.clear()
        self.Warning.no_cont_features.clear()
        self.information()
        data = sql(data)
        if data is not None:
            domain = data.domain
            has_continuous = any(
                var.is_continuous
                for var in chain(domain.variables, domain.metas))
            if not has_continuous:
                self.Warning.no_cont_features()
                data = None
        self.data = data
        self.init_attr_values()
        if data is not None and len(data):
            self._initialize(data)
            self.model_selected[:], self.model_other[:] = \
                restore_variable_state(data)
            self.vizrank.stop_and_reset()
            self.vizrank.attrs = self.data.domain.attributes

    def _check_possible_opt(self):
        """
        Enable the placement options that are usable with the current inputs.

        Without data all controls are disabled. With data, LDA is disabled
        unless the class is discrete with at least two values, and "Use input
        projection" is disabled without a projection; a placement that became
        unavailable falls back to Circular. Outputs are committed at the end.
        """
        def toggle_controls(is_enabled):
            for button in self.radio_placement.buttons:
                button.setEnabled(is_enabled)
            self.variables_selection.set_enabled(is_enabled)

        circular = self.Placement.Circular
        lda = self.Placement.LDA
        from_input = self.Placement.Projection

        if not self.data:
            self.graph.new_data(None)
            self.rslider.setEnabled(False)
            toggle_controls(False)
            self.commit()
            return

        toggle_controls(True)
        domain = self.data.domain
        if not domain.has_discrete_class or len(domain.class_var.values) < 2:
            self.radio_placement.buttons[lda].setEnabled(False)
            if self.placement == lda:
                self.placement = circular
        if not self.projection:
            self.radio_placement.buttons[from_input].setEnabled(False)
            if self.placement == from_input:
                self.placement = circular
        self._setup_plot()
        self.commit()

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        The cached subset mask is dropped, and the alpha-value control is
        enabled only while no subset is present.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        no_subset = subset is None
        self.controls.graph.alpha_value.setEnabled(no_subset)

    def handleNewSignals(self):
        """
        Process the inputs once all pending signals have been delivered.

        When both data and a subset are present, `_subset_mask` is computed
        as a boolean mask over the data rows whose ids occur in the subset.
        The placement options and the plot are then refreshed and the outputs
        committed.
        """
        if self.data is not None and self.subset_data is not None:
            # Update the plot's highlight items
            dataids = self.data.ids.ravel()
            subsetids = np.unique(self.subset_data.ids)
            # `np.isin` is the supported replacement for the deprecated
            # `np.in1d`; for 1-D input the result is identical.
            self._subset_mask = np.isin(dataids, subsetids, assume_unique=True)
        self._check_possible_opt()
        self._change_placement()
        self.commit()

    def customEvent(self, event):
        """Replot and commit on a ReplotRequest; defer other events to the base."""
        if event.type() != OWLinearProjection.ReplotRequest:
            super().customEvent(event)
            return
        self.__replot_requested = False
        self._setup_plot()
        self.commit()

    def closeContext(self):
        """Encode both variable lists into `variable_state`, then close the context."""
        selected = list(self.model_selected)
        remaining = list(self.model_other)
        self.variable_state = VariablesSelection.encode_var_state(
            [selected, remaining])
        super().closeContext()

    def _initialize(self, data):
        # Initialize the GUI controls from data's domain.
        vars = [v for v in chain(data.domain.metas, data.domain.attributes) if v.is_continuous]
        self.model_other[:] = vars[3:]
        self.model_selected[:] = vars[:3]

    def prepare_plot_data(self, variables):
        """Project the columns of `variables` onto two dimensions.

        The projection axes depend on the current placement: default
        circular axes, axes obtained from `_get_lda`, or the first two rows
        of the input projection.

        Args:
            variables: the variables whose data columns are projected.

        Returns:
            A tuple ``(valid_mask, embedding, axes)``: ``valid_mask`` flags
            the instances without NaN in any of the columns, ``embedding``
            stacks the x and y coordinates of the valid instances (one row
            per instance, passed through `normalized` when non-empty) and
            ``axes`` is the transposed axes matrix.  ``(None, None, None)``
            is returned when no axes are available for the placement.
        """
        def projection(variables):
            # Match the projection's columns to `variables` by identity
            # first and by name second; otherwise report a domain mismatch.
            if set(self.projection.domain.attributes).issuperset(variables):
                axes = self.projection[:2, variables].X
            elif set(f.name for f in
                     self.projection.domain.attributes).issuperset(f.name for f in variables):
                axes = self.projection[:2, [f.name for f in variables]].X
            else:
                self.Error.proj_and_domain_match()
                axes = None
            return axes

        def get_axes(variables):
            self.Error.proj_and_domain_match.clear()
            axes = None
            if self.placement == self.Placement.Circular:
                axes = LinProj.defaultaxes(len(variables))
            elif self.placement == self.Placement.LDA:
                axes = self._get_lda(self.data, variables)
            elif self.placement == self.Placement.Projection and self.projection:
                axes = projection(variables)
            return axes

        coords = [column_data(self.data, var, dtype=float) for var in variables]
        coords = np.vstack(coords)
        p, N = coords.shape
        # Joined with `and` on purpose: separated by a comma, the second
        # comparison would become the assertion message and never be checked.
        assert N == len(self.data) and p == len(variables)

        axes = get_axes(variables)
        if axes is None:
            return None, None, None
        assert axes.shape == (2, p)

        # Keep only the instances (columns of `coords`) without missing values.
        valid_mask = ~np.isnan(coords).any(axis=0)
        coords = coords[:, valid_mask]

        X, Y = np.dot(axes, coords)
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)

        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def _setup_plot(self):
        """Recompute the embedding for the current placement and plot it.

        The plot is cleared first.  New, uniquely named x/y variables are
        created, the variables to project are chosen according to the
        placement, and the resulting mask, coordinates and axes are stored
        in `plotdata`.  Plotting is skipped when there is no data, no
        variable, no embedding or no valid instance.
        """
        self._clear_plot()
        if self.data is None:
            return
        self.__replot_requested = False

        domain = self.data.domain
        taken_names = [var.name for var in chain(domain.variables, domain.metas)]
        proposed = ["{}-x".format(self.Variable_name[self.placement]),
                    "{}-y".format(self.Variable_name[self.placement])]
        unique = get_unique_names(taken_names, proposed)
        self.variable_x = ContinuousVariable(unique[0])
        self.variable_y = ContinuousVariable(unique[1])

        placements = self.Placement
        if self.placement in [placements.Circular, placements.LDA]:
            variables = list(self.model_selected)
        elif self.placement == placements.Projection:
            variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == placements.PCA:
            variables = [var for var in self.data.domain.attributes
                         if var.is_continuous]
        if not variables:
            self.graph.new_data(None)
            return

        if self.placement == placements.PCA:
            valid_mask, ec, axes = self._get_pca()
            # PCA reports the variables it was actually computed on.
            variables = self._pca.orig_domain.attributes
        else:
            valid_mask, ec, axes = self.prepare_plot_data(variables)

        plotdata = self.plotdata
        plotdata.variables = variables
        plotdata.valid_mask = valid_mask
        plotdata.embedding_coords = ec
        plotdata.axes = axes
        if valid_mask is None or ec is None or axes is None:
            return

        if not sum(valid_mask):
            self.Error.no_valid_data()
            self.graph.new_data(None, None)
            return
        self.Error.no_valid_data.clear()

        self._anchor_circle(variables=variables)
        self._plot()

    def _plot(self):
        """Show the embedding stored in `plotdata` on the graph.

        The input data is transformed into a domain extended with the two
        embedding variables as metas.  Rows outside `plotdata.valid_mask`
        keep zero coordinates and are not passed to the graph.  The
        extended table is stored in `plotdata.data`.
        """
        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes, class_vars=domain.class_vars, metas=new_metas)
        valid_mask = self.plotdata.valid_mask
        # Builtin `float`: the `np.float` alias was removed in NumPy 1.24.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[valid_mask] = self.plotdata.embedding_coords
        self.plotdata.data = data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        if self._subset_mask is not None and len(self._subset_mask):
            subset_data = data[self._subset_mask & valid_mask]
        else:
            subset_data = None
        self.graph.new_data(data[valid_mask], subset_data)
        if self._selection is not None:
            # The stored selection covers all rows; the graph only has the valid ones.
            self.graph.selection = self._selection[valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, False)

    def _get_lda(self, data, variables):
        """Return projection axes taken from an LDA fit on `variables`.

        The data is restricted to `variables` (keeping the class variables),
        an eigen-solver LDA is fitted and the first two columns of its
        scalings are returned transposed.  A 1x1 result is replaced by the
        fixed column ``[[1.], [0.]]``.
        """
        lda_domain = Domain(attributes=variables, class_vars=data.domain.class_vars)
        table = data.transform(lda_domain)
        model = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        model.fit(table.X, table.Y)
        axes = model.scalings_[:, :2].T
        if axes.shape == (1, 1):
            axes = np.array([[1.], [0.]])
        return axes

    def _get_pca(self):
        """Compute a two-component PCA embedding of the input data.

        The fitted model is stored in `self._pca`; a warning is raised when
        the cumulative explained variance is not finite.

        Returns:
            A tuple ``(valid_mask, coords, axes)``: ``valid_mask`` flags the
            rows of the transformed data without NaN, ``coords`` are the
            transformed coordinates and ``axes`` are the transposed PCA
            components rescaled so that the longest axis has length
            ``max_radius``.
        """
        data = self.data
        n_components = 2
        projector = PCA(n_components=n_components)
        projector.component = n_components
        projector.preprocessors = PCA.preprocessors + [Normalize()]

        pca = projector(data)
        cumulative = np.cumsum(pca.explained_variance_ratio_)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        lowest = np.abs(np.min(coords, axis=0))
        highest = np.max(coords, axis=0)
        max_radius = np.min([lowest, highest])
        axes = pca.components_.T.copy()
        longest = np.max(np.linalg.norm(axes, axis=1))
        axes *= max_radius / longest
        return valid_mask, coords, axes

    def _update_graph(self, reset_view=False):
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view)

    def update_density(self):
        """Redraw the graph without resetting the view."""
        self._update_graph(reset_view=False)

    def selection_changed(self):
        """Copy the graph's selection into the widget state and commit.

        The graph's selection only covers the valid rows, so it is expanded
        to one entry per row of the data (zero for invalid rows) and kept
        both as an array (`_selection`) and as a plain list
        (`selection_indices`).  Both are reset to None when the graph has
        no selection.
        """
        if self.graph.selection is None:
            self._selection = None
            self.selection_indices = None
        else:
            expanded = np.zeros(len(self.data), dtype=np.uint8)
            expanded[self.plotdata.valid_mask] = self.graph.selection
            self._selection = expanded
            self.selection_indices = expanded.tolist()
        self.commit()

    def prepare_data(self):
        """Do nothing; intentionally left empty."""
        pass

    def commit(self):
        """Send the selected data, the annotated data and the components.

        All three outputs are None unless both the input data and the
        plotted data are available.
        """
        def prepare_components():
            # The input projection is forwarded unchanged; for the other
            # placements a table is built with one row per projection axis.
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                attrs = [a for a in self.model_selected[:]]
                axes = self.plotdata.axes
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = [a for a in self._pca.orig_domain.attributes]
            if self.placement != self.Placement.Projection:
                domain = Domain([ContinuousVariable(a.name, compute_value=lambda _: None)
                                 for a in attrs],
                                metas=[StringVariable(name='component')])
                metas = np.array([["{}{}".format(self.Component_name[self.placement], i + 1)
                                   for i in range(axes.shape[1])]],
                                 dtype=object).T
                components = Table(domain, axes.T, metas=metas)
                components.name = 'components'
            else:
                components = self.projection
            return components

        selected = annotated = components = None
        if self.data is not None and self.plotdata.data is not None:
            components = prepare_components()

            graph = self.graph
            # Expand the graph's selection (valid rows only) to every row of
            # the data: valid rows receive their selection value, the rest
            # stay 0.  Without a selection all valid rows are zeroed as well.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None else 0

            selection = np.flatnonzero(mask)
            name = self.data.name
            data = self.plotdata.data
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes

            # Selection values above 1 denote several selection groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Report the plot with a caption listing the projection and point settings."""
        if self.data is None:
            return

        def var_name(var):
            # Falsy values (no variable chosen) are passed through as they are.
            return var and var.name

        # Indexed by the placement value.
        placement_titles = ("Circular Placement",
                            "Linear Discriminant Analysis",
                            "Principal Component Analysis",
                            "Input projection")
        graph = self.graph
        items = (
            ("Projection", placement_titles[self.placement]),
            ("Color", var_name(graph.attr_color)),
            ("Label", var_name(graph.attr_label)),
            ("Shape", var_name(graph.attr_shape)),
            ("Size", var_name(graph.attr_size)),
            ("Jittering", graph.jitter_size != 0 and "{} %".format(graph.jitter_size)),
        )
        caption = report.render_items_vert(items)
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored widget settings in place.

        Before version 2 the point width was stored as ``point_size``.
        Before version 3 the jitter, point width, alpha and class density
        settings were stored at the top level; they are copied into a
        nested ``graph`` dictionary.

        Args:
            settings_: the stored settings dictionary (modified in place).
            version: the version the settings were saved with.
        """
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_["graph"] = {
                "jitter_size": settings_["jitter_value"],
                "point_width": settings_["point_width"],
                "alpha_value": settings_["alpha_value"],
                "class_density": settings_["class_density"],
            }

    @classmethod
    def migrate_context(cls, context, version):
        """Upgrade a stored context in place.

        Before version 2 the color, shape and size variables were stored as
        1-based indices into lists of domain entries; they are converted to
        ``(name, type + 100)`` tuples, or None when the index is out of
        range.  Before version 3 the three values are additionally copied
        into a nested ``graph`` dictionary.

        Args:
            context: the stored context, read through `ordered_domain` and
                `values`.
            version: the version the context was saved with.
        """
        if version < 2:
            everything = context.ordered_domain
            # Type code 2 is presumably continuous and 1 discrete
            # (judging by which setting each list serves); confirm.
            type_two = [entry for entry in context.ordered_domain if entry[1] == 2]
            type_one = [entry for entry in context.ordered_domain if entry[1] == 1]
            conversions = (
                (everything, "color_index", "attr_color"),
                (type_one, "shape_index", "attr_shape"),
                (type_two, "size_index", "attr_size"),
            )
            for candidates, old_key, new_key in conversions:
                # Stored indices are 1-based; 0 therefore maps to "none".
                index = context.values[old_key][0] - 1
                if 0 <= index < len(candidates):
                    entry = candidates[index]
                    context.values[new_key] = (entry[0], entry[1] + 100)
                else:
                    context.values[new_key] = None
        if version < 3:
            context.values["graph"] = {
                key: context.values[key]
                for key in ("attr_color", "attr_shape", "attr_size")
            }
Exemplo n.º 15
0
    def __init__(self):
        """Create the widget state, the graph and the control-area GUI."""
        super().__init__()

        # Input data, optional subset and the cached mask of subset rows.
        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        # Variables under which the embedding coordinates are stored.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        # Main area: the plot, with its axes hidden.
        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        # Size policy shared by the control-area boxes below.
        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        # Lists of selected and remaining variables.
        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        # The "Suggest features" button is placed next to the add/remove buttons.
        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs)
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        # Point properties box provided by the graph's GUI helper.
        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        # Hand the plot widget's own palette to the graph.
        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")

        self.graph.zoom_actions(self)

        # Black circle spanning (-1, -1) to (1, 1); added to the plot elsewhere.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1., -1., 2., 2.))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))
Exemplo n.º 16
0
class OWRadviz(widget.OWWidget):
    """Radviz visualization widget.

    Plots the embedding computed by `radviz` for the selected variables and
    outputs the selected data, the annotated data and a table describing
    the anchor points ("components").
    """

    name = "Radviz"
    description = "Radviz"

    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    # Encoded contents of the selected/other variable lists, stored per context.
    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    # Custom event type used to schedule a delayed replot.
    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required"
        )
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Create the widget state, the graph and the control-area GUI."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        # Variables under which the embedding coordinates are stored.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs
        )
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(
            self.controlArea,
            self,
            "auto_commit",
            "Send Selection",
            auto_label="Send Automatically",
        )

        self.graph.zoom_actions(self)

        # Black circle spanning (-1, -1) to (1, 1); added to the plot in
        # setup_plot and manual_move.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1.0, -1.0, 2.0, 2.0))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        """Let the base class handle the resize, then re-place the anchor labels."""
        # The base implementation must run as well, otherwise whatever the
        # parent classes do on resize is silently skipped.
        super().resizeEvent(event)
        self._update_points_labels()

    def keyPressEvent(self, event):
        """Forward the event and update the tooltip for the pressed modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward the event and update the tooltip for the remaining modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        """Make `attrs` the selected variables; an empty value is ignored."""
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        """Reset `plotdata` to an empty namespace of plot-related state."""
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        """Refresh the VizRank button and the class-density checkbox."""
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        """Return the preferred widget size (800 x 500)."""
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        """Reset `plotdata` and remove all items from the plot widget."""
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(
                self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10
            )

    def init_attr_values(self):
        """Pass the current data to the graph's `set_domain`."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """Enable the VizRank button when ranking is possible and re-initialize VizRank.

        Ranking needs dense data with more than one instance, more than
        three variables in the two lists together, and a color variable
        whose column is not entirely NaN.
        """
        attr_color = self.graph.attr_color
        is_enabled = (
            self.data is not None
            and not self.data.is_sparse()
            and (len(self.model_other) + len(self.model_selected)) > 3
            and len(self.data) > 1
        )
        self.btn_vizrank.setEnabled(
            is_enabled
            and attr_color is not None
            and not np.isnan(
                self.data.get_column_view(attr_color)[0].astype(float)
            ).all()
        )
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """

        def sql(data):
            # Download small SQL tables completely, sample the large ones.
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {
                key: (source_ind, np.inf) for key, (source_ind, _) in state.items()
            }

            self.openContext(data.domain)

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)]
            )
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            primitive_vars = [
                var
                for var in chain(domain.class_vars, domain.metas, domain.attributes)
                if var.is_primitive()
            ]
            if len(primitive_vars) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        # Run the checks in order; the first one rejecting the data ends the chain.
        for f in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = f(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            primitive_vars = [
                v for v in chain(domain.metas, domain.attributes) if v.is_primitive()
            ]
            # Defaults: the first five variables are selected; stored context
            # settings, if any, are applied on top by `settings`.
            self.model_selected[:] = primitive_vars[:5]
            self.model_other[:] = primitive_vars[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = settings(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Rebuild the plot (or empty the graph) after the inputs changed, then commit."""
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                # np.isin is the supported spelling of np.in1d, which is
                # deprecated since NumPy 2.0; for 1-D input they are equivalent.
                self._subset_mask = np.isin(dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        """Serve a pending replot request; pass other events to the base class."""
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        """Encode both variable lists into `variable_state`, then close the context."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        """Run `radviz` for `variables` and store its three results in `plotdata`."""
        ec, points, valid_mask = radviz(self.data, variables, self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def setup_plot(self, reset_view=True):
        """Compute the embedding for the selected variables and draw it.

        Nothing is drawn without data, and a warning is shown when fewer
        than two variables are selected.  The input data extended with the
        two embedding metas is stored in `plotdata.data`.

        Args:
            reset_view: forwarded to the graph's `update_data`.
        """
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(
            attributes=domain.attributes, class_vars=domain.class_vars, metas=new_metas
        )
        mask = self.plotdata.valid_mask
        # Builtin `float`: the `np.float` alias was removed in NumPy 1.24.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = (
            data[self._subset_mask & mask]
            if self._subset_mask is not None and len(self._subset_mask)
            else None
        )
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)
        # Overlay the circle, the anchor points and their labels.
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def randomize_indices(self):
        """Pick at most MAX_POINTS random embedding rows into `plotdata.rand`.

        `plotdata.rand` is set to None when the embedding has no more than
        MAX_POINTS rows.
        """
        ec = self.plotdata.embedding_coords
        self.plotdata.rand = (
            np.random.choice(len(ec), MAX_POINTS, replace=False)
            if len(ec) > MAX_POINTS
            else None
        )

    def manual_move(self):
        """Redraw the plot from the current anchor points in `plotdata.points`.

        When `plotdata.rand` holds sampled indices, only those rows of the
        valid data are embedded and drawn; otherwise the embedding of all
        data is recomputed.
        """
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(
                data, list(self.model_selected), self.plotdata.points
            )
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        # Here the embedding is prepended to the attributes (not added as metas).
        attributes = (self.variable_x, self.variable_y) + self.data.domain.attributes
        domain = Domain(
            attributes=attributes,
            class_vars=self.data.domain.class_vars,
            metas=self.data.domain.metas,
        )
        data = Table.from_numpy(
            domain, X=np.hstack((ec, data_x)), Y=data_y, metas=data_metas
        )
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=True)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def _update_points_labels(self):
        """Recreate the text labels next to the anchor points.

        Each row of `plotdata.points` supplies the x and y position and, in
        its third field, the variable whose name is shown.
        """
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width()) / 2.0 - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            width = (
                metrics.width(name) if metrics.width(name) < br.width() else br.width()
            )
            width = sx * (width + 5)
            height = sy * br.height()
            # Shift labels of left-side points by their width and of
            # upper-side points by their height.
            ti.setPos(row[0] - (row[0] < 0) * width, row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        """Schedule a replot."""
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        """Rescale the graph's data and redraw it; does nothing without data."""
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        """Empty the graph's zoom stack and redraw it if it holds data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)

    def update_density(self):
        """Redraw the graph, resetting the view."""
        self._update_graph(reset_view=True)

    def selection_changed(self):
        """Copy the graph's selection into `_selection` (valid rows only) and commit."""
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        """Do nothing; intentionally left empty."""
        pass

    def commit(self):
        """Send the selected data, the annotated data and the components.

        All outputs are None when nothing has been plotted.  The components
        table has three rows ("RX", "RY" and "angle") and one column per
        anchor point.
        """
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            # Expand the graph's selection (valid rows only) to every row of
            # the data: valid rows receive their selection value, the rest
            # stay 0.  Without a selection all valid rows are zeroed as well.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None else 0
            selection = np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            # Selection values above 1 denote several selection groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            comp_domain = Domain(
                self.plotdata.points[:, 2], metas=[StringVariable(name="component")]
            )

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(
                np.array(self.plotdata.points[:, 1].T, dtype=float),
                np.array(self.plotdata.points[:, 0].T, dtype=float),
            )
            # np.vstack: np.row_stack is a deprecated alias (NumPy 2.0).
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((self.plotdata.points[:, :2].T, angle)),
                metas=metas,
            )
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Report the plot with a caption listing the point settings."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert(
            (
                ("Color", name(self.graph.attr_color)),
                ("Label", name(self.graph.attr_label)),
                ("Shape", name(self.graph.attr_shape)),
                ("Size", name(self.graph.attr_size)),
                (
                    "Jittering",
                    self.graph.jitter_size != 0
                    and "{} %".format(self.graph.jitter_size),
                ),
            )
        )
        self.report_plot()
        if caption:
            self.report_caption(caption)
Exemplo n.º 17
0
 def closeContext(self):
     """Encode both variable lists into `variable_state`, then close the context."""
     selected = list(self.model_selected)
     remaining = list(self.model_other)
     self.variable_state = VariablesSelection.encode_var_state(
         [selected, remaining])
     super().closeContext()
Exemplo n.º 18
0
class OWLinearProjection(OWAnchorProjectionWidget):
    """Linear Projection widget: a multi-axis projection of data onto a plane.

    The projection axes are obtained with one of the ``Placement`` methods:
    a circular layout, LDA, PCA, or a projection table received on the
    "Projection" input.
    """
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    # Adds a "Projection" table input to the inputs of the base widget.
    class Inputs(OWAnchorProjectionWidget.Inputs):
        projection_input = Input("Projection", Table)

    # Axis placement methods. Members are int-valued; the widget also uses
    # them as indices into the placement radio buttons.
    Placement = Enum("Placement", dict(Circular=0, LDA=1, PCA=2, Projection=3),
                     type=int, qualname="OWLinearProjection.Placement")

    # Prefix of the row labels in the components output ("C1", "LD2", ...).
    # Projection has no entry: its input table is forwarded unchanged.
    Component_name = {Placement.Circular: "C", Placement.LDA: "LD",
                      Placement.PCA: "PC"}
    # Prefix of the embedding variable names ("<prefix>-x", "<prefix>-y").
    Variable_name = {Placement.Circular: "circular",
                     Placement.LDA: "lda",
                     Placement.PCA: "pca",
                     Placement.Projection: "projection"}
    # Human-readable names: radio button labels and the report caption.
    Projection_name = {Placement.Circular: "Circular Placement",
                       Placement.LDA: "Linear Discriminant Analysis",
                       Placement.PCA: "Principal Component Analysis",
                       Placement.Projection: "Use input projection"}

    # Current settings layout version; migrate_settings and migrate_context
    # upgrade settings stored by older versions.
    settings_version = 4

    # Chosen placement method.
    placement = Setting(Placement.Circular)
    # (name, vartype) pairs of the variables in the "selected" list.
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    class Warning(OWAnchorProjectionWidget.Warning):
        not_enough_comp = Msg("Input projection has less than two components")
        trivial_components = Msg(
            "All components of the PCA are trivial (explain zero variance). "
            "Input data is constant (or near constant).")

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")
        proj_and_domain_match = Msg("Projection and Data domains do not match")

    def __init__(self):
        """Create the variable list models and the VizRank dialog, then run the
        base initializer.

        The models and the VizRank button are created before
        ``super().__init__()``; presumably the base initializer builds the
        controls that use them (see ``_add_controls``) -- confirm before
        reordering.
        """
        selected_model = VariableListModel(enable_dnd=True)
        self.model_selected = selected_model
        # Any insertion into or removal from the selected list triggers an update.
        for signal in (selected_model.rowsInserted, selected_model.rowsRemoved):
            signal.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        dialog, button = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)
        self.vizrank = dialog
        self.btn_vizrank = button

        super().__init__()
        self.projection_input = None
        self.variables = None

    def _add_controls(self):
        """Build the control area.

        Adds the variable lists, the placement radio buttons, the inherited
        controls and a "Hide radius" slider, then removes the stretch widget
        from the control area.
        """
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()

        slider_options = dict(
            master=self.graph, value="hide_radius",
            minValue=0, maxValue=100, step=10,
            createLabel=False, callback=self.__radius_slider_changed,
        )
        self.graph.gui.add_control(
            self._effects_box, gui.hSlider, "Hide radius:", **slider_options)

        stretch = self.control_area_stretch
        self.controlArea.layout().removeWidget(stretch)
        stretch.setParent(None)

    def _add_controls_variables(self):
        """Create the variable selection lists and add the VizRank button to the
        layout of their ``add_remove`` widget."""
        selection = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea)
        self.variables_selection = selection
        button_layout = selection.add_remove.layout()
        button_layout.addWidget(self.btn_vizrank)

    def _add_controls_placement(self):
        """Add a box of radio buttons, one per placement method, bound to
        ``placement``."""
        size_policy = (QSizePolicy.Minimum, QSizePolicy.Maximum)
        box = gui.widgetBox(self.controlArea, True, sizePolicy=size_policy)
        labels = [self.Projection_name[method] for method in self.Placement]
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement", btnLabels=labels,
            callback=self.__placement_radio_changed)

    @property
    def continuous_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_continuous]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.continuous_variables
                               if var not in attrs]

    def __model_selected_changed(self):
        """Handle a change of the selected-variables list.

        Stores the selection as (name, vartype) pairs, resets the projection
        and the projection variables, then calls ``_check_options``,
        ``setup_plot`` and ``commit``.
        """
        chosen = []
        for var in self.model_selected:
            chosen.append((var.name, vartype(var)))
        self.selected_vars = chosen
        self.projection = self.variables = None
        self._check_options()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        """Handle a change of the placement method.

        The variable lists are enabled only for Circular and LDA, the hide
        radius control for everything except Circular. The projection is then
        reset and VizRank, the plot and the outputs are refreshed.
        """
        methods = self.Placement
        user_picks_variables = self.placement in [methods.Circular, methods.LDA]
        self.variables_selection.set_enabled(user_picks_variables)
        radius_applies = self.placement != methods.Circular
        self.controls.graph.hide_radius.setEnabled(radius_applies)

        self.projection = self.variables = None
        self._init_vizrank()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        """Slider callback: ask the graph to update its radius."""
        graph = self.graph
        graph.update_radius()

    def colors_changed(self):
        """Run the inherited handler, then re-evaluate VizRank availability
        (``_init_vizrank`` reads ``attr_color``)."""
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        """Receive new data and fill the two variable lists.

        With a stored selection (``selected_vars``), those variables are
        looked up in the new domain by name and the remaining continuous
        variables go to the other list. Otherwise the first three continuous
        variables are selected. Options and VizRank are refreshed either way.
        """
        super().set_data(data)
        if self.data is not None:
            if len(self.selected_vars):
                # Capture domain and names before touching the models:
                # changing the selected model rewrites ``selected_vars``.
                domain = self.data.domain
                names = [entry[0] for entry in self.selected_vars]
                self.model_selected[:] = [domain[name] for name in names]
                self.model_other[:] = [
                    domain[var.name] for var in self.continuous_variables
                    if var.name not in names
                ]
            else:
                self.model_selected[:] = self.continuous_variables[:3]
                self.model_other[:] = self.continuous_variables[3:]

        self._check_options()
        self._init_vizrank()

    def _check_options(self):
        """Enable only the placement options that are usable right now.

        With data present, LDA is disabled unless the class is discrete with
        at least two distinct values in ``data.Y``, and Projection is disabled
        without an input projection. A disabled method that is currently
        chosen falls back to Circular. Finally the variable lists and the
        hide radius control are enabled according to the placement.
        """
        buttons = self.radio_placement.buttons

        def disable(method):
            # Disable the radio button; if that method is the current one,
            # switch to Circular.
            buttons[method].setEnabled(False)
            if self.placement == method:
                self.placement = self.Placement.Circular

        for button in buttons:
            button.setEnabled(True)

        if self.data is not None:
            lda_usable = (self.data.domain.has_discrete_class
                          and len(np.unique(self.data.Y)) >= 2)
            if not lda_usable:
                disable(self.Placement.LDA)
            if not self.projection_input:
                disable(self.Placement.Projection)

        user_picks_variables = self.placement in [self.Placement.Circular,
                                                  self.Placement.LDA]
        self.variables_selection.set_enabled(user_picks_variables)
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)

    def _init_vizrank(self):
        """Enable or disable the "Suggest Features" button.

        When a precondition fails, the button is disabled and its tooltip
        gives the reason. When all hold, the button is enabled unless every
        value of the color column is NaN, and VizRank is initialized if the
        button ends up enabled.
        """
        def blocking_reason():
            # Message for the first failed precondition; None if all pass.
            if self.data is None:
                return "There is no data."
            if self.placement not in [self.Placement.Circular,
                                      self.Placement.LDA]:
                return "Suggest Features works only for Circular and " \
                       "Linear Discriminant Analysis Projection"
            if self.attr_color is None:
                return "Color variable has to be selected"
            if self.attr_color.is_continuous and \
                    self.placement == self.Placement.LDA:
                return "Suggest Features does not work for Linear " \
                       "Discriminant Analysis Projection when " \
                       "continuous color variable is selected."
            candidates = [v for v in self.continuous_variables
                          if v is not self.attr_color]
            if len(candidates) < 3:
                return "Not enough available continuous variables"
            if len(self.data[self.valid_data]) < 2:
                return "Not enough valid data instances"
            return None

        tooltip = blocking_reason()
        if tooltip is None:
            tooltip = ""
            color_column = self.data.get_column_view(self.attr_color)[0]
            enabled = not np.isnan(color_column.astype(float)).all()
        else:
            enabled = False

        self.btn_vizrank.setToolTip(tooltip)
        self.btn_vizrank.setEnabled(enabled)
        if enabled:
            self.vizrank.initialize()

    def check_data(self):
        """Run the inherited checks, then reject data that has no continuous
        variables by raising ``no_cont_features`` and dropping the data."""
        super().check_data()
        if self.data is None:
            return
        if not len(self.continuous_variables):
            self.Error.no_cont_features()
            self.data = None

    def init_attr_values(self):
        """Run the inherited initialization and empty the stored variable
        selection."""
        super().init_attr_values()
        self.selected_vars = list()

    @Inputs.projection_input
    def set_projection(self, projection):
        """Input handler for the "Projection" table.

        A non-empty projection with fewer than two rows is rejected with a
        warning. An accepted projection switches the placement to Projection.
        The placement options are refreshed in every case.
        """
        self.Warning.not_enough_comp.clear()
        too_few_components = projection and len(projection) < 2
        if too_few_components:
            self.Warning.not_enough_comp()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection_input = projection
        self._check_options()

    def get_embedding(self):
        self.valid_data = None
        if self.data is None or not self.variables:
            return None

        if self.placement == self.Placement.PCA:
            self.valid_data, ec, self.projection = self._get_pca()
            self.variables = self._pca.orig_domain.attributes
        else:
            self.valid_data, ec, self.projection = \
                self.prepare_projection_data(self.variables)

        self.Error.no_valid_data.clear()
        if self.valid_data is None or not sum(self.valid_data) or \
                self.projection is None or ec is None:
            self.Error.no_valid_data()
            return None

        embedding = np.zeros((len(self.data), 2), dtype=np.float)
        embedding[self.valid_data] = ec
        return embedding

    def prepare_projection_data(self, variables):
        """Project the columns of ``variables`` onto the current axes.

        Parameters
        ----------
        variables : sequence of variables
            Variables whose data columns are projected; must not be empty.

        Returns
        -------
        tuple
            ``(valid_mask, points, axes.T)``. ``valid_mask`` marks the rows
            with no NaN in any of the columns, ``points`` has one ``(x, y)``
            row per valid data row (each coordinate passed through
            ``normalized``), and ``axes`` are the axes for the current
            placement. ``(None, None, None)`` when no axes are available.
        """
        def projection(_vars):
            # First two rows of the input projection, with columns matched to
            # ``_vars`` by variable first and by variable name second.
            attrs = self.projection_input.domain.attributes
            if set(attrs).issuperset(_vars):
                return self.projection_input[:2, _vars].X
            elif set(f.name for f in attrs).issuperset(f.name for f in _vars):
                return self.projection_input[:2, [f.name for f in _vars]].X
            else:
                self.Error.proj_and_domain_match()
                return None

        def get_axes(_vars):
            # Axes for the current placement; None if it provides none here.
            self.Error.proj_and_domain_match.clear()
            if self.placement == self.Placement.Circular:
                return LinProj.defaultaxes(len(_vars))
            elif self.placement == self.Placement.LDA:
                return self._get_lda(self.data, _vars)
            elif self.placement == self.Placement.Projection and \
                    self.projection_input is not None:
                return projection(_vars)
            else:
                return None

        # np.vstack requires a real sequence: passing a generator was
        # deprecated in NumPy 1.16 and raises TypeError in current releases.
        coords = np.vstack(
            [column_data(self.data, v, float) for v in variables])
        axes = get_axes(variables)
        if axes is None:
            return None, None, None

        valid_mask = ~np.isnan(coords).any(axis=0)
        X, Y = np.dot(axes, coords[:, valid_mask])
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)
        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def get_anchors(self):
        if self.projection is None:
            return None, None
        return self.projection, [v.name for v in self.variables]

    def setup_plot(self):
        """Choose the projection variables for the current placement, then run
        the inherited plot setup."""
        self.init_projection_variables()
        super().setup_plot()

    def init_projection_variables(self):
        self.variables = None
        if self.data is None:
            return

        if self.placement in [self.Placement.Circular, self.Placement.LDA]:
            self.variables = self.model_selected[:]
        elif self.placement == self.Placement.Projection:
            self.variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == self.Placement.PCA:
            self.variables = [var for var in self.data.domain.attributes
                              if var.is_continuous]

    def _get_lda(self, data, variables):
        """Fit LDA on ``variables`` against the data's class variables and
        return the first two columns of its scalings, transposed.

        A result of shape ``(1, 1)`` is replaced by ``[[1.], [0.]]``.
        """
        lda_domain = Domain(variables, data.domain.class_vars)
        projected = data.transform(lda_domain)
        model = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        model.fit(projected.X, projected.Y)
        axes = model.scalings_[:, :2].T
        if axes.shape == (1, 1):
            axes = np.array([[1.], [0.]])
        return axes

    def _get_pca(self):
        """Fit a two-component PCA on the data and return
        ``(valid_mask, coords, axes)``.

        The fitted model is kept in ``_pca``. ``trivial_components`` is
        raised when the cumulative explained variance ratio is not finite.
        ``axes`` are the transposed components, scaled so that the longest
        one has length ``max_radius``.
        """
        projector = PCA(n_components=2)
        projector.component = 2
        projector.preprocessors = PCA.preprocessors + [Normalize()]

        model = projector(self.data)
        explained = np.cumsum(model.explained_variance_ratio_)

        self._pca = model
        if not np.isfinite(explained[-1]):
            self.Warning.trivial_components()

        coords = model(self.data).X
        valid_mask = ~np.isnan(coords).any(axis=1)

        # scale axes
        lowest = np.abs(np.min(coords, axis=0))
        highest = np.max(coords, axis=0)
        max_radius = np.min([lowest, highest])
        axes = model.components_.T.copy()
        longest_axis = np.max(np.linalg.norm(axes, axis=1))
        axes *= max_radius / longest_axis
        return valid_mask, coords, axes

    def send_components(self):
        """Send the projection axes on the components output.

        For Circular, LDA and PCA a table is built with one row per axis,
        labelled in a "component" meta column (e.g. "C1", "PC2"). For
        Projection the input projection is forwarded. ``None`` is sent when
        there is no data, no valid data or no projection.
        """
        components = None
        ready = (self.data is not None
                 and self.valid_data is not None
                 and self.projection is not None)
        if ready:
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                axes = self.projection
                attrs = self.model_selected
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = self._pca.orig_domain.attributes

            if self.placement == self.Placement.Projection:
                components = self.projection_input
            else:
                meta_attrs = [StringVariable(name='component')]
                n_components = axes.shape[1]
                prefix = self.Component_name[self.placement]
                labels = ["{}{}".format(prefix, i + 1)
                          for i in range(n_components)]
                metas = np.array([labels], dtype=object).T
                domain = Domain(attrs, metas=meta_attrs)
                components = Table(domain, axes.T, metas=metas)
                components.name = self.data.name
        self.Outputs.components.send(components)

    def _get_projection_variables(self):
        """Name the embedding variables after the current placement
        ("<prefix>-x", "<prefix>-y"), then defer to the inherited method."""
        prefix = self.Variable_name[self.placement]
        self.embedding_variables_names = tuple(
            "{}-{}".format(prefix, axis) for axis in ("x", "y"))
        return super()._get_projection_variables()

    def _get_send_report_caption(self):
        """Render the report caption: projection name, the color, label, shape
        and size variables, and the jittering amount."""
        caption_var = self._get_caption_var_name
        rows = [("Projection", self.Projection_name[self.placement])]
        labelled_vars = (
            ("Color", self.attr_color),
            ("Label", self.attr_label),
            ("Shape", self.attr_shape),
            ("Size", self.attr_size),
        )
        for label, var in labelled_vars:
            rows.append((label, caption_var(var)))
        rows.append(
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size)))
        return report.render_items_vert(tuple(rows))

    def clear(self):
        """Reset the projection variables, empty both variable lists and run
        the inherited ``clear``."""
        self.variables = None
        for model in (self.model_selected, self.model_other):
            if model:
                model.clear()
        super().clear()

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph
        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                selection = settings_["selection_indices"]
                settings_["selection"] = [(i, 1) for i, selected in
                                          enumerate(selection) if selected]

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index", "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
        if version == 3:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]