Exemplo n.º 1
0
class OWLinearProjection(widget.OWWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    selection_indices = settings.Setting(None, schema_only=True)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        projection = Input("Projection", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    Placement = Enum("Placement",
                     dict(Circular=0, LDA=1, PCA=2, Projection=3),
                     type=int,
                     qualname="OWLinearProjection.Placement")

    Component_name = {
        Placement.Circular: "C",
        Placement.LDA: "LD",
        Placement.PCA: "PC"
    }
    Variable_name = {
        Placement.Circular: "circular",
        Placement.LDA: "lda",
        Placement.PCA: "pca",
        Placement.Projection: "projection"
    }

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    settings_version = 3
    settingsHandler = settings.DomainContextHandler()

    variable_state = settings.ContextSetting({})
    placement = settings.Setting(Placement.Circular)
    radius = settings.Setting(0)
    auto_commit = settings.Setting(True)

    resolution = 256

    graph = settings.SettingProvider(OWLinProjGraph)
    ReplotRequest = QEvent.registerEventType()
    vizrank = settings.SettingProvider(LinearProjectionVizRank)
    graph_name = "graph.plot_widget.plotItem"

    class Warning(widget.OWWidget.Warning):
        no_cont_features = widget.Msg("Plotting requires numeric features")
        not_enough_components = widget.Msg(
            "Input projection has less than 2 components")
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        proj_and_domain_match = widget.Msg(
            "Projection and Data domains do not match")
        no_valid_data = widget.Msg("No projection due to invalid data")

    def __init__(self):
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self,
                                    box,
                                    "Plot",
                                    view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        box = gui.widgetBox(self.controlArea,
                            "Placement",
                            sizePolicy=SIZE_POLICY)
        self.radio_placement = gui.radioButtonsInBox(
            box,
            self,
            "placement",
            btnLabels=[
                "Circular Placement", "Linear Discriminant Analysis",
                "Principal Component Analysis", "Use input projection"
            ],
            callback=self._change_placement)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        box = gui.widgetBox(self.controlArea,
                            "Hide axes",
                            sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(box,
                                   self,
                                   "radius",
                                   minValue=0,
                                   maxValue=100,
                                   step=5,
                                   label="Radius",
                                   createLabel=False,
                                   ticks=True,
                                   callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([
            g.ShowLegend, g.ToolTipShowsAll, g.ClassDensity,
            g.LabelOnlySelected
        ], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)
        gui.auto_commit(self.controlArea,
                        self,
                        "auto_commit",
                        "Send Selection",
                        auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True

    def reset_graph_data(self):
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph(reset_view=True)

    def keyPressEvent(self, event):
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def _vizrank(self, attrs):
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [
            var for var in self.model_other if var not in attrs
        ]

    def _change_placement(self):
        placement = self.placement
        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        self.variables_selection.set_enabled(placement in [p_Circular, p_LDA])
        self._vizrank_color_change()
        self.rslider.setEnabled(placement != p_Circular)
        self._setup_plot()
        self.commit()

    def _get_min_radius(self):
        return self.radius * np.max(np.linalg.norm(self.plotdata.axes,
                                                   axis=1)) / 100 + 1e-5

    def update_radius(self):
        # Update the anchor/axes visibility
        pd = self.plotdata
        assert pd is not None
        if pd.hidecircle is None:
            return
        min_radius = self._get_min_radius()
        for anchor, item in zip(pd.axes, pd.axisitems):
            item.setVisible(np.linalg.norm(anchor) > min_radius)
        pd.hidecircle.setRect(
            QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

    def _new_plotdata(self):
        self.plotdata = namespace(valid_mask=None,
                                  embedding_coords=None,
                                  axisitems=[],
                                  axes=[],
                                  variables=[],
                                  data=None,
                                  hidecircle=None)

    def _anchor_circle(self, variables):
        # minimum visible anchor radius (radius)
        min_radius = self._get_min_radius()
        axisitems = []
        for anchor, var in zip(self.plotdata.axes, variables[:]):
            axitem = AnchorItem(
                line=QLineF(0, 0, *anchor),
                text=var.name,
            )
            axitem.setVisible(np.linalg.norm(anchor) > min_radius)
            axitem.setPen(pg.mkPen((100, 100, 100)))
            axitem.setArrowVisible(True)
            self.viewbox.addItem(axitem)
            axisitems.append(axitem)

        self.plotdata.axisitems = axisitems
        if self.placement == self.Placement.Circular:
            return

        hidecircle = QGraphicsEllipseItem()
        hidecircle.setRect(
            QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

        _pen = QPen(Qt.lightGray, 1)
        _pen.setCosmetic(True)
        hidecircle.setPen(_pen)

        self.viewbox.addItem(hidecircle)
        self.plotdata.hidecircle = hidecircle

    def update_colors(self):
        self._vizrank_color_change()

    def clear(self):
        # Clear/reset the widget state
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()
        self.selection_indices = None

    def _clear_plot(self):
        self.Warning.trivial_components.clear()
        for axisitem in self.plotdata.axisitems:
            self.viewbox.removeItem(axisitem)
        if self.plotdata.hidecircle:
            self.viewbox.removeItem(self.plotdata.hidecircle)
        self._new_plotdata()
        self.graph.hide_axes()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest),
                                   Qt.LowEventPriority - 10)

    def init_attr_values(self):
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        is_enabled = False
        if self.data is None:
            self.btn_vizrank.setToolTip("There is no data.")
            return
        vars = [
            v
            for v in chain(self.data.domain.variables, self.data.domain.metas)
            if v.is_primitive and v is not self.graph.attr_color
        ]
        self.n_cont_var = len(vars)
        if self.placement not in [self.Placement.Circular, self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.graph.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.graph.attr_color.is_continuous and self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear Discriminant Analysis Projection " \
                  "when continuous color variable is selected."
        elif len(vars) < 3:
            msg = "Not enough available continuous variables"
        else:
            is_enabled = True
            msg = ""
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        self.vizrank.stop_and_reset(is_enabled)

    @Inputs.projection
    def set_projection(self, projection):
        self.Warning.not_enough_components.clear()
        if projection and len(projection) < 2:
            self.Warning.not_enough_components()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection = projection

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.information("Data has been sampled")
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected),
                 list(self.model_other)])
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in state.items()
            }

            self.openContext(data.domain)
            selected_keys = [
                key for key, (sind, _) in self.variable_state.items()
                if sind == 0
            ]

            if set(selected_keys).issubset(set(state.keys())):
                pass

            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore,
                                           dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected),
                        list(self.model_other)])
            return selected, other

        self.closeContext()
        self.clear()
        self.Warning.no_cont_features.clear()
        self.information()
        data = sql(data)
        if data is not None:
            domain = data.domain
            vars = [
                var for var in chain(domain.variables, domain.metas)
                if var.is_continuous
            ]
            if not len(vars):
                self.Warning.no_cont_features()
                data = None
        self.data = data
        self.init_attr_values()
        if data is not None and len(data):
            self._initialize(data)
            self.model_selected[:], self.model_other[:] = settings(data)
            self.vizrank.stop_and_reset()
            self.vizrank.attrs = self.data.domain.attributes if self.data is not None else []

    def _check_possible_opt(self):
        def set_enabled(is_enabled):
            for btn in self.radio_placement.buttons:
                btn.setEnabled(is_enabled)
            self.variables_selection.set_enabled(is_enabled)

        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        p_Input = self.Placement.Projection
        if self.data:
            set_enabled(True)
            domain = self.data.domain
            if not domain.has_discrete_class or len(
                    domain.class_var.values) < 2:
                self.radio_placement.buttons[p_LDA].setEnabled(False)
                if self.placement == p_LDA:
                    self.placement = p_Circular
            if not self.projection:
                self.radio_placement.buttons[p_Input].setEnabled(False)
                if self.placement == p_Input:
                    self.placement = p_Circular
            self._setup_plot()
        else:
            self.graph.new_data(None)
            self.rslider.setEnabled(False)
            set_enabled(False)
        self.commit()

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        if self.data is not None and self.subset_data is not None:
            # Update the plot's highlight items
            dataids = self.data.ids.ravel()
            subsetids = np.unique(self.subset_data.ids)
            self._subset_mask = np.in1d(dataids, subsetids, assume_unique=True)
        self._check_possible_opt()
        self._change_placement()
        self.commit()

    def customEvent(self, event):
        if event.type() == OWLinearProjection.ReplotRequest:
            self.__replot_requested = False
            self._setup_plot()
            self.commit()
        else:
            super().customEvent(event)

    def closeContext(self):
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected),
             list(self.model_other)])
        super().closeContext()

    def _initialize(self, data):
        # Initialize the GUI controls from data's domain.
        vars = [
            v for v in chain(data.domain.metas, data.domain.attributes)
            if v.is_continuous
        ]
        self.model_other[:] = vars[3:]
        self.model_selected[:] = vars[:3]

    def prepare_plot_data(self, variables):
        def projection(variables):
            if set(self.projection.domain.attributes).issuperset(variables):
                axes = self.projection[:2, variables].X
            elif set(f.name
                     for f in self.projection.domain.attributes).issuperset(
                         f.name for f in variables):
                axes = self.projection[:2, [f.name for f in variables]].X
            else:
                self.Error.proj_and_domain_match()
                axes = None
            return axes

        def get_axes(variables):
            self.Error.proj_and_domain_match.clear()
            axes = None
            if self.placement == self.Placement.Circular:
                axes = LinProj.defaultaxes(len(variables))
            elif self.placement == self.Placement.LDA:
                axes = self._get_lda(self.data, variables)
            elif self.placement == self.Placement.Projection and self.projection:
                axes = projection(variables)
            return axes

        coords = [
            column_data(self.data, var, dtype=float) for var in variables
        ]
        coords = np.vstack(coords)
        p, N = coords.shape
        assert N == len(self.data), p == len(variables)

        axes = get_axes(variables)
        if axes is None:
            return None, None, None
        assert axes.shape == (2, p)

        valid_mask = ~np.isnan(coords).any(axis=0)
        coords = coords[:, valid_mask]

        X, Y = np.dot(axes, coords)
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)

        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def _setup_plot(self):
        self._clear_plot()
        if self.data is None:
            return
        self.__replot_requested = False
        names = get_unique_names([
            v.name
            for v in chain(self.data.domain.variables, self.data.domain.metas)
        ], [
            "{}-x".format(self.Variable_name[self.placement]), "{}-y".format(
                self.Variable_name[self.placement])
        ])
        self.variable_x = ContinuousVariable(names[0])
        self.variable_y = ContinuousVariable(names[1])
        if self.placement in [self.Placement.Circular, self.Placement.LDA]:
            variables = list(self.model_selected)
        elif self.placement == self.Placement.Projection:
            variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == self.Placement.PCA:
            variables = [
                var for var in self.data.domain.attributes if var.is_continuous
            ]
        if not variables:
            self.graph.new_data(None)
            return
        if self.placement == self.Placement.PCA:
            valid_mask, ec, axes = self._get_pca()
            variables = self._pca.orig_domain.attributes
        else:
            valid_mask, ec, axes = self.prepare_plot_data(variables)

        self.plotdata.variables = variables
        self.plotdata.valid_mask = valid_mask
        self.plotdata.embedding_coords = ec
        self.plotdata.axes = axes
        if any(e is None for e in (valid_mask, ec, axes)):
            return

        if not sum(valid_mask):
            self.Error.no_valid_data()
            self.graph.new_data(None, None)
            return
        self.Error.no_valid_data.clear()

        self._anchor_circle(variables=variables)
        self._plot()

    def _plot(self):
        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes,
                        class_vars=domain.class_vars,
                        metas=new_metas)
        valid_mask = self.plotdata.valid_mask
        array = np.zeros((len(self.data), 2), dtype=np.float)
        array[valid_mask] = self.plotdata.embedding_coords
        self.plotdata.data = data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = data[self._subset_mask & valid_mask]\
            if self._subset_mask is not None and len(self._subset_mask) else None
        self.plotdata.data = data
        self.graph.new_data(data[valid_mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, False)

    def _get_lda(self, data, variables):
        domain = Domain(attributes=variables,
                        class_vars=data.domain.class_vars)
        data = data.transform(domain)
        lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        lda.fit(data.X, data.Y)
        scalings = lda.scalings_[:, :2].T
        if scalings.shape == (1, 1):
            scalings = np.array([[1.], [0.]])
        return scalings

    def _get_pca(self):
        data = self.data
        MAX_COMPONENTS = 2
        ncomponents = 2
        DECOMPOSITIONS = [PCA]  # TruncatedSVD
        cls = DECOMPOSITIONS[0]
        pca_projector = cls(n_components=MAX_COMPONENTS)
        pca_projector.component = ncomponents
        pca_projector.preprocessors = cls.preprocessors + [Normalize()]

        pca = pca_projector(data)
        variance_ratio = pca.explained_variance_ratio_
        cumulative = np.cumsum(variance_ratio)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        max_radius = np.min(
            [np.abs(np.min(coords, axis=0)),
             np.max(coords, axis=0)])
        axes = pca.components_.T.copy()
        axes *= max_radius / np.max(np.linalg.norm(axes, axis=1))
        return valid_mask, coords, axes

    def _update_graph(self, reset_view=False):
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view)

    def update_density(self):
        self._update_graph(reset_view=False)

    def selection_changed(self):
        if self.graph.selection is not None:
            self._selection = np.zeros(len(self.data), dtype=np.uint8)
            self._selection[self.plotdata.valid_mask] = self.graph.selection
            self.selection_indices = self._selection.tolist()
        else:
            self._selection = self.selection_indices = None
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        def prepare_components():
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                attrs = [a for a in self.model_selected[:]]
                axes = self.plotdata.axes
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = [a for a in self._pca.orig_domain.attributes]
            if self.placement != self.Placement.Projection:
                domain = Domain([
                    ContinuousVariable(a.name, compute_value=lambda _: None)
                    for a in attrs
                ],
                                metas=[StringVariable(name='component')])
                metas = np.array([[
                    "{}{}".format(self.Component_name[self.placement], i + 1)
                    for i in range(axes.shape[1])
                ]],
                                 dtype=object).T
                components = Table(domain, axes.T, metas=metas)
                components.name = 'components'
            else:
                components = self.projection
            return components

        selected = annotated = components = None
        if self.data is not None and self.plotdata.data is not None:
            components = prepare_components()

            graph = self.graph
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None \
            else [False * len(mask)]

            selection = np.array(
                [], dtype=np.uint8) if mask is None else np.flatnonzero(mask)
            name = self.data.name
            data = self.plotdata.data
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes

            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        if self.data is None:
            return

        def name(var):
            return var and var.name

        def projection_name():
            name = ("Circular Placement", "Linear Discriminant Analysis",
                    "Principal Component Analysis", "Input projection")
            return name[self.placement]

        caption = report.render_items_vert(
            (("Projection", projection_name()), ("Color",
                                                 name(self.graph.attr_color)),
             ("Label", name(self.graph.attr_label)),
             ("Shape", name(self.graph.attr_shape)),
             ("Size", name(self.graph.attr_size)),
             ("Jittering", self.graph.jitter_size != 0
              and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index",
                                         "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
Exemplo n.º 2
0
class OWLineChart(widget.OWWidget):
    name = 'Line Chart'
    description = "Visualize time series' sequence and progression."
    icon = 'icons/LineChart.svg'
    priority = 90

    class Inputs:
        time_series = Input("Time series", Table)
        forecast = Input("Forecast", Timeseries, multiple=True)

    attrs = settings.Setting({})  # Maps data.name -> [attrs]

    graph_name = 'chart'

    def __init__(self):
        self.data = None
        self.plots = []
        self.configs = []
        self.forecasts = OrderedDict()
        self.varmodel = VariableListModel(parent=self)
        icon = QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png'))
        self.add_button = button = QPushButton(icon, ' &Add plot', self)
        button.clicked.connect(self.add_plot)
        self.controlArea.layout().addWidget(button)
        self.configsArea = gui.vBox(self.controlArea)
        self.controlArea.layout().addStretch(1)
        # TODO: allow selecting ranges that are sent to output as subset table
        self.chart = highstock = Highstock(self, highchart='StockChart')
        self.mainArea.layout().addWidget(highstock)
        # highstock.evalJS('Highcharts.setOptions({navigator: {enabled:false}});')
        highstock.chart(
            # For some reason, these options don't work as global opts applied at Highstock init time
            # Disable top range selector
            rangeSelector_enabled=False,
            rangeSelector_inputEnabled=False,
            # Disable bottom miniview navigator (it doesn't update)
            navigator_enabled=False,
        )
        QTimer.singleShot(0, self.add_plot)

    def add_plot(self):
        ax = self.chart.addAxis()
        config = PlotConfigWidget(self, ax, self.varmodel)
        # Connect the signals
        config.sigSelection.connect(self.chart.setSeries)
        config.sigLogarithmic.connect(self.chart.setLogarithmic)
        config.sigType.connect(self.chart.setType)
        config.sigClosed.connect(self.chart.removeAxis)
        config.sigClosed.connect(lambda ax, widget: widget.setParent(None))
        config.sigClosed.connect(
            lambda ax, widget: self.add_button.setDisabled(False))
        self.configs.append(config)
        self.add_button.setDisabled(len(self.configs) >= 5)
        self.configsArea.layout().addWidget(config)

    @Inputs.time_series
    def set_data(self, data):
        # TODO: set xAxis resolution and tooltip time contents depending on
        # data.time_delta. See: http://imgur.com/yrnlgQz

        # If the same data is updated, short circuit to just updating the chart,
        # retaining all panels and list view selections ...
        if data is not None and self.data is not None and data.domain == self.data.domain:
            self.data = Timeseries.from_data_table(data)
            for config in self.configs:
                config.selection_changed()
            return

        self.data = data = None if data is None else Timeseries.from_data_table(
            data)
        if data is None:
            self.varmodel.clear()
            self.chart.clear()
            return
        if getattr(data.time_variable, 'utc_offset', False):
            offset_minutes = data.time_variable.utc_offset.total_seconds() / 60
            self.chart.evalJS(
                'Highcharts.setOptions({global: {timezoneOffset: %d}});' %
                -offset_minutes)  # Why is this negative? It works.
            self.chart.chart()

        self.chart.setXAxisType('datetime' if (data.time_variable and (
            getattr(data.time_variable, 'have_date', False)
            or getattr(data.time_variable, 'have_time', False))) else 'linear')

        self.varmodel.wrap([
            var for var in data.domain.variables
            if var.is_continuous and var != data.time_variable
        ])

    @Inputs.forecast
    def set_forecast(self, forecast, id):
        if forecast is not None:
            self.forecasts[id] = forecast
        else:
            self.forecasts.pop(id, None)
Exemplo n.º 3
0
class OWRadviz(OWAnchorProjectionWidget):
    name = "Radviz"
    description = "显示Radviz投影"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    settings_version = 2

    selected_vars = ContextSetting([])
    vizrank = SettingProvider(RadvizVizRank)
    GRAPH_CLASS = OWRadvizGraph
    graph = SettingProvider(OWRadvizGraph)

    class Warning(OWAnchorProjectionWidget.Warning):
        invalid_embedding = widget.Msg("选择的特征没有投影")
        removed_vars = widget.Msg("不显示具有两个以上值的分类变量")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.removed.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.vizrank_set_attrs)
        super().__init__()

    def _add_controls(self):
        self.variables_selection = VariablesSelection(self,
                                                      self.model_selected,
                                                      self.model_other,
                                                      self.controlArea)
        self.variables_selection.added.connect(self.__model_selected_changed)
        self.variables_selection.removed.connect(self.__model_selected_changed)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)
        super()._add_controls()
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    @property
    def primitive_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [
            v for v in chain(dom.variables, dom.metas)
            if v.is_continuous or v.is_discrete and len(v.values) == 2
        ]

    @property
    def effective_variables(self):
        return self.model_selected[:]

    def vizrank_set_attrs(self, *attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [
            var for var in self.primitive_variables if var not in attrs
        ]
        self.__model_selected_changed()

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var))
                              for var in self.model_selected]
        self.init_projection()
        self.setup_plot()
        self.commit()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        self._init_vizrank()
        self.init_projection()

    def use_context(self):
        self.model_selected.clear()
        self.model_other.clear()
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[name] for name in selected]
            self.model_other[:] = [
                d[attr.name] for attr in self.primitive_variables
                if attr.name not in selected
            ]
        elif self.data is not None:
            d, variables = self.data.domain, self.primitive_variables
            class_var = [variables.pop(variables.index(d.class_var))] \
                if d.class_var in variables else []
            self.model_selected[:] = variables[:5]
            self.model_other[:] = variables[5:] + class_var

    def _init_vizrank(self):
        is_enabled = self.data is not None and \
            len(self.primitive_variables) > 3 and \
            self.attr_color is not None and \
            not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all() and \
            len(self.data[self.valid_data]) > 1 and \
            np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        super().check_data()
        if self.data is not None:
            domain = self.data.domain
            vars_ = chain(domain.variables, domain.metas)
            n_vars = sum(v.is_primitive() for v in vars_)
            if len(self.primitive_variables) < n_vars:
                self.Warning.removed_vars()

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def _manual_move(self, anchor_idx, x, y):
        angle = np.arctan2(y, x)
        super()._manual_move(anchor_idx, np.cos(angle), np.sin(angle))

    def _send_components_x(self):
        components_ = super()._send_components_x()
        angle = np.arctan2(*components_[::-1])
        return np.row_stack((components_, angle))

    def _send_components_metas(self):
        return np.vstack((super()._send_components_metas(), ["angle"]))

    def clear(self):
        super().clear()
        self.projector = RadViz()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 4
0
class OWLineChart(OWWidget):
    name = 'Line Chart'
    description = "Visualize time series' sequence and progression."
    icon = 'icons/LineChart.svg'
    priority = 90

    class Inputs:
        time_series = Input("Time series", Table)
        features = Input("Features", AttributeList)
        forecast = Input("Forecast", Timeseries, multiple=True)

    settingsHandler = LineChartContextHandler()
    attrs = ContextSetting([])  # Maps data.name -> [attrs]
    is_logit = ContextSetting([])

    graph_name = 'chart'

    def __init__(self):
        self.data = None
        self.features = None
        self.configs = []
        self.forecasts = OrderedDict()
        self.varmodel = VariableListModel(parent=self)
        icon = QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png'))
        self.add_button = button = QPushButton(icon, ' &Add plot', self)
        button.clicked.connect(self.add_plot)
        self.controlArea.layout().addWidget(button)
        self.configsArea = gui.vBox(self.controlArea)
        self.controlArea.layout().addStretch(1)
        # TODO: allow selecting ranges that are sent to output as subset table
        self.chart = highstock = Highstock(self, highchart='StockChart')
        self.mainArea.layout().addWidget(highstock)
        # highstock.evalJS('Highcharts.setOptions({navigator: {enabled:false}});')
        highstock.chart(
            # For some reason, these options don't work as global opts applied at Highstock init time
            # Disable top range selector
            rangeSelector_enabled=False,
            rangeSelector_inputEnabled=False,
            # Disable bottom miniview navigator (it doesn't update)
            navigator_enabled=False,
        )
        self.add_plot()
        self.chart.add_legend()

    def add_plot(self):
        ax = self.chart.addAxis()
        config = PlotConfigWidget(self, ax, self.varmodel)
        # Connect the signals
        config.sigSelection.connect(self.chart.setSeries)
        config.sigLogarithmic.connect(self.chart.setLogarithmic)
        config.sigType.connect(self.chart.setType)
        config.sigClosed.connect(self.chart.removeAxis)
        config.sigClosed.connect(lambda ax, widget: widget.setParent(None))
        config.sigClosed.connect(
            lambda ax, widget: self.add_button.setDisabled(False))
        self.configs.append(config)
        self.add_button.setDisabled(len(self.configs) >= 5)
        config.sigClosed.connect(lambda ax, widget: self.remove_plot(widget))

        self.configsArea.layout().addWidget(config)

    def remove_plot(self, plot):
        self.configs.remove(plot)
        # # self.configsArea.layout()
        if len(self.chart.axes) < 2:
            self.resize(QSize(925, 635))

    @Inputs.time_series
    def set_data(self, data):
        # TODO: set xAxis resolution and tooltip time contents depending on
        # data.time_delta. See: http://imgur.com/yrnlgQz

        # If the same data is updated, short circuit to just updating the chart
        # retaining all panels and list view selections ...
        new_data = None if data is None else \
                   Timeseries.from_data_table(data)
        if new_data is not None and self.data is not None \
                and new_data.domain == self.data.domain:
            self.data = new_data
            self._selections_changed()
            return

        self.data = data = new_data
        if data is None:
            self.varmodel.clear()
            self.chart.clear()
            return

        self.set_attributes()

        if getattr(data.time_variable, 'utc_offset', False):
            offset_minutes = data.time_variable.utc_offset.total_seconds() / 60
            self.chart.evalJS(
                'Highcharts.setOptions({global: {timezoneOffset: %d}});' %
                -offset_minutes)  # Why is this negative? It works.
            self.chart.chart()

        self.chart.setXAxisType('datetime' if (data.time_variable and (
            getattr(data.time_variable, 'have_date', False)
            or getattr(data.time_variable, 'have_time', False))) else 'linear')

        variables = [
            var for var in data.domain.variables
            if var.is_continuous and var != data.time_variable
        ]
        self.varmodel.wrap(variables)
        self.update_plots()

    @Inputs.forecast
    def set_forecast(self, forecast, id):
        if forecast is not None:
            self.forecasts[id] = forecast
        else:
            self.forecasts.pop(id, None)
        # TODO: update currently shown plots

    @Inputs.features
    def set_features(self, features: AttributeList) -> None:
        if features and not self.features:
            # if features are on the input and they were not before
            # context should be saved for later when features will not be
            # present anymore
            self.closeContext()
        self.features = features
        if self.data:
            self.set_attributes()
            self.update_plots()

    def set_attributes(self) -> None:
        """
        In case when features present: set shown attributes to match features
        In case when features not present: set default value and open context
        """
        self.closeContext()
        if self.features:
            self.attrs = [
                [f] for f in self.features
                if f in self.data.domain and f != self.data.time_variable
            ]
            self.is_logit = [False] * len(self.attrs)
        else:
            variables = [
                var for var in self.data.domain.variables
                if var.is_continuous and var != self.data.time_variable
            ]
            self.attrs = [[variables[0]]]
            self.is_logit = [False]
            # context is only open when features not provided on input
            # when provided features defines the selection
            self.openContext(self.data.domain)

    def update_plots(self) -> None:
        """
        Update plots when new data or new selection comes
        """
        # remove plots if too many of them
        while len(self.configs) > len(self.attrs):
            plot = self.configs[-1]
            plot.sigClosed.emit(plot.ax, plot)

        # add plots if not enough of them
        while len(self.configs) < len(self.attrs):
            self.add_plot()

        assert len(self.configs) == len(self.attrs)
        assert len(self.attrs) == len(self.is_logit)
        # select correct values
        for config, attr, log in zip(self.configs, self.attrs, self.is_logit):
            config.set_logarithmic(log)
            config.set_selection(attr)
            config.view.setEnabled(not self.features)

    def _selections_changed(self) -> None:
        for config in self.configs:
            config.selection_changed()

    def closeEvent(self, event: QCloseEvent) -> None:
        """
        When someone delets the widget closeContext must be called to gather
        settings.
        """
        self.closeContext()
        super().closeEvent(event)

    def closeContext(self) -> None:
        """
        Gather configs in contextVariables and close context.
        """
        if not self.features:
            # only close in case of when features are not present if they are
            # feature selection is defined by the input and context should
            # not have impact
            attrs, is_logit = [], []
            for config in self.configs:
                attrs.append(config.get_selection())
                is_logit.append(config.is_logarithmic)
            self.attrs = attrs
            self.is_logit = is_logit
            super().closeContext()
Exemplo n.º 5
0
class OWLinearProjection(OWAnchorProjectionWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    Placement = Enum("Placement",
                     dict(Circular=0, LDA=1, PCA=2),
                     type=int,
                     qualname="OWLinearProjection.Placement")

    Projection_name = {
        Placement.Circular: "Circular Placement",
        Placement.LDA: "Linear Discriminant Analysis",
        Placement.PCA: "Principal Component Analysis"
    }

    settings_version = 5

    placement = Setting(Placement.Circular)
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    left_side_scrolling = True

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.removed.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)

        super().__init__()

    def _add_controls(self):
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()
        self.gui.add_control(self._effects_box,
                             gui.hSlider,
                             "Hide radius:",
                             master=self.graph,
                             value="hide_radius",
                             minValue=0,
                             maxValue=100,
                             step=10,
                             createLabel=False,
                             callback=self.__radius_slider_changed)
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    def _add_controls_variables(self):
        self.variables_selection = VariablesSelection(self,
                                                      self.model_selected,
                                                      self.model_other,
                                                      self.controlArea)
        self.variables_selection.added.connect(self.__model_selected_changed)
        self.variables_selection.removed.connect(self.__model_selected_changed)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

    def _add_controls_placement(self):
        box = gui.widgetBox(self.controlArea,
                            True,
                            sizePolicy=(QSizePolicy.Minimum,
                                        QSizePolicy.Maximum))
        self.radio_placement = gui.radioButtonsInBox(
            box,
            self,
            "placement",
            btnLabels=[self.Projection_name[x] for x in self.Placement],
            callback=self.__placement_radio_changed)

    @property
    def continuous_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_continuous]

    @property
    def effective_variables(self):
        return self.model_selected[:]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [
            var for var in self.continuous_variables if var not in attrs
        ]
        self.__model_selected_changed()

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var))
                              for var in self.model_selected]
        self.projection = None
        self._check_options()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)
        self.projection = self.projector = None
        self._init_vizrank()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        self.graph.update_radius()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        self._check_options()
        self._init_vizrank()
        self.init_projection()

    def use_context(self):
        self.model_selected.clear()
        self.model_other.clear()
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[attr] for attr in selected]
            self.model_other[:] = [
                d[attr.name] for attr in self.continuous_variables
                if attr.name not in selected
            ]
        elif self.data is not None:
            self.model_selected[:] = self.continuous_variables[:3]
            self.model_other[:] = self.continuous_variables[3:]

    def _check_options(self):
        buttons = self.radio_placement.buttons
        for btn in buttons:
            btn.setEnabled(True)

        if self.data is not None:
            has_discrete_class = self.data.domain.has_discrete_class
            if not has_discrete_class or len(np.unique(self.data.Y)) < 3:
                buttons[self.Placement.LDA].setEnabled(False)
                if self.placement == self.Placement.LDA:
                    self.placement = self.Placement.Circular

        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)

    def _init_vizrank(self):
        is_enabled, msg = False, ""
        if self.data is None:
            msg = "There is no data."
        elif self.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.attr_color.is_continuous and \
                self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear " \
                  "Discriminant Analysis Projection when " \
                  "continuous color variable is selected."
        elif len(
            [v for v in self.continuous_variables if v is not self.attr_color
             ]) < 3:
            msg = "Not enough available continuous variables"
        elif np.sum(np.all(np.isfinite(self.data.X), axis=1)) < 2:
            msg = "Not enough valid data instances"
        else:
            is_enabled = not np.isnan(
                self.data.get_column_view(
                    self.attr_color)[0].astype(float)).all()
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if not len(self.continuous_variables):
                error(self.Error.no_cont_features)

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def init_projection(self):
        if self.placement == self.Placement.Circular:
            self.projector = CircularPlacement()
        elif self.placement == self.Placement.LDA:
            self.projector = LDA(solver="eigen", n_components=2)
        elif self.placement == self.Placement.PCA:
            self.projector = PCA(n_components=2)
            self.projector.component = 2
            self.projector.preprocessors = PCA.preprocessors + [Normalize()]

        super().init_projection()

    def get_coordinates_data(self):
        def normalized(a):
            span = np.max(a, axis=0) - np.min(a, axis=0)
            span[span == 0] = 1
            return (a - np.mean(a, axis=0)) / span

        embedding = self.get_embedding()
        if embedding is None:
            return None, None
        norm_emb = normalized(embedding[self.valid_data])
        return (norm_emb.ravel(), np.zeros(len(norm_emb), dtype=float)) \
            if embedding.shape[1] == 1 else norm_emb.T

    def _get_send_report_caption(self):
        def projection_name():
            return self.Projection_name[self.placement]

        return report.render_items_vert(
            (("Projection", projection_name()),
             ("Color", self._get_caption_var_name(self.attr_color)),
             ("Label", self._get_caption_var_name(self.attr_label)),
             ("Shape", self._get_caption_var_name(self.attr_shape)),
             ("Size", self._get_caption_var_name(self.attr_size)),
             ("Jittering", self.graph.jitter_size != 0
              and "{} %".format(self.graph.jitter_size))))

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph
        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                selection = settings_["selection_indices"]
                settings_["selection"] = [
                    (i, 1) for i, selected in enumerate(selection) if selected
                ]
        if version < 5:
            if "placement" in settings_ and \
                    settings_["placement"] not in cls.Placement:
                settings_["placement"] = cls.Placement.Circular

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index",
                                         "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
        if version == 3:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 6
0
class OWRadviz(OWAnchorProjectionWidget):
    name = "Radviz"
    description = "Display Radviz projection"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    settings_version = 2

    selected_vars = ContextSetting([])
    vizrank = SettingProvider(RadvizVizRank)
    GRAPH_CLASS = OWRadvizGraph
    graph = SettingProvider(OWRadvizGraph)
    embedding_variables_names = ("radviz-x", "radviz-y")

    class Warning(OWAnchorProjectionWidget.Warning):
        no_features = widget.Msg("Radviz requires at least two features.")
        invalid_embedding = widget.Msg("No projection for selected features")

    class Error(OWAnchorProjectionWidget.Error):
        no_features = widget.Msg(
            "At least three numeric or categorical variables are required")
        no_instances = widget.Msg("At least two data instances are required")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.rowsInserted.connect(self.__model_selected_changed)
        self.model_selected.rowsRemoved.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.__vizrank_set_attrs)
        super().__init__()

    def _add_controls(self):
        self.variables_selection = VariablesSelection(self,
                                                      self.model_selected,
                                                      self.model_other,
                                                      self.controlArea)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)
        super()._add_controls()
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    @property
    def primitive_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_primitive()]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [
            var for var in self.primitive_variables if var not in attrs
        ]

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var))
                              for var in self.model_selected]
        self.projection = None
        self.setup_plot()
        self.commit()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[name] for name in selected]
            self.model_other[:] = [
                d[attr.name] for attr in self.primitive_variables
                if attr.name not in selected
            ]
        elif self.data is not None:
            d, variables = self.data.domain, self.primitive_variables
            class_var = [variables.pop(variables.index(d.class_var))] \
                if d.class_var in variables else []
            self.model_selected[:] = variables[:5]
            self.model_other[:] = variables[5:] + class_var

        self._init_vizrank()

    def _init_vizrank(self):
        is_enabled = self.data is not None and \
            len(self.primitive_variables) > 3 and \
            self.attr_color is not None and \
            not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all() and \
            len(self.data[self.valid_data]) > 1 and \
            np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if len(self.data) < 2:
                error(self.Error.no_instances)
            elif len(self.primitive_variables) < 3:
                error(self.Error.no_features)

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def get_embedding(self):
        self.valid_data = None
        if self.data is None:
            return None

        self.Warning.no_features.clear()
        if len(self.model_selected) < 2:
            self.Warning.no_features()
            return None

        ec, proj, msk = radviz(self.data, self.model_selected, self.projection)
        angle = np.arctan2(*proj.T[::-1])
        self.projection = np.vstack((np.cos(angle), np.sin(angle))).T
        self.valid_data = msk

        self.Warning.invalid_embedding.clear()
        if ec is None or np.any(np.isnan(ec)):
            self.Warning.invalid_embedding()
            return None

        embedding = np.full((len(self.data), 2), np.nan)
        embedding[self.valid_data] = ec
        return embedding

    def get_anchors(self):
        if self.projection is None:
            return None, None
        return self.projection, [a.name for a in self.model_selected]

    def _manual_move(self, anchor_idx, x, y):
        angle = np.arctan2(y, x)
        super()._manual_move(anchor_idx, np.cos(angle), np.sin(angle))

    def send_components(self):
        components = None
        if self.data is not None and self.valid_data is not None and \
                self.projection is not None:
            angle = np.arctan2(*self.projection.T[::-1])
            meta_attrs = [StringVariable(name='component')]
            components = Table(Domain(self.model_selected, metas=meta_attrs),
                               np.row_stack((self.projection.T, angle)),
                               metas=np.array([["RX"], ["RY"], ["angle"]]))
            components.name = self.data.name
        self.Outputs.components.send(components)

    def clear(self):
        if self.model_selected:
            self.model_selected.clear()
        if self.model_other:
            self.model_other.clear()
        super().clear()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 7
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls`layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "Box Plot"
    description = "Visualize the distribution of feature values in a box plot."
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)
    auto_commit = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "", CompareMedians: "median", CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted,
                _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "box_scene"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        view = gui.listView(
            self.controlArea, self, "attribute", box="Variable",
            model=self.attrs, callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(
            view.box, self, "order_by_importance",
            "Order by relevance",
            tooltip="Order by 𝜒² or ANOVA over the subgroups",
            callback=self.apply_sorting)
        self.group_vars = DomainModel(
            placeholder="None", separators=False,
            valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(
            self.controlArea, self, "group_var", box="Subgroups",
            model=self.group_vars, callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(
            self.controlArea, "Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum),
            addSpace=False)

        gui.checkBox(self.display_box, self, "show_annotations", "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box, self, 'compare',
            btnLabels=["No comparison", "Compare medians", "Compare means"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(
            self.controlArea, box="Display",
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(
            box, self, 'stretched', "Stretch bars",
            callback=self.display_changed)
        gui.checkBox(
            box, self, 'show_labels', "Show box labels",
            callback=self.display_changed)
        self.sort_cb = gui.checkBox(
            box, self, 'sort_freqs', "Sort by subgroup frequencies",
            callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing |
                                     QPainter.TextAntialiasing |
                                     QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>")
        self.mainArea.setMinimumWidth(300)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        return QSize(900, 500)

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        self.attrs[:] = [
            var for var in chain(
                domain.class_vars, domain.metas, domain.attributes)
            if var.is_primitive()]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (
                not bool(dataset) or not len(dataset.domain) and not
                any(var.is_primitive() for var in dataset.domain.metas)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(), f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        temp_cond = self.conditions.copy()
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(box.filter.conditions in
                                [c.conditions for c in temp_cond])

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        if dataset is None or not self.is_continuous and not attr.values or \
                        self.group_var and not self.group_var.values:
            self.stats = self.dist = self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(
                dataset, attr, self.group_var)
            if self.is_continuous:
                stats, label_texts = [], []
                for i, cont in enumerate(self.conts):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, attr, i, self.group_var))
                        label_texts.append(self.group_var.values[i])
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = \
                    [v for v, c in zip(self.group_var.values, self.conts)
                     if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [txts for stat, txts in zip(self.stats,
                                                      self.label_txts_all)
                           if stat.n > 0]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.mean_labels = [self.mean_label(stat, attr, lab)
                            for stat, lab in zip(self.stats, self.label_txts)]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [self.label_group(stat, attr, mean_lab)
                       for stat, mean_lab in zip(self.stats, self.mean_labels)]
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width, len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        assert not self.is_continuous
        self.clear_scene()
        self.attr_labels = [QGraphicsSimpleTextItem(lab)
                            for lab in self.label_txts_all]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0]
            else:
                self.labels = [
                    QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = \
                [self.strudel(cont, i) for i, cont in enumerate(self.conts)
                 if np.sum(cont) > 0]
            self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0]

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(self.order, key=(-np.sum(self.conts, axis=1)).__getitem__)
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.stretched:
                self.__draw_row_counts(y, box_index)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width, len(self.boxes * 40) + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        assert not self.is_continuous
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        label = bar_part = None
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(
                text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests

        # pylint: disable=comparison-with-itself
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n < 2 or d2.n < 2:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            # pylint: disable=comparison-with-itself
            if pooled_var == 0 or np.isnan(pooled_var):
                return np.nan, np.nan
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg) ** 2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            if var_within == 0 or df_within == 0 or df_between == 0:
                return np.nan, np.nan
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance, " \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, p)
            else:
                t, p = stat_ttest()
                t = "" if np.isnan(t) else f"Student's t: {t:.3f} (p={p:.3f})"
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, p)
            else:
                F, p = stat_ANOVA()
                t = "" if np.isnan(F) else f"ANOVA: {F:.3f} (p={p:.3f})"
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [self.mean_label(stats[0], self.attribute, "")]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(
                t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(
            bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.is_continuous
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(sum(dist) * scale_x + 10 +
                            lbl.boundingRect().width()
                            for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(
                stat.conditions, stat.q25 * scale_x, -height / 2,
                (stat.q75 - stat.q25) * scale_x, height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2,
                               stat.median, height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        self.conditions = [item.filter for item in
                           self.box_scene.selectedItems() if item.filter]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            selection = np.in1d(
                self.dataset.ids, selected.ids, assume_unique=True).nonzero()[0]
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = - 6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        return self.attribute.name if self.attribute else None

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)
Exemplo n.º 8
0
class OWMovingTransform(widget.OWWidget):
    name = 'Moving Transform'
    description = 'Apply rolling window functions to the time series.'
    icon = 'icons/MovingTransform.svg'
    priority = 20

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    want_main_area = False

    settingsHandler = MovingTransformContextHandler()
    non_overlapping = settings.Setting(False)
    fixed_wlen = settings.Setting(5)
    transformations = settings.ContextSetting([])
    autocommit = settings.Setting(False)
    last_win_width = settings.Setting(5)

    _NON_OVERLAPPING_WINDOWS = 'Non-overlapping windows'

    UserAdviceMessages = [
        widget.Message('Get the simple moving average (SMA) of a series '
                       'by setting the aggregation function to "{}".'.format(Mean),
                       'sma-is-mean'),
        widget.Message('If "{}" is checked, the rolling windows don\t '
                       'overlap. Instead, they run through the series '
                       'side-to-side, so the resulting transformed series is '
                       'fixed-window-length-times shorter.'.format(_NON_OVERLAPPING_WINDOWS),
                       'non-overlapping')
    ]

    class Warning(OWWidget.Warning):
        no_transforms_added = Msg("At least one transform should be added.")

    def __init__(self):
        self.data = None
        box = gui.vBox(self.controlArea, 'Moving Transform')

        def _disable_fixed_wlen():
            fixed_wlen.setDisabled(not self.non_overlapping)
            self.view.repaint()
            self.on_changed()

        gui.checkBox(box, self, 'non_overlapping',
                     label=self._NON_OVERLAPPING_WINDOWS,
                     callback=_disable_fixed_wlen,
                     tooltip='If this is checked, instead of rolling windows '
                             'through the series, they are applied side-to-side, '
                             'so the resulting output series will be some '
                             'length-of-fixed-window-times shorter.')
        fixed_wlen = gui.spin(box, self, 'fixed_wlen', 2, 1000,
                              label='Fixed window width:',
                              callback=self.on_changed)
        fixed_wlen.setDisabled(not self.non_overlapping)
        # TODO: allow the user to choose left-aligned, right-aligned, or center-aligned window

        class TableView(gui.TableView):
            def __init__(self, parent):
                super().__init__(parent,
                                 editTriggers=(self.SelectedClicked |
                                               self.CurrentChanged |
                                               self.DoubleClicked |
                                               self.EditKeyPressed),
                                 )
                self.horizontalHeader().setStretchLastSection(False)
                agg_functions = ListModel(AGG_FUNCTIONS +
                                          [Cumulative_sum, Cumulative_product],
                                          parent=self)
                self.setItemDelegateForColumn(0, self.VariableDelegate(parent))
                self.setItemDelegateForColumn(1, self.SpinDelegate(parent))
                self.setItemDelegateForColumn(2, self.ComboDelegate(self, agg_functions))

            class _ItemDelegate(QStyledItemDelegate):
                def updateEditorGeometry(self, widget, option, _index):
                    widget.setGeometry(option.rect)

            class ComboDelegate(_ItemDelegate):
                def __init__(self, parent=None, combo_model=None):
                    super().__init__(parent)
                    self._parent = parent
                    if combo_model is not None:
                        self._combo_model = combo_model

                def createEditor(self, parent, _QStyleOptionViewItem, index):
                    combo = QComboBox(parent)
                    combo.setModel(self._combo_model)
                    return combo

                def setEditorData(self, combo, index):
                    var = index.model().data(index, Qt.EditRole)
                    combo.setCurrentIndex(self._combo_model.indexOf(var))

                def setModelData(self, combo, model, index):
                    var = self._combo_model[combo.currentIndex()]
                    model.setData(index, var, Qt.EditRole)

            class VariableDelegate(ComboDelegate):
                @property
                def _combo_model(self):
                    return self._parent.var_model

            class SpinDelegate(_ItemDelegate):
                def paint(self, painter, option, index):
                    # Don't paint window length if non-overlapping windows set
                    if not self.parent().non_overlapping:
                        super().paint(painter, option, index)

                def createEditor(self, parent, _QStyleOptionViewItem, _index):
                    # Don't edit window length if non-overlapping windows set
                    if self.parent().non_overlapping:
                        return None
                    spin = QSpinBox(parent, minimum=1, maximum=1000)
                    return spin

                def setEditorData(self, spin, index):
                    spin.setValue(index.model().data(index, Qt.EditRole))

                def setModelData(self, spin, model, index):
                    spin.interpretText()
                    model.setData(index, spin.value(), Qt.EditRole)

        self.var_model = VariableListModel(parent=self)

        self.table_model = model = PyTableModel(parent=self, editable=True)
        model.setHorizontalHeaderLabels(['Series', 'Window width', 'Aggregation function'])
        model.dataChanged.connect(self.on_changed)

        self.view = view = TableView(self)
        view.setModel(model)
        box.layout().addWidget(view)

        hbox = gui.hBox(box)
        from os.path import dirname, join
        self.add_button = button = gui.button(
            hbox, self, 'Add &Transform',
            callback=self.on_add_transform)
        button.setIcon(QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png')))

        self.del_button = button = gui.button(
            hbox, self, '&Delete Selected',
            callback=self.on_del_transform)
        QIcon.setThemeName('gnome')  # Works for me
        button.setIcon(QIcon.fromTheme('edit-delete'))

        gui.auto_commit(box, self, 'autocommit', '&Apply')

        self.settingsAboutToBePacked.connect(self.store_transformations)

    def sizeHint(self):
        return QSize(450, 600)

    def on_add_transform(self):
        if self.data is not None:
            self.table_model.append([self.var_model[0], self.last_win_width, AGG_FUNCTIONS[0]])
        self.commit()

    def on_del_transform(self):
        for row in sorted([mi.row() for mi in self.view.selectionModel().selectedRows(0)],
                          reverse=True):
            del self.table_model[row]
        if len(self.table_model):
            selection_model = self.view.selectionModel()
            selection_model.select(self.table_model.index(len(self.table_model) - 1, 0),
                                   selection_model.Select | selection_model.Rows)
        self.commit()

    def store_transformations(self):
        self.transformations = list(self.table_model)

    @Inputs.time_series
    def set_data(self, data):
        self.store_transformations()
        self.closeContext()
        self.transformations = []
        if data is None:
            self.data = None
            self.var_model.clear()
        else:
            self.data = Timeseries.from_data_table(data)
            self.var_model[:] = [
                var for var in self.data.domain.variables
                if var.is_continuous and var is not self.data.time_variable]

        self.add_button.setDisabled(not self.var_model)
        self.openContext(self.var_model)
        self.table_model[:] = self.transformations
        self.on_changed()

    def on_changed(self):
        self.commit()

    def commit(self):
        self.Warning.no_transforms_added.clear()
        data = self.data
        if not data:
            self.Outputs.time_series.send(None)
            return

        if not len(self.table_model):
            self.Warning.no_transforms_added()
            self.Outputs.time_series.send(None)
            return

        ts = moving_transform(data, self.table_model, self.non_overlapping and self.fixed_wlen)
        self.Outputs.time_series.send(ts)
Exemplo n.º 9
0
class OWRadviz(OWAnchorProjectionWidget):
    name = "Radviz"
    description = "Display Radviz projection"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    settings_version = 2

    selected_vars = ContextSetting([])
    vizrank = SettingProvider(RadvizVizRank)
    GRAPH_CLASS = OWRadvizGraph
    graph = SettingProvider(OWRadvizGraph)

    class Warning(OWAnchorProjectionWidget.Warning):
        no_features = widget.Msg("Radviz requires at least two features.")
        invalid_embedding = widget.Msg("No projection for selected features")
        removed_vars = widget.Msg("Categorical variables with more than"
                                  " two values are not shown.")

    class Error(OWAnchorProjectionWidget.Error):
        no_features = widget.Msg(
            "At least three numeric or categorical variables are required"
        )

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.rowsInserted.connect(self.__model_selected_changed)
        self.model_selected.rowsRemoved.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.__vizrank_set_attrs
        )
        super().__init__()

    def _add_controls(self):
        self.variables_selection = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea
        )
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank
        )
        super()._add_controls()
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    @property
    def primitive_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas)
                if v.is_continuous or v.is_discrete and len(v.values) == 2]

    @property
    def effective_variables(self):
        return self.model_selected[:]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.primitive_variables
                               if var not in attrs]

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var)) for var
                              in self.model_selected]

        self.Warning.no_features.clear()
        if len(self.model_selected) < 2:
            self.Warning.no_features()
            return

        self.init_projection()
        self.setup_plot()
        self.commit()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[name] for name in selected]
            self.model_other[:] = [d[attr.name] for attr in
                                   self.primitive_variables
                                   if attr.name not in selected]
        elif self.data is not None:
            d, variables = self.data.domain, self.primitive_variables
            class_var = [variables.pop(variables.index(d.class_var))] \
                if d.class_var in variables else []
            self.model_selected[:] = variables[:5]
            self.model_other[:] = variables[5:] + class_var

        self._init_vizrank()

    def _init_vizrank(self):
        is_enabled = self.data is not None and \
            len(self.primitive_variables) > 3 and \
            self.attr_color is not None and \
            not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all() and \
            len(self.data[self.valid_data]) > 1 and \
            np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if len(self.primitive_variables) < 3:
                error(self.Error.no_features)
            else:
                domain = self.data.domain
                vars_ = chain(domain.variables, domain.metas)
                n_vars = sum(v.is_primitive() for v in vars_)
                if len(self.primitive_variables) < n_vars:
                    self.Warning.removed_vars()

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def _manual_move(self, anchor_idx, x, y):
        angle = np.arctan2(y, x)
        super()._manual_move(anchor_idx, np.cos(angle), np.sin(angle))

    def _send_components_x(self):
        components_ = super()._send_components_x()
        angle = np.arctan2(*components_[::-1])
        return np.row_stack((components_, angle))

    def _send_components_metas(self):
        return np.vstack((super()._send_components_metas(), ["angle"]))

    def clear(self):
        if self.model_selected:
            self.model_selected.clear()
        if self.model_other:
            self.model_other.clear()
        super().clear()
        self.projector = RadViz()

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 10
0
class OWLinearProjection(OWAnchorProjectionWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    Placement = Enum("Placement", dict(Circular=0, LDA=1, PCA=2),
                     type=int, qualname="OWLinearProjection.Placement")

    Projection_name = {Placement.Circular: "Circular Placement",
                       Placement.LDA: "Linear Discriminant Analysis",
                       Placement.PCA: "Principal Component Analysis"}

    settings_version = 5

    placement = Setting(Placement.Circular)
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.rowsInserted.connect(self.__model_selected_changed)
        self.model_selected.rowsRemoved.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)

        super().__init__()

    def _add_controls(self):
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()
        self.graph.gui.add_control(
            self._effects_box, gui.hSlider, "Hide radius:", master=self.graph,
            value="hide_radius", minValue=0, maxValue=100, step=10,
            createLabel=False, callback=self.__radius_slider_changed
        )
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    def _add_controls_variables(self):
        self.variables_selection = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea
        )
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank
        )

    def _add_controls_placement(self):
        box = gui.widgetBox(
            self.controlArea, True,
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)
        )
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=[self.Projection_name[x] for x in self.Placement],
            callback=self.__placement_radio_changed
        )

    @property
    def continuous_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_continuous]

    @property
    def effective_variables(self):
        return self.model_selected[:]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.continuous_variables
                               if var not in attrs]

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var)) for var
                              in self.model_selected]
        self.projection = None
        self._check_options()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)
        self.projection = self.projector = None
        self._init_vizrank()
        self.init_projection()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        self.graph.update_radius()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[attr] for attr in selected]
            self.model_other[:] = [d[attr.name] for attr in
                                   self.continuous_variables
                                   if attr.name not in selected]
        elif self.data is not None:
            self.model_selected[:] = self.continuous_variables[:3]
            self.model_other[:] = self.continuous_variables[3:]

        self._check_options()
        self._init_vizrank()

    def _check_options(self):
        buttons = self.radio_placement.buttons
        for btn in buttons:
            btn.setEnabled(True)

        if self.data is not None:
            has_discrete_class = self.data.domain.has_discrete_class
            if not has_discrete_class or len(np.unique(self.data.Y)) < 2:
                buttons[self.Placement.LDA].setEnabled(False)
                if self.placement == self.Placement.LDA:
                    self.placement = self.Placement.Circular

        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)

    def _init_vizrank(self):
        is_enabled, msg = False, ""
        if self.data is None:
            msg = "There is no data."
        elif self.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.attr_color.is_continuous and \
                self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear " \
                  "Discriminant Analysis Projection when " \
                  "continuous color variable is selected."
        elif len([v for v in self.continuous_variables
                  if v is not self.attr_color]) < 3:
            msg = "Not enough available continuous variables"
        elif len(self.data[self.valid_data]) < 2:
            msg = "Not enough valid data instances"
        else:
            is_enabled = not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all()
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if not len(self.continuous_variables):
                error(self.Error.no_cont_features)

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    def init_projection(self):
        if not len(self.effective_variables):
            return
        if self.placement == self.Placement.Circular:
            self.projector = CircularPlacement()
        elif self.placement == self.Placement.LDA:
            self.projector = LDA(solver="eigen", n_components=2)
        elif self.placement == self.Placement.PCA:
            self.projector = PCA(n_components=2)
            self.projector.component = 2
            self.projector.preprocessors = PCA.preprocessors + [Normalize()]

        super().init_projection()

    def get_coordinates_data(self):
        def normalized(a):
            span = np.max(a, axis=0) - np.min(a, axis=0)
            span[span == 0] = 1
            return (a - np.mean(a, axis=0)) / span

        embedding = self.get_embedding()
        if embedding is None:
            return None, None
        norm_emb = normalized(embedding[self.valid_data])
        return (norm_emb.ravel(), np.zeros(len(norm_emb), dtype=float)) \
            if embedding.shape[1] == 1 else norm_emb.T

    def _get_send_report_caption(self):
        def projection_name():
            return self.Projection_name[self.placement]

        return report.render_items_vert((
            ("Projection", projection_name()),
            ("Color", self._get_caption_var_name(self.attr_color)),
            ("Label", self._get_caption_var_name(self.attr_label)),
            ("Shape", self._get_caption_var_name(self.attr_shape)),
            ("Size", self._get_caption_var_name(self.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))

    def clear(self):
        if self.model_selected:
            self.model_selected.clear()
        if self.model_other:
            self.model_other.clear()
        super().clear()

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph
        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                selection = settings_["selection_indices"]
                settings_["selection"] = [(i, 1) for i, selected in
                                          enumerate(selection) if selected]
        if version < 5:
            if "placement" in settings_ and \
                    settings_["placement"] not in cls.Placement:
                settings_["placement"] = cls.Placement.Circular

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index", "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
        if version == 3:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 11
0
class OWLineChart(widget.OWWidget):
    name = 'Line Chart'
    description = "Visualize time series' sequence and progression."
    icon = 'icons/LineChart.svg'
    priority = 90

    class Inputs:
        time_series = Input("Time series", Table)
        forecast = Input("Forecast", Timeseries, multiple=True)

    attrs = settings.Setting({})  # Maps data.name -> [attrs]

    graph_name = 'chart'

    def __init__(self):
        self.data = None
        self.plots = []
        self.configs = []
        self.forecasts = OrderedDict()
        self.varmodel = VariableListModel(parent=self)
        icon = QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png'))
        self.add_button = button = QPushButton(icon, ' &Add plot', self)
        button.clicked.connect(self.add_plot)
        self.controlArea.layout().addWidget(button)
        self.configsArea = gui.vBox(self.controlArea)
        self.controlArea.layout().addStretch(1)
        # TODO: allow selecting ranges that are sent to output as subset table
        self.chart = highstock = Highstock(self, highchart='StockChart')
        self.mainArea.layout().addWidget(highstock)
        # highstock.evalJS('Highcharts.setOptions({navigator: {enabled:false}});')
        highstock.chart(
            # For some reason, these options don't work as global opts applied at Highstock init time
            # Disable top range selector
            rangeSelector_enabled=False,
            rangeSelector_inputEnabled=False,
            # Disable bottom miniview navigator (it doesn't update)
            navigator_enabled=False, )
        QTimer.singleShot(0, self.add_plot)

    def add_plot(self):
        ax = self.chart.addAxis()
        config = PlotConfigWidget(self, ax, self.varmodel)
        # Connect the signals
        config.sigSelection.connect(self.chart.setSeries)
        config.sigLogarithmic.connect(self.chart.setLogarithmic)
        config.sigType.connect(self.chart.setType)
        config.sigClosed.connect(self.chart.removeAxis)
        config.sigClosed.connect(lambda ax, widget: widget.setParent(None))
        config.sigClosed.connect(lambda ax, widget:
                                 self.add_button.setDisabled(False))
        self.configs.append(config)
        self.add_button.setDisabled(len(self.configs) >= 5)
        self.configsArea.layout().addWidget(config)

    @Inputs.time_series
    def set_data(self, data):
        # TODO: set xAxis resolution and tooltip time contents depending on
        # data.time_delta. See: http://imgur.com/yrnlgQz

        # If the same data is updated, short circuit to just updating the chart,
        # retaining all panels and list view selections ...
        if data is not None and self.data is not None and data.domain == self.data.domain:
            self.data = Timeseries.from_data_table(data)
            for config in self.configs:
                config.selection_changed()
            return

        self.data = data = None if data is None else Timeseries.from_data_table(data)
        if data is None:
            self.varmodel.clear()
            self.chart.clear()
            return
        if getattr(data.time_variable, 'utc_offset', False):
            offset_minutes = data.time_variable.utc_offset.total_seconds() / 60
            self.chart.evalJS('Highcharts.setOptions({global: {timezoneOffset: %d}});' % -offset_minutes)  # Why is this negative? It works.
            self.chart.chart()

        self.chart.setXAxisType(
            'datetime'
            if (data.time_variable and
                (getattr(data.time_variable, 'have_date', False) or
                 getattr(data.time_variable, 'have_time', False))) else
            'linear')

        self.varmodel.wrap([var for var in data.domain.variables
                            if var.is_continuous and var != data.time_variable])

    @Inputs.forecast
    def set_forecast(self, forecast, id):
        if forecast is not None:
            self.forecasts[id] = forecast
        else:
            self.forecasts.pop(id, None)
Exemplo n.º 12
0
class OWLinearProjection(widget.OWWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240

    selection_indices = settings.Setting(None, schema_only=True)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        projection = Input("Projection", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    Placement = Enum("Placement",
                     dict(Circular=0,
                          LDA=1,
                          PCA=2,
                          Projection=3),
                     type=int,
                     qualname="OWLinearProjection.Placement")

    Component_name = {Placement.Circular: "C", Placement.LDA: "LD", Placement.PCA: "PC"}
    Variable_name = {Placement.Circular: "circular",
                     Placement.LDA: "lda",
                     Placement.PCA: "pca",
                     Placement.Projection: "projection"}

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    settings_version = 3
    settingsHandler = settings.DomainContextHandler()

    variable_state = settings.ContextSetting({})
    placement = settings.Setting(Placement.Circular)
    radius = settings.Setting(0)
    auto_commit = settings.Setting(True)

    resolution = 256

    graph = settings.SettingProvider(OWLinProjGraph)
    ReplotRequest = QEvent.registerEventType()
    vizrank = settings.SettingProvider(LinearProjectionVizRank)
    graph_name = "graph.plot_widget.plotItem"

    class Warning(widget.OWWidget.Warning):
        no_cont_features = widget.Msg("Plotting requires numeric features")
        not_enough_components = widget.Msg("Input projection has less than 2 components")
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        proj_and_domain_match = widget.Msg("Projection and Data domains do not match")
        no_valid_data = widget.Msg("No projection due to invalid data")

    def __init__(self):
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self, box, "Plot", view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        box = gui.widgetBox(
            self.controlArea, "Placement", sizePolicy=SIZE_POLICY)
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=["Circular Placement",
                       "Linear Discriminant Analysis",
                       "Principal Component Analysis",
                       "Use input projection"],
            callback=self._change_placement
        )

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        box = gui.widgetBox(self.controlArea, "Hide axes", sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(
            box, self, "radius", minValue=0, maxValue=100,
            step=5, label="Radius", createLabel=False, ticks=True,
            callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([g.ShowLegend,
                       g.ToolTipShowsAll,
                       g.ClassDensity,
                       g.LabelOnlySelected], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True

    def reset_graph_data(self):
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph(reset_view=True)

    def keyPressEvent(self, event):
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def _vizrank(self, attrs):
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.model_other if var not in attrs]

    def _change_placement(self):
        placement = self.placement
        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        self.variables_selection.set_enabled(placement in [p_Circular, p_LDA])
        self._vizrank_color_change()
        self.rslider.setEnabled(placement != p_Circular)
        self._setup_plot()
        self.commit()

    def _get_min_radius(self):
        return self.radius * np.max(np.linalg.norm(self.plotdata.axes, axis=1)) / 100 + 1e-5

    def update_radius(self):
        # Update the anchor/axes visibility
        pd = self.plotdata
        assert pd is not None
        if pd.hidecircle is None:
            return
        min_radius = self._get_min_radius()
        for anchor, item in zip(pd.axes, pd.axisitems):
            item.setVisible(np.linalg.norm(anchor) > min_radius)
        pd.hidecircle.setRect(QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

    def _new_plotdata(self):
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            axisitems=[],
            axes=[],
            variables=[],
            data=None,
            hidecircle=None
        )

    def _anchor_circle(self, variables):
        # minimum visible anchor radius (radius)
        min_radius = self._get_min_radius()
        axisitems = []
        for anchor, var in zip(self.plotdata.axes, variables[:]):
            axitem = AnchorItem(line=QLineF(0, 0, *anchor), text=var.name,)
            axitem.setVisible(np.linalg.norm(anchor) > min_radius)
            axitem.setPen(pg.mkPen((100, 100, 100)))
            axitem.setArrowVisible(True)
            self.viewbox.addItem(axitem)
            axisitems.append(axitem)

        self.plotdata.axisitems = axisitems
        if self.placement == self.Placement.Circular:
            return

        hidecircle = QGraphicsEllipseItem()
        hidecircle.setRect(QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

        _pen = QPen(Qt.lightGray, 1)
        _pen.setCosmetic(True)
        hidecircle.setPen(_pen)

        self.viewbox.addItem(hidecircle)
        self.plotdata.hidecircle = hidecircle

    def update_colors(self):
        self._vizrank_color_change()

    def clear(self):
        # Clear/reset the widget state
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()
        self.selection_indices = None

    def _clear_plot(self):
        self.Warning.trivial_components.clear()
        for axisitem in self.plotdata.axisitems:
            self.viewbox.removeItem(axisitem)
        if self.plotdata.hidecircle:
            self.viewbox.removeItem(self.plotdata.hidecircle)
        self._new_plotdata()
        self.graph.hide_axes()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10)

    def init_attr_values(self):
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        is_enabled = False
        if self.data is None:
            self.btn_vizrank.setToolTip("There is no data.")
            return
        vars = [v for v in chain(self.data.domain.variables, self.data.domain.metas) if
                v.is_primitive and v is not self.graph.attr_color]
        self.n_cont_var = len(vars)
        if self.placement not in [self.Placement.Circular, self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.graph.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.graph.attr_color.is_continuous and self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear Discriminant Analysis Projection " \
                  "when continuous color variable is selected."
        elif len(vars) < 3:
            msg = "Not enough available continuous variables"
        else:
            is_enabled = True
            msg = ""
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        self.vizrank.stop_and_reset(is_enabled)

    @Inputs.projection
    def set_projection(self, projection):
        self.Warning.not_enough_components.clear()
        if projection and len(projection) < 2:
            self.Warning.not_enough_components()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection = projection

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.information("Data has been sampled")
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {key: (source_ind, np.inf) for key, (source_ind, _) in state.items()}

            self.openContext(data.domain)
            selected_keys = [key for key, (sind, _) in self.variable_state.items() if sind == 0]

            if set(selected_keys).issubset(set(state.keys())):
                pass

            if self.__pending_selection_restore is not None:
                self._selection = np.array(self.__pending_selection_restore, dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other

        self.closeContext()
        self.clear()
        self.Warning.no_cont_features.clear()
        self.information()
        data = sql(data)
        if data is not None:
            domain = data.domain
            vars = [var for var in chain(domain.variables, domain.metas) if var.is_continuous]
            if not len(vars):
                self.Warning.no_cont_features()
                data = None
        self.data = data
        self.init_attr_values()
        if data is not None and len(data):
            self._initialize(data)
            self.model_selected[:], self.model_other[:] = settings(data)
            self.vizrank.stop_and_reset()
            self.vizrank.attrs = self.data.domain.attributes if self.data is not None else []

    def _check_possible_opt(self):
        def set_enabled(is_enabled):
            for btn in self.radio_placement.buttons:
                btn.setEnabled(is_enabled)
            self.variables_selection.set_enabled(is_enabled)

        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        p_Input = self.Placement.Projection
        if self.data:
            set_enabled(True)
            domain = self.data.domain
            if not domain.has_discrete_class or len(domain.class_var.values) < 2:
                self.radio_placement.buttons[p_LDA].setEnabled(False)
                if self.placement == p_LDA:
                    self.placement = p_Circular
            if not self.projection:
                self.radio_placement.buttons[p_Input].setEnabled(False)
                if self.placement == p_Input:
                    self.placement = p_Circular
            self._setup_plot()
        else:
            self.graph.new_data(None)
            self.rslider.setEnabled(False)
            set_enabled(False)
        self.commit()

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        if self.data is not None and self.subset_data is not None:
            # Update the plot's highlight items
            dataids = self.data.ids.ravel()
            subsetids = np.unique(self.subset_data.ids)
            self._subset_mask = np.in1d(dataids, subsetids, assume_unique=True)
        self._check_possible_opt()
        self._change_placement()
        self.commit()

    def customEvent(self, event):
        if event.type() == OWLinearProjection.ReplotRequest:
            self.__replot_requested = False
            self._setup_plot()
            self.commit()
        else:
            super().customEvent(event)

    def closeContext(self):
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def _initialize(self, data):
        # Initialize the GUI controls from data's domain.
        vars = [v for v in chain(data.domain.metas, data.domain.attributes) if v.is_continuous]
        self.model_other[:] = vars[3:]
        self.model_selected[:] = vars[:3]

    def prepare_plot_data(self, variables):
        def projection(variables):
            if set(self.projection.domain.attributes).issuperset(variables):
                axes = self.projection[:2, variables].X
            elif set(f.name for f in
                     self.projection.domain.attributes).issuperset(f.name for f in variables):
                axes = self.projection[:2, [f.name for f in variables]].X
            else:
                self.Error.proj_and_domain_match()
                axes = None
            return axes

        def get_axes(variables):
            self.Error.proj_and_domain_match.clear()
            axes = None
            if self.placement == self.Placement.Circular:
                axes = LinProj.defaultaxes(len(variables))
            elif self.placement == self.Placement.LDA:
                axes = self._get_lda(self.data, variables)
            elif self.placement == self.Placement.Projection and self.projection:
                axes = projection(variables)
            return axes

        coords = [column_data(self.data, var, dtype=float) for var in variables]
        coords = np.vstack(coords)
        p, N = coords.shape
        assert N == len(self.data), p == len(variables)

        axes = get_axes(variables)
        if axes is None:
            return None, None, None
        assert axes.shape == (2, p)

        valid_mask = ~np.isnan(coords).any(axis=0)
        coords = coords[:, valid_mask]

        X, Y = np.dot(axes, coords)
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)

        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def _setup_plot(self):
        self._clear_plot()
        if self.data is None:
            return
        self.__replot_requested = False
        names = get_unique_names([v.name for v in chain(self.data.domain.variables,
                                                        self.data.domain.metas)],
                                 ["{}-x".format(self.Variable_name[self.placement]),
                                  "{}-y".format(self.Variable_name[self.placement])])
        self.variable_x = ContinuousVariable(names[0])
        self.variable_y = ContinuousVariable(names[1])
        if self.placement in [self.Placement.Circular, self.Placement.LDA]:
            variables = list(self.model_selected)
        elif self.placement == self.Placement.Projection:
            variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == self.Placement.PCA:
            variables = [var for var in self.data.domain.attributes if var.is_continuous]
        if not variables:
            self.graph.new_data(None)
            return
        if self.placement == self.Placement.PCA:
            valid_mask, ec, axes = self._get_pca()
            variables = self._pca.orig_domain.attributes
        else:
            valid_mask, ec, axes = self.prepare_plot_data(variables)

        self.plotdata.variables = variables
        self.plotdata.valid_mask = valid_mask
        self.plotdata.embedding_coords = ec
        self.plotdata.axes = axes
        if any(e is None for e in (valid_mask, ec, axes)):
            return

        if not sum(valid_mask):
            self.Error.no_valid_data()
            self.graph.new_data(None, None)
            return
        self.Error.no_valid_data.clear()

        self._anchor_circle(variables=variables)
        self._plot()

    def _plot(self):
        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes, class_vars=domain.class_vars, metas=new_metas)
        valid_mask = self.plotdata.valid_mask
        array = np.zeros((len(self.data), 2), dtype=np.float)
        array[valid_mask] = self.plotdata.embedding_coords
        self.plotdata.data = data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = data[self._subset_mask & valid_mask]\
            if self._subset_mask is not None and len(self._subset_mask) else None
        self.plotdata.data = data
        self.graph.new_data(data[valid_mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, False)

    def _get_lda(self, data, variables):
        domain = Domain(attributes=variables, class_vars=data.domain.class_vars)
        data = data.transform(domain)
        lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        lda.fit(data.X, data.Y)
        scalings = lda.scalings_[:, :2].T
        if scalings.shape == (1, 1):
            scalings = np.array([[1.], [0.]])
        return scalings

    def _get_pca(self):
        data = self.data
        MAX_COMPONENTS = 2
        ncomponents = 2
        DECOMPOSITIONS = [PCA]  # TruncatedSVD
        cls = DECOMPOSITIONS[0]
        pca_projector = cls(n_components=MAX_COMPONENTS)
        pca_projector.component = ncomponents
        pca_projector.preprocessors = cls.preprocessors + [Normalize()]

        pca = pca_projector(data)
        variance_ratio = pca.explained_variance_ratio_
        cumulative = np.cumsum(variance_ratio)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        max_radius = np.min([np.abs(np.min(coords, axis=0)), np.max(coords, axis=0)])
        axes = pca.components_.T.copy()
        axes *= max_radius / np.max(np.linalg.norm(axes, axis=1))
        return valid_mask, coords, axes

    def _update_graph(self, reset_view=False):
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view)

    def update_density(self):
        self._update_graph(reset_view=False)

    def selection_changed(self):
        if self.graph.selection is not None:
            self._selection = np.zeros(len(self.data), dtype=np.uint8)
            self._selection[self.plotdata.valid_mask] = self.graph.selection
            self.selection_indices = self._selection.tolist()
        else:
            self._selection = self.selection_indices = None
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        def prepare_components():
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                attrs = [a for a in self.model_selected[:]]
                axes = self.plotdata.axes
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = [a for a in self._pca.orig_domain.attributes]
            if self.placement != self.Placement.Projection:
                domain = Domain([ContinuousVariable(a.name, compute_value=lambda _: None)
                                 for a in attrs],
                                metas=[StringVariable(name='component')])
                metas = np.array([["{}{}".format(self.Component_name[self.placement], i + 1)
                                   for i in range(axes.shape[1])]],
                                 dtype=object).T
                components = Table(domain, axes.T, metas=metas)
                components.name = 'components'
            else:
                components = self.projection
            return components

        selected = annotated = components = None
        if self.data is not None and self.plotdata.data is not None:
            components = prepare_components()

            graph = self.graph
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None \
            else [False * len(mask)]

            selection = np.array([], dtype=np.uint8) if mask is None else np.flatnonzero(mask)
            name = self.data.name
            data = self.plotdata.data
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes

            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        if self.data is None:
            return

        def name(var):
            return var and var.name

        def projection_name():
            name = ("Circular Placement",
                    "Linear Discriminant Analysis",
                    "Principal Component Analysis",
                    "Input projection")
            return name[self.placement]

        caption = report.render_items_vert((
            ("Projection", projection_name()),
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index", "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
Exemplo n.º 13
0
class OWLinearProjection(OWAnchorProjectionWidget):
    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    class Inputs(OWAnchorProjectionWidget.Inputs):
        projection_input = Input("Projection", Table)

    Placement = Enum("Placement", dict(Circular=0, LDA=1, PCA=2, Projection=3),
                     type=int, qualname="OWLinearProjection.Placement")

    Component_name = {Placement.Circular: "C", Placement.LDA: "LD",
                      Placement.PCA: "PC"}
    Variable_name = {Placement.Circular: "circular",
                     Placement.LDA: "lda",
                     Placement.PCA: "pca",
                     Placement.Projection: "projection"}
    Projection_name = {Placement.Circular: "Circular Placement",
                       Placement.LDA: "Linear Discriminant Analysis",
                       Placement.PCA: "Principal Component Analysis",
                       Placement.Projection: "Use input projection"}

    settings_version = 4

    placement = Setting(Placement.Circular)
    selected_vars = ContextSetting([])
    vizrank = SettingProvider(LinearProjectionVizRank)
    GRAPH_CLASS = OWLinProjGraph
    graph = SettingProvider(OWLinProjGraph)

    class Warning(OWAnchorProjectionWidget.Warning):
        not_enough_comp = Msg("Input projection has less than two components")
        trivial_components = Msg(
            "All components of the PCA are trivial (explain zero variance). "
            "Input data is constant (or near constant).")

    class Error(OWAnchorProjectionWidget.Error):
        no_cont_features = Msg("Plotting requires numeric features")
        proj_and_domain_match = Msg("Projection and Data domains do not match")

    def __init__(self):
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_selected.rowsInserted.connect(self.__model_selected_changed)
        self.model_selected.rowsRemoved.connect(self.__model_selected_changed)
        self.model_other = VariableListModel(enable_dnd=True)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            None, self, "Suggest Features", self.__vizrank_set_attrs)

        super().__init__()
        self.projection_input = None
        self.variables = None

    def _add_controls(self):
        self._add_controls_variables()
        self._add_controls_placement()
        super()._add_controls()
        self.graph.gui.add_control(
            self._effects_box, gui.hSlider, "Hide radius:", master=self.graph,
            value="hide_radius", minValue=0, maxValue=100, step=10,
            createLabel=False, callback=self.__radius_slider_changed
        )
        self.controlArea.layout().removeWidget(self.control_area_stretch)
        self.control_area_stretch.setParent(None)

    def _add_controls_variables(self):
        self.variables_selection = VariablesSelection(
            self, self.model_selected, self.model_other, self.controlArea
        )
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank
        )

    def _add_controls_placement(self):
        box = gui.widgetBox(
            self.controlArea, True,
            sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)
        )
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=[self.Projection_name[x] for x in self.Placement],
            callback=self.__placement_radio_changed
        )

    @property
    def continuous_variables(self):
        if self.data is None or self.data.domain is None:
            return []
        dom = self.data.domain
        return [v for v in chain(dom.variables, dom.metas) if v.is_continuous]

    def __vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [var for var in self.continuous_variables
                               if var not in attrs]

    def __model_selected_changed(self):
        self.selected_vars = [(var.name, vartype(var)) for var
                              in self.model_selected]
        self.projection = None
        self.variables = None
        self._check_options()
        self.setup_plot()
        self.commit()

    def __placement_radio_changed(self):
        self.variables_selection.set_enabled(
            self.placement in [self.Placement.Circular, self.Placement.LDA])
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)
        self.projection = None
        self.variables = None
        self._init_vizrank()
        self.setup_plot()
        self.commit()

    def __radius_slider_changed(self):
        self.graph.update_radius()

    def colors_changed(self):
        super().colors_changed()
        self._init_vizrank()

    def set_data(self, data):
        super().set_data(data)
        if self.data is not None and len(self.selected_vars):
            d, selected = self.data.domain, [v[0] for v in self.selected_vars]
            self.model_selected[:] = [d[attr] for attr in selected]
            self.model_other[:] = [d[attr.name] for attr in
                                   self.continuous_variables
                                   if attr.name not in selected]
        elif self.data is not None:
            self.model_selected[:] = self.continuous_variables[:3]
            self.model_other[:] = self.continuous_variables[3:]

        self._check_options()
        self._init_vizrank()

    def _check_options(self):
        buttons = self.radio_placement.buttons
        for btn in buttons:
            btn.setEnabled(True)
        if self.data is not None:
            has_discrete_class = self.data.domain.has_discrete_class
            if not has_discrete_class or len(np.unique(self.data.Y)) < 2:
                buttons[self.Placement.LDA].setEnabled(False)
                if self.placement == self.Placement.LDA:
                    self.placement = self.Placement.Circular
            if not self.projection_input:
                buttons[self.Placement.Projection].setEnabled(False)
                if self.placement == self.Placement.Projection:
                    self.placement = self.Placement.Circular

        self.variables_selection.set_enabled(
            self.placement in [self.Placement.Circular, self.Placement.LDA])
        self.controls.graph.hide_radius.setEnabled(
            self.placement != self.Placement.Circular)

    def _init_vizrank(self):
        is_enabled, msg = False, ""
        if self.data is None:
            msg = "There is no data."
        elif self.placement not in [self.Placement.Circular,
                                    self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.attr_color.is_continuous and \
                self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear " \
                  "Discriminant Analysis Projection when " \
                  "continuous color variable is selected."
        elif len([v for v in self.continuous_variables
                  if v is not self.attr_color]) < 3:
            msg = "Not enough available continuous variables"
        elif len(self.data[self.valid_data]) < 2:
            msg = "Not enough valid data instances"
        else:
            is_enabled = not np.isnan(self.data.get_column_view(
                self.attr_color)[0].astype(float)).all()
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        if is_enabled:
            self.vizrank.initialize()

    def check_data(self):
        def error(err):
            err()
            self.data = None

        super().check_data()
        if self.data is not None:
            if not len(self.continuous_variables):
                error(self.Error.no_cont_features)

    def init_attr_values(self):
        super().init_attr_values()
        self.selected_vars = []

    @Inputs.projection_input
    def set_projection(self, projection):
        self.Warning.not_enough_comp.clear()
        if projection and len(projection) < 2:
            self.Warning.not_enough_comp()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection_input = projection
        self._check_options()

    def get_embedding(self):
        self.valid_data = None
        if self.data is None or not self.variables:
            return None

        if self.placement == self.Placement.PCA:
            self.valid_data, ec, self.projection = self._get_pca()
            self.variables = self._pca.orig_domain.attributes
        else:
            self.valid_data, ec, self.projection = \
                self.prepare_projection_data(self.variables)

        self.Error.no_valid_data.clear()
        if self.valid_data is None or not sum(self.valid_data) or \
                self.projection is None or ec is None:
            self.Error.no_valid_data()
            return None

        embedding = np.zeros((len(self.data), 2), dtype=np.float)
        embedding[self.valid_data] = ec
        return embedding

    def prepare_projection_data(self, variables):
        def projection(_vars):
            attrs = self.projection_input.domain.attributes
            if set(attrs).issuperset(_vars):
                return self.projection_input[:2, _vars].X
            elif set(f.name for f in attrs).issuperset(f.name for f in _vars):
                return self.projection_input[:2, [f.name for f in _vars]].X
            else:
                self.Error.proj_and_domain_match()
                return None

        def get_axes(_vars):
            self.Error.proj_and_domain_match.clear()
            if self.placement == self.Placement.Circular:
                return LinProj.defaultaxes(len(_vars))
            elif self.placement == self.Placement.LDA:
                return self._get_lda(self.data, _vars)
            elif self.placement == self.Placement.Projection and \
                    self.projection_input is not None:
                return projection(_vars)
            else:
                return None

        coords = np.vstack(column_data(self.data, v, float) for v in variables)
        axes = get_axes(variables)
        if axes is None:
            return None, None, None

        valid_mask = ~np.isnan(coords).any(axis=0)
        X, Y = np.dot(axes, coords[:, valid_mask])
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)
        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def get_anchors(self):
        if self.projection is None:
            return None, None
        return self.projection, [v.name for v in self.variables]

    def setup_plot(self):
        self.init_projection_variables()
        super().setup_plot()

    def init_projection_variables(self):
        self.variables = None
        if self.data is None:
            return

        if self.placement in [self.Placement.Circular, self.Placement.LDA]:
            self.variables = self.model_selected[:]
        elif self.placement == self.Placement.Projection:
            self.variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == self.Placement.PCA:
            self.variables = [var for var in self.data.domain.attributes
                              if var.is_continuous]

    def _get_lda(self, data, variables):
        data = data.transform(Domain(variables, data.domain.class_vars))
        lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        lda.fit(data.X, data.Y)
        scalings = lda.scalings_[:, :2].T
        if scalings.shape == (1, 1):
            scalings = np.array([[1.], [0.]])
        return scalings

    def _get_pca(self):
        pca_projector = PCA(n_components=2)
        pca_projector.component = 2
        pca_projector.preprocessors = PCA.preprocessors + [Normalize()]

        pca = pca_projector(self.data)
        variance_ratio = pca.explained_variance_ratio_
        cumulative = np.cumsum(variance_ratio)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        coords = pca(self.data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        max_radius = np.min([np.abs(np.min(coords, axis=0)),
                             np.max(coords, axis=0)])
        axes = pca.components_.T.copy()
        axes *= max_radius / np.max(np.linalg.norm(axes, axis=1))
        return valid_mask, coords, axes

    def send_components(self):
        components = None
        if self.data is not None and self.valid_data is not None and \
                self.projection is not None:
            if self.placement in [self.Placement.Circular, self.Placement.LDA]:
                axes = self.projection
                attrs = self.model_selected
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = self._pca.orig_domain.attributes
            if self.placement != self.Placement.Projection:
                meta_attrs = [StringVariable(name='component')]
                metas = np.array(
                    [["{}{}".format(self.Component_name[self.placement], i + 1)
                      for i in range(axes.shape[1])]], dtype=object).T
                components = Table(Domain(attrs, metas=meta_attrs),
                                   axes.T, metas=metas)
                components.name = self.data.name
            else:
                components = self.projection_input
        self.Outputs.components.send(components)

    def _get_projection_variables(self):
        pn = self.Variable_name[self.placement]
        self.embedding_variables_names = ("{}-x".format(pn), "{}-y".format(pn))
        return super()._get_projection_variables()

    def _get_send_report_caption(self):
        def projection_name():
            return self.Projection_name[self.placement]

        return report.render_items_vert((
            ("Projection", projection_name()),
            ("Color", self._get_caption_var_name(self.attr_color)),
            ("Label", self._get_caption_var_name(self.attr_label)),
            ("Shape", self._get_caption_var_name(self.attr_shape)),
            ("Size", self._get_caption_var_name(self.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))

    def clear(self):
        self.variables = None
        if self.model_selected:
            self.model_selected.clear()
        if self.model_other:
            self.model_other.clear()
        super().clear()

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph
        if version < 4:
            if "radius" in settings_:
                settings_["graph"]["hide_radius"] = settings_["radius"]
            if "selection_indices" in settings_ and \
                    settings_["selection_indices"] is not None:
                selection = settings_["selection_indices"]
                settings_["selection"] = [(i, 1) for i, selected in
                                          enumerate(selection) if selected]

    @classmethod
    def migrate_context(cls, context, version):
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in ((domain, "color_index", "attr_color"),
                                        (d_domain, "shape_index", "attr_shape"),
                                        (c_domain, "size_index", "attr_size")):
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
        if version == 3:
            values = context.values
            values["attr_color"] = values["graph"]["attr_color"]
            values["attr_size"] = values["graph"]["attr_size"]
            values["attr_shape"] = values["graph"]["attr_shape"]
            values["attr_label"] = values["graph"]["attr_label"]
Exemplo n.º 14
0
class OWLineChart(widget.OWWidget):
    name = "折线图(Line Chart)"
    description = "以折线图形式观察某一列数据."
    icon = "icons/LineChart.svg"
    priority = 90
    category = "可视化(Visualize)"
    keywords = ["zhexiantu"]

    class Inputs:
        time_series = Input("数据(Data)", Table, replaces="Data")

    attrs = settings.Setting({})  # Maps data.name -> [attrs]

    graph_name = "chart"

    def __init__(self):
        self.data = None
        self.plots = []
        self.configs = []
        self.varmodel = VariableListModel(parent=self)
        icon = QIcon(join(dirname(__file__), "icons", "LineChart-plus.png"))
        self.add_button = button = QPushButton(icon, "添加折线图", self)
        button.clicked.connect(self.add_plot)
        self.controlArea.layout().addWidget(button)
        self.configsArea = gui.vBox(self.controlArea)
        self.controlArea.layout().addStretch(1)
        # TODO: allow selecting ranges that are sent to output as subset table
        self.chart = highstock = Highstock(self, highchart="StockChart")
        self.mainArea.layout().addWidget(highstock)
        # highstock.evalJS('Highcharts.setOptions({navigator: {enabled:false}});')
        highstock.chart(
            # For some reason, these options don't work as global opts applied at Highstock init time
            # Disable top range selector
            rangeSelector_enabled=False,
            rangeSelector_inputEnabled=False,
            # Disable bottom miniview navigator (it doesn't update)
            navigator_enabled=False,
        )
        QTimer.singleShot(0, self.add_plot)
        self.chart.add_legend()

    def add_plot(self):
        ax = self.chart.addAxis()
        config = PlotConfigWidget(self, ax, self.varmodel)
        # Connect the signals
        config.sigSelection.connect(self.chart.setSeries)
        config.sigLogarithmic.connect(self.chart.setLogarithmic)
        config.sigType.connect(self.chart.setType)
        config.sigClosed.connect(self.chart.removeAxis)
        config.sigClosed.connect(lambda ax, widget: widget.setParent(None))
        config.sigClosed.connect(
            lambda ax, widget: self.add_button.setDisabled(False))
        self.configs.append(config)
        self.add_button.setDisabled(len(self.configs) >= 5)
        config.sigClosed.connect(lambda ax, widget: self.remove_plot(widget))

        self.configsArea.layout().addWidget(config)

    def remove_plot(self, plot):
        self.configs.remove(plot)
        # # self.configsArea.layout()
        if len(self.chart.axes) < 2:
            self.resize(QSize(925, 635))

    @Inputs.time_series
    def set_data(self, data):
        # TODO: set xAxis resolution and tooltip time contents depending on
        # data.time_delta. See: http://imgur.com/yrnlgQz

        # If the same data is updated, short circuit to just updating the chart,
        # retaining all panels and list view selections ...

        # new_data = None if data is None else \
        #            Timeseries.from_data_table(data)
        new_data = data
        if (new_data is not None and self.data is not None
                and new_data.domain == self.data.domain):
            self.data = new_data
            for config in self.configs:
                config.selection_changed()
            return

        # self.data = data = None if data is None else \
        #                    Timeseries.from_data_table(data)
        self.data = data
        if data is None:
            self.varmodel.clear()
            self.chart.clear()
            return
        # if getattr(data.time_variable, 'utc_offset', False):
        #     offset_minutes = data.time_variable.utc_offset.total_seconds() / 60
        #     self.chart.evalJS('Highcharts.setOptions({global: {timezoneOffset: %d}});' % -offset_minutes)  # Why is this negative? It works.
        #     self.chart.chart() and

        self.chart.setXAxisType("linear")

        self.varmodel.wrap(
            [var for var in data.domain.variables if var.is_continuous])
Exemplo n.º 15
0
class OWBoxPlot(widget.OWWidget):
    """
    Here's how the widget's functions call each other:

    - `set_data` is a signal handler fills the list boxes and calls
    `grouping_changed`.

    - `grouping_changed` handles changes of grouping attribute: it enables or
    disables the box for ordering, orders attributes and calls `attr_changed`.

    - `attr_changed` handles changes of attribute. It recomputes box data by
    calling `compute_box_data`, shows the appropriate display box
    (discrete/continuous) and then calls`layout_changed`

    - `layout_changed` constructs all the elements for the scene (as lists of
    QGraphicsItemGroup) and calls `display_changed`. It is called when the
    attribute or grouping is changed (by attr_changed) and on resize event.

    - `display_changed` puts the elements corresponding to the current display
    settings on the scene. It is called when the elements are reconstructed
    (layout is changed due to selection of attributes or resize event), or
    when the user changes display settings or colors.

    For discrete attributes, the flow is a bit simpler: the elements are not
    constructed in advance (by layout_changed). Instead, layout_changed and
    display_changed call display_changed_disc that draws everything.
    """
    name = "箱形图"
    description = "在方框图中可视化特征值的分布。"
    icon = "icons/BoxPlot.svg"
    priority = 100
    keywords = ["whisker"]

    class Inputs:
        data = Input("数据", Orange.data.Table)

    class Outputs:
        selected_data = Output("所选数据", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    #: Comparison types for continuous variables
    CompareNone, CompareMedians, CompareMeans = 0, 1, 2

    settingsHandler = DomainContextHandler()
    conditions = ContextSetting([])

    attribute = ContextSetting(None)
    order_by_importance = Setting(False)
    group_var = ContextSetting(None)
    show_annotations = Setting(True)
    compare = Setting(CompareMeans)
    stattest = Setting(0)
    sig_threshold = Setting(0.05)
    stretched = Setting(True)
    show_labels = Setting(True)
    sort_freqs = Setting(False)
    auto_commit = Setting(True)

    _sorting_criteria_attrs = {
        CompareNone: "",
        CompareMedians: "median",
        CompareMeans: "mean"
    }

    _pen_axis_tick = QPen(Qt.white, 5)
    _pen_axis = QPen(Qt.darkGray, 3)
    _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2)
    _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2)
    _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1)
    _pen_dotted.setStyle(Qt.DotLine)
    _post_line_pen = QPen(Qt.lightGray, 2)
    _post_grp_pen = QPen(Qt.lightGray, 4)
    for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis,
                _pen_axis_tick, _post_line_pen, _post_grp_pen):
        pen.setCosmetic(True)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
    _pen_axis_tick.setCapStyle(Qt.FlatCap)

    _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0))

    _axis_font = QFont()
    _axis_font.setPixelSize(12)
    _label_font = QFont()
    _label_font.setPixelSize(11)
    _attr_brush = QBrush(QColor(0x33, 0x00, 0xff))

    graph_name = "盒式布景"

    def __init__(self):
        super().__init__()
        self.stats = []
        self.dataset = None
        self.posthoc_lines = []

        self.label_txts = self.mean_labels = self.boxes = self.labels = \
            self.label_txts_all = self.attr_labels = self.order = []
        self.p = -1.0
        self.scale_x = self.scene_min_x = self.scene_width = 0
        self.label_width = 0

        self.attrs = VariableListModel()
        view = gui.listView(self.controlArea,
                            self,
                            "attribute",
                            box="变量",
                            model=self.attrs,
                            callback=self.attr_changed)
        view.setMinimumSize(QSize(30, 30))
        # Any other policy than Ignored will let the QListBox's scrollbar
        # set the minimal height (see the penultimate paragraph of
        # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
        gui.separator(view.box, 6, 6)
        self.cb_order = gui.checkBox(view.box,
                                     self,
                                     "order_by_importance",
                                     "按相关性排序",
                                     tooltip="由𝜒²或方差对子群排序",
                                     callback=self.apply_sorting)
        self.group_vars = DomainModel(placeholder="无",
                                      separators=False,
                                      valid_types=Orange.data.DiscreteVariable)
        self.group_view = view = gui.listView(self.controlArea,
                                              self,
                                              "group_var",
                                              box="子群",
                                              model=self.group_vars,
                                              callback=self.grouping_changed)
        view.setEnabled(False)
        view.setMinimumSize(QSize(30, 30))
        # See the comment above
        view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)

        # TODO: move Compare median/mean to grouping box
        # The vertical size policy is needed to let only the list views expand
        self.display_box = gui.vBox(self.controlArea,
                                    "Display",
                                    sizePolicy=(QSizePolicy.Minimum,
                                                QSizePolicy.Maximum),
                                    addSpace=False)

        gui.checkBox(self.display_box,
                     self,
                     "show_annotations",
                     "Annotate",
                     callback=self.display_changed)
        self.compare_rb = gui.radioButtonsInBox(
            self.display_box,
            self,
            'compare',
            btnLabels=["无比较", "中位数比较", "均值比较"],
            callback=self.layout_changed)

        # The vertical size policy is needed to let only the list views expand
        self.stretching_box = box = gui.vBox(self.controlArea,
                                             box="显示",
                                             sizePolicy=(QSizePolicy.Minimum,
                                                         QSizePolicy.Fixed))
        self.stretching_box.sizeHint = self.display_box.sizeHint
        gui.checkBox(box,
                     self,
                     'stretched',
                     "拉杆",
                     callback=self.display_changed)
        gui.checkBox(box,
                     self,
                     'show_labels',
                     "显示框标签",
                     callback=self.display_changed)
        self.sort_cb = gui.checkBox(box,
                                    self,
                                    'sort_freqs',
                                    "按子组频率排序",
                                    callback=self.display_changed)
        gui.rubber(box)

        gui.auto_commit(self.controlArea, self, "auto_commit", "选中发送", "自动发送")

        gui.vBox(self.mainArea, addSpace=True)
        self.box_scene = QGraphicsScene()
        self.box_scene.selectionChanged.connect(self.commit)
        self.box_view = QGraphicsView(self.box_scene)
        self.box_view.setRenderHints(QPainter.Antialiasing
                                     | QPainter.TextAntialiasing
                                     | QPainter.SmoothPixmapTransform)
        self.box_view.viewport().installEventFilter(self)

        self.mainArea.layout().addWidget(self.box_view)

        e = gui.hBox(self.mainArea, addSpace=False)
        self.infot1 = gui.widgetLabel(e, "<center>没有测试结果。</center>")
        self.mainArea.setMinimumWidth(600)

        self.stats = self.dist = self.conts = []
        self.is_continuous = False

        self.update_display_box()

    def sizeHint(self):
        return QSize(100, 500)  # Vertical size is regulated by mainArea

    def eventFilter(self, obj, event):
        if obj is self.box_view.viewport() and \
                event.type() == QEvent.Resize:
            self.layout_changed()

        return super().eventFilter(obj, event)

    def reset_attrs(self, domain):
        self.attrs[:] = [
            var for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes) if var.is_primitive()
        ]

    # noinspection PyTypeChecker
    @Inputs.data
    def set_data(self, dataset):
        if dataset is not None and (not bool(dataset)
                                    or not len(dataset.domain) and
                                    not any(var.is_primitive()
                                            for var in dataset.domain.metas)):
            dataset = None
        self.closeContext()
        self.dataset = dataset
        self.dist = self.stats = self.conts = []
        self.group_var = None
        self.attribute = None
        if dataset:
            domain = dataset.domain
            self.group_vars.set_domain(domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.reset_attrs(domain)
            self.select_default_variables(domain)
            self.openContext(self.dataset)
            self.grouping_changed()
        else:
            self.reset_all_data()
        self.commit()

    def select_default_variables(self, domain):
        # visualize first non-class variable, group by class (if present)
        if len(self.attrs) > len(domain.class_vars):
            self.attribute = self.attrs[len(domain.class_vars)]
        elif self.attrs:
            self.attribute = self.attrs[0]

        if domain.class_var and domain.class_var.is_discrete:
            self.group_var = domain.class_var
        else:
            self.group_var = None  # Reset to trigger selection via callback

    def apply_sorting(self):
        def compute_score(attr):
            if attr is group_var:
                return 3
            if attr.is_continuous:
                # One-way ANOVA
                col = data.get_column_view(attr)[0].astype(float)
                groups = (col[group_col == i] for i in range(n_groups))
                groups = (col[~np.isnan(col)] for col in groups)
                groups = [group for group in groups if len(group)]
                p = f_oneway(*groups)[1] if len(groups) > 1 else 2
            else:
                # Chi-square with the given distribution into groups
                # (see degrees of freedom in computation of the p-value)
                if not attr.values or not group_var.values:
                    return 2
                observed = np.array(
                    contingency.get_contingency(data, group_var, attr))
                observed = observed[observed.sum(axis=1) != 0, :]
                observed = observed[:, observed.sum(axis=0) != 0]
                if min(observed.shape) < 2:
                    return 2
                expected = \
                    np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \
                    np.sum(observed)
                p = chisquare(observed.ravel(),
                              f_exp=expected.ravel(),
                              ddof=n_groups - 1)[1]
            if math.isnan(p):
                return 2
            return p

        data = self.dataset
        if data is None:
            return
        domain = data.domain
        attribute = self.attribute
        group_var = self.group_var
        if self.order_by_importance and group_var is not None:
            n_groups = len(group_var.values)
            group_col = data.get_column_view(group_var)[0] if \
                domain.has_continuous_attributes(
                    include_class=True, include_metas=True) else None
            self.attrs.sort(key=compute_score)
        else:
            self.reset_attrs(domain)
        self.attribute = attribute

    def reset_all_data(self):
        self.clear_scene()
        self.infot1.setText("")
        self.attrs.clear()
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)
        self.is_continuous = False
        self.update_display_box()

    def grouping_changed(self):
        self.cb_order.setEnabled(self.group_var is not None)
        self.apply_sorting()
        self.attr_changed()

    def select_box_items(self):
        temp_cond = self.conditions.copy()
        for box in self.box_scene.items():
            if isinstance(box, FilterGraphicsRectItem):
                box.setSelected(
                    box.filter.conditions in [c.conditions for c in temp_cond])

    def attr_changed(self):
        self.compute_box_data()
        self.update_display_box()
        self.layout_changed()

        if self.is_continuous:
            heights = 90 if self.show_annotations else 60
            self.box_view.centerOn(self.scene_min_x + self.scene_width / 2,
                                   -30 - len(self.stats) * heights / 2 + 45)
        else:
            self.box_view.centerOn(self.scene_width / 2,
                                   -30 - len(self.boxes) * 40 / 2 + 45)

    def compute_box_data(self):
        attr = self.attribute
        if not attr:
            return
        dataset = self.dataset
        self.is_continuous = attr.is_continuous
        if dataset is None or not self.is_continuous and not attr.values or \
                        self.group_var and not self.group_var.values:
            self.stats = self.dist = self.conts = []
            return
        if self.group_var:
            self.dist = []
            self.conts = contingency.get_contingency(dataset, attr,
                                                     self.group_var)
            if self.is_continuous:
                stats, label_texts = [], []
                for i, cont in enumerate(self.conts):
                    if np.sum(cont[1]):
                        stats.append(BoxData(cont, attr, i, self.group_var))
                        label_texts.append(self.group_var.values[i])
                self.stats = stats
                self.label_txts_all = label_texts
            else:
                self.label_txts_all = \
                    [v for v, c in zip(self.group_var.values, self.conts)
                     if np.sum(c) > 0]
        else:
            self.dist = distribution.get_distribution(dataset, attr)
            self.conts = []
            if self.is_continuous:
                self.stats = [BoxData(self.dist, attr, None)]
            self.label_txts_all = [""]
        self.label_txts = [
            txts for stat, txts in zip(self.stats, self.label_txts_all)
            if stat.n > 0
        ]
        self.stats = [stat for stat in self.stats if stat.n > 0]

    def update_display_box(self):
        if self.is_continuous:
            self.stretching_box.hide()
            self.display_box.show()
            self.compare_rb.setEnabled(self.group_var is not None)
        else:
            self.stretching_box.show()
            self.display_box.hide()
            self.sort_cb.setEnabled(self.group_var is not None)

    def clear_scene(self):
        self.closeContext()
        self.box_scene.clearSelection()
        self.box_scene.clear()
        self.box_view.viewport().update()
        self.attr_labels = []
        self.labels = []
        self.boxes = []
        self.mean_labels = []
        self.posthoc_lines = []
        self.openContext(self.dataset)

    def layout_changed(self):
        attr = self.attribute
        if not attr:
            return
        self.clear_scene()
        if self.dataset is None or len(self.conts) == len(self.dist) == 0:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.mean_labels = [
            self.mean_label(stat, attr, lab)
            for stat, lab in zip(self.stats, self.label_txts)
        ]
        self.draw_axis()
        self.boxes = [self.box_group(stat) for stat in self.stats]
        self.labels = [
            self.label_group(stat, attr, mean_lab)
            for stat, mean_lab in zip(self.stats, self.mean_labels)
        ]
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts
        ]
        for it in chain(self.labels, self.attr_labels):
            self.box_scene.addItem(it)
        self.display_changed()

    def display_changed(self):
        if self.dataset is None:
            return

        if not self.is_continuous:
            self.display_changed_disc()
            return

        self.order = list(range(len(self.stats)))
        criterion = self._sorting_criteria_attrs[self.compare]
        if criterion:
            vals = [getattr(stat, criterion) for stat in self.stats]
            overmax = max((val for val in vals if val is not None), default=0) \
                      + 1
            vals = [val if val is not None else overmax for val in vals]
            self.order = sorted(self.order, key=vals.__getitem__)

        heights = 90 if self.show_annotations else 60

        for row, box_index in enumerate(self.order):
            y = (-len(self.stats) + row) * heights + 10
            for item in self.boxes[box_index]:
                self.box_scene.addItem(item)
                item.setY(y)
            labels = self.labels[box_index]

            if self.show_annotations:
                labels.show()
                labels.setY(y)
            else:
                labels.hide()

            label = self.attr_labels[box_index]
            label.setY(y - 15 - label.boundingRect().height())
            if self.show_annotations:
                label.hide()
            else:
                stat = self.stats[box_index]

                if self.compare == OWBoxPlot.CompareMedians and \
                        stat.median is not None:
                    pos = stat.median + 5 / self.scale_x
                elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None:
                    pos = stat.mean + 5 / self.scale_x
                else:
                    pos = stat.q25
                label.setX(pos * self.scale_x)
                label.show()

        r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights,
                   self.scene_width,
                   len(self.stats) * heights + 90)
        self.box_scene.setSceneRect(r)

        self.compute_tests()
        self.show_posthoc()
        self.select_box_items()

    def display_changed_disc(self):
        assert not self.is_continuous
        self.clear_scene()
        self.attr_labels = [
            QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all
        ]

        if not self.stretched:
            if self.group_var:
                self.labels = [
                    QGraphicsTextItem("{}".format(int(sum(cont))))
                    for cont in self.conts if np.sum(cont) > 0
                ]
            else:
                self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))]

        self.order = list(range(len(self.attr_labels)))

        self.draw_axis_disc()
        if self.group_var:
            self.boxes = \
                [self.strudel(cont, i) for i, cont in enumerate(self.conts)
                 if np.sum(cont) > 0]
            self.conts = self.conts[np.sum(np.array(self.conts), axis=1) > 0]

            if self.sort_freqs:
                # pylint: disable=invalid-unary-operand-type
                self.order = sorted(
                    self.order, key=(-np.sum(self.conts, axis=1)).__getitem__)
        else:
            self.boxes = [self.strudel(self.dist)]

        for row, box_index in enumerate(self.order):
            y = (-len(self.boxes) + row) * 40 + 10
            box = self.boxes[box_index]
            bars, labels = box[::2], box[1::2]

            self.__draw_group_labels(y, box_index)
            if not self.stretched:
                self.__draw_row_counts(y, box_index)
            if self.show_labels and self.attribute is not self.group_var:
                self.__draw_bar_labels(y, bars, labels)
            self.__draw_bars(y, bars)

        self.box_scene.setSceneRect(-self.label_width - 5,
                                    -30 - len(self.boxes) * 40,
                                    self.scene_width,
                                    len(self.boxes * 40) + 90)
        self.infot1.setText("")
        self.select_box_items()

    def __draw_group_labels(self, y, row):
        """Draw group labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        label = self.attr_labels[row]
        b = label.boundingRect()
        label.setPos(-b.width() - 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_row_counts(self, y, row):
        """Draw row counts

        Parameters
        ----------
        y: int
            vertical offset of bars
        row: int
            row index
        """
        assert not self.is_continuous
        label = self.labels[row]
        b = label.boundingRect()
        if self.group_var:
            right = self.scale_x * sum(self.conts[row])
        else:
            right = self.scale_x * sum(self.dist)
        label.setPos(right + 10, y - b.height() / 2)
        self.box_scene.addItem(label)

    def __draw_bar_labels(self, y, bars, labels):
        """Draw bar labels

        Parameters
        ----------
        y: int
            vertical offset of bars
        bars: List[FilterGraphicsRectItem]
            list of bars being drawn
        labels: List[QGraphicsTextItem]
            list of labels for corresponding bars
        """
        label = bar_part = None
        for text_item, bar_part in zip(labels, bars):
            label = self.Label(text_item.toPlainText())
            label.setPos(bar_part.boundingRect().x(),
                         y - label.boundingRect().height() - 8)
            label.setMaxWidth(bar_part.boundingRect().width())
            self.box_scene.addItem(label)

    def __draw_bars(self, y, bars):
        """Draw bars

        Parameters
        ----------
        y: int
            vertical offset of bars

        bars: List[FilterGraphicsRectItem]
            list of bars to draw
        """
        for item in bars:
            item.setPos(0, y)
            self.box_scene.addItem(item)

    # noinspection PyPep8Naming
    def compute_tests(self):
        # The t-test and ANOVA are implemented here since they efficiently use
        # the widget-specific data in self.stats.
        # The non-parametric tests can't do this, so we use statistics.tests
        def stat_ttest():
            d1, d2 = self.stats
            if d1.n == 0 or d2.n == 0:
                return np.nan, np.nan
            pooled_var = d1.var / d1.n + d2.var / d2.n
            df = pooled_var ** 2 / \
                ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                 (d2.var / d2.n) ** 2 / (d2.n - 1))
            if pooled_var == 0:
                return np.nan, np.nan
            t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
            p = 2 * (1 - scipy.special.stdtr(df, t))
            return t, p

        # TODO: Check this function
        # noinspection PyPep8Naming
        def stat_ANOVA():
            if any(stat.n == 0 for stat in self.stats):
                return np.nan, np.nan
            n = sum(stat.n for stat in self.stats)
            grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n
            var_between = sum(stat.n * (stat.mean - grand_avg)**2
                              for stat in self.stats)
            df_between = len(self.stats) - 1

            var_within = sum(stat.n * stat.var for stat in self.stats)
            df_within = n - len(self.stats)
            F = (var_between / df_between) / (var_within / df_within)
            p = 1 - scipy.special.fdtr(df_between, df_within, F)
            return F, p

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            t = ""
        elif any(s.n <= 1 for s in self.stats):
            t = "At least one group has just one instance," \
                "cannot compute significance"
        elif len(self.stats) == 2:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # z, self.p = tests.wilcoxon_rank_sum(
                #    self.stats[0].dist, self.stats[1].dist)
                # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p)
            else:
                t, self.p = stat_ttest()
                t = "Student's t: %.3f (p=%.3f)" % (t, self.p)
        else:
            if self.compare == OWBoxPlot.CompareMedians:
                t = ""
                # U, self.p = -1, -1
                # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p)
            else:
                F, self.p = stat_ANOVA()
                t = "ANOVA: %.3f (p=%.3f)" % (F, self.p)
        self.infot1.setText("<center>%s</center>" % t)

    def mean_label(self, stat, attr, val_name):
        label = QGraphicsItemGroup()
        t = QGraphicsSimpleTextItem(
            "%.*f" % (attr.number_of_decimals + 1, stat.mean), label)
        t.setFont(self._label_font)
        bbox = t.boundingRect()
        w2, h = bbox.width() / 2, bbox.height()
        t.setPos(-w2, -h)
        tpm = QGraphicsSimpleTextItem(
            " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev),
            label)
        tpm.setFont(self._label_font)
        tpm.setPos(w2, -h)
        if val_name:
            vnm = QGraphicsSimpleTextItem(val_name + ": ", label)
            vnm.setFont(self._label_font)
            vnm.setBrush(self._attr_brush)
            vb = vnm.boundingRect()
            label.min_x = -w2 - vb.width()
            vnm.setPos(label.min_x, -h)
        else:
            label.min_x = -w2
        return label

    def draw_axis(self):
        """Draw the horizontal axis and sets self.scale_x"""
        misssing_stats = not self.stats
        stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)]
        mean_labels = self.mean_labels or [
            self.mean_label(stats[0], self.attribute, "")
        ]
        bottom = min(stat.a_min for stat in stats)
        top = max(stat.a_max for stat in stats)

        first_val, step = compute_scale(bottom, top)
        while bottom <= first_val:
            first_val -= step
        bottom = first_val
        no_ticks = math.ceil((top - first_val) / step) + 1
        top = max(top, first_val + no_ticks * step)

        gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats))
        gtop = max(top, max(stat.mean + stat.dev for stat in stats))

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        # In principle we should repeat this until convergence since the new
        # scaling is too conservative. (No chance am I doing this.)
        mlb = min(stat.mean + mean_lab.min_x / scale_x
                  for stat, mean_lab in zip(stats, mean_labels))
        if mlb < gbottom:
            gbottom = mlb
            self.scale_x = scale_x = viewrect.width() / (gtop - gbottom)

        self.scene_min_x = gbottom * scale_x
        self.scene_width = (gtop - gbottom) * scale_x

        val = first_val
        decimals = max(3, 4 - int(math.log10(step)))
        while True:
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(
                repr(round(val, decimals)) if not misssing_stats else "?",
                self._axis_font)
            t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations)
            r = t.boundingRect()
            t.setPos(val * scale_x - r.width() / 2, 8)
            if val >= top:
                break
            val += step
        self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0,
                               self._pen_axis)

    def draw_axis_disc(self):
        """
        Draw the horizontal axis and sets self.scale_x for discrete attributes
        """
        assert not self.is_continuous
        if self.stretched:
            if not self.attr_labels:
                return
            step = steps = 10
        else:
            if self.group_var:
                max_box = max(float(np.sum(dist)) for dist in self.conts)
            else:
                max_box = float(np.sum(self.dist))
            if max_box == 0:
                self.scale_x = 1
                return
            _, step = compute_scale(0, max_box)
            step = int(step) if step > 1 else 1
            steps = int(math.ceil(max_box / step))
        max_box = step * steps

        bv = self.box_view
        viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30)
        self.scene_width = viewrect.width()

        lab_width = max(lab.boundingRect().width() for lab in self.attr_labels)
        lab_width = max(lab_width, 40)
        lab_width = min(lab_width, self.scene_width / 3)
        self.label_width = lab_width

        right_offset = 0  # offset for the right label
        if not self.stretched and self.labels:
            if self.group_var:
                rows = list(zip(self.conts, self.labels))
            else:
                rows = [(self.dist, self.labels[0])]
            # available space left of the 'group labels'
            available = self.scene_width - lab_width - 10
            scale_x = (available - right_offset) / max_box
            max_right = max(
                sum(dist) * scale_x + 10 + lbl.boundingRect().width()
                for dist, lbl in rows)
            right_offset = max(0, max_right - max_box * scale_x)

        self.scale_x = scale_x = \
            (self.scene_width - lab_width - 10 - right_offset) / max_box

        self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis)
        for val in range(0, step * steps + 1, step):
            l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1,
                                       self._pen_axis_tick)
            l.setZValue(100)
            t = self.box_scene.addSimpleText(str(val), self._axis_font)
            t.setPos(val * scale_x - t.boundingRect().width() / 2, 8)
        if self.stretched:
            self.scale_x *= 100

    def label_group(self, stat, attr, mean_lab):
        def centered_text(val, pos):
            t = QGraphicsSimpleTextItem(
                "%.*f" % (attr.number_of_decimals + 1, val), labels)
            t.setFont(self._label_font)
            bbox = t.boundingRect()
            t.setPos(pos - bbox.width() / 2, 22)
            return t

        def line(x, down=1):
            QGraphicsLineItem(x, 12 * down, x, 20 * down, labels)

        def move_label(label, frm, to):
            label.setX(to)
            to += t_box.width() / 2
            path = QPainterPath()
            path.lineTo(0, 4)
            path.lineTo(to - frm, 4)
            path.lineTo(to - frm, 8)
            p = QGraphicsPathItem(path)
            p.setPos(frm, 12)
            labels.addToGroup(p)

        labels = QGraphicsItemGroup()

        labels.addToGroup(mean_lab)
        m = stat.mean * self.scale_x
        mean_lab.setPos(m, -22)
        line(m, -1)

        if stat.median is not None:
            msc = stat.median * self.scale_x
            med_t = centered_text(stat.median, msc)
            med_box_width2 = med_t.boundingRect().width() / 2
            line(msc)

        if stat.q25 is not None:
            x = stat.q25 * self.scale_x
            t = centered_text(stat.q25, x)
            t_box = t.boundingRect()
            med_left = msc - med_box_width2
            if x + t_box.width() / 2 >= med_left - 5:
                move_label(t, x, med_left - t_box.width() - 5)
            else:
                line(x)

        if stat.q75 is not None:
            x = stat.q75 * self.scale_x
            t = centered_text(stat.q75, x)
            t_box = t.boundingRect()
            med_right = msc + med_box_width2
            if x - t_box.width() / 2 <= med_right + 5:
                move_label(t, x, med_right + 5)
            else:
                line(x)

        return labels

    def box_group(self, stat, height=20):
        def line(x0, y0, x1, y1, *args):
            return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args)

        scale_x = self.scale_x
        box = []
        whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5)
        whisker2 = line(stat.a_max, -1.5, stat.a_max, 1.5)
        vert_line = line(stat.a_min, 0, stat.a_max, 0)
        mean_line = line(stat.mean, -height / 3, stat.mean, height / 3)
        for it in (whisker1, whisker2, mean_line):
            it.setPen(self._pen_paramet)
        vert_line.setPen(self._pen_dotted)
        var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0)
        var_line.setPen(self._pen_paramet)
        box.extend([whisker1, whisker2, vert_line, mean_line, var_line])
        if stat.q25 is not None and stat.q75 is not None:
            mbox = FilterGraphicsRectItem(stat.conditions, stat.q25 * scale_x,
                                          -height / 2,
                                          (stat.q75 - stat.q25) * scale_x,
                                          height)
            mbox.setBrush(self._box_brush)
            mbox.setPen(QPen(Qt.NoPen))
            mbox.setZValue(-200)
            box.append(mbox)

        if stat.median is not None:
            median_line = line(stat.median, -height / 2, stat.median,
                               height / 2)
            median_line.setPen(self._pen_median)
            median_line.setZValue(-150)
            box.append(median_line)

        return box

    def strudel(self, dist, group_val_index=None):
        attr = self.attribute
        ss = np.sum(dist)
        box = []
        if ss < 1e-6:
            cond = [FilterDiscrete(attr, None)]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10))
        cum = 0
        for i, v in enumerate(dist):
            if v < 1e-6:
                continue
            if self.stretched:
                v /= ss
            v *= self.scale_x
            cond = [FilterDiscrete(attr, [i])]
            if group_val_index is not None:
                cond.append(FilterDiscrete(self.group_var, [group_val_index]))
            rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12)
            rect.setBrush(QBrush(QColor(*attr.colors[i])))
            rect.setPen(QPen(Qt.NoPen))
            if self.stretched:
                tooltip = "{}: {:.2f}%".format(attr.values[i],
                                               100 * dist[i] / sum(dist))
            else:
                tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
            rect.setToolTip(tooltip)
            text = QGraphicsTextItem(attr.values[i])
            box.append(rect)
            box.append(text)
            cum += v
        return box

    def commit(self):
        self.conditions = [
            item.filter for item in self.box_scene.selectedItems()
            if item.filter
        ]
        selected, selection = None, []
        if self.conditions:
            selected = Values(self.conditions, conjunction=False)(self.dataset)
            selection = np.in1d(self.dataset.ids,
                                selected.ids,
                                assume_unique=True).nonzero()[0]
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, selection))

    def show_posthoc(self):
        def line(y0, y1):
            it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen)
            it.setZValue(-100)
            self.posthoc_lines.append(it)

        while self.posthoc_lines:
            self.box_scene.removeItem(self.posthoc_lines.pop())

        if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2:
            return

        if self.compare == OWBoxPlot.CompareMedians:
            crit_line = "median"
        else:
            crit_line = "mean"

        xs = []

        height = 90 if self.show_annotations else 60

        y_up = -len(self.stats) * height + 10
        for pos, box_index in enumerate(self.order):
            stat = self.stats[box_index]
            x = getattr(stat, crit_line)
            if x is None:
                continue
            x *= self.scale_x
            xs.append(x * self.scale_x)
            by = y_up + pos * height
            line(by + 12, 3)
            line(by - 12, by - 25)

        used_to = []
        last_to = to = 0
        for frm, frm_x in enumerate(xs[:-1]):
            for to in range(frm + 1, len(xs)):
                if xs[to] - frm_x > 1.5:
                    to -= 1
                    break
            if to in (last_to, frm):
                continue
            for rowi, used in enumerate(used_to):
                if used < frm:
                    used_to[rowi] = to
                    break
            else:
                rowi = len(used_to)
                used_to.append(to)
            y = -6 - rowi * 6
            it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y,
                                        self._post_grp_pen)
            self.posthoc_lines.append(it)
            last_to = to

    def get_widget_name_extension(self):
        return self.attribute.name if self.attribute else None

    def send_report(self):
        self.report_plot()
        text = ""
        if self.attribute:
            text += "Box plot for attribute '{}' ".format(self.attribute.name)
        if self.group_var:
            text += "grouped by '{}'".format(self.group_var.name)
        if text:
            self.report_caption(text)

    class Label(QGraphicsSimpleTextItem):
        """Boxplot Label with settable maxWidth"""
        # Minimum width to display label text
        MIN_LABEL_WIDTH = 25

        # padding bellow the text
        PADDING = 3

        __max_width = None

        def maxWidth(self):
            return self.__max_width

        def setMaxWidth(self, max_width):
            self.__max_width = max_width

        def paint(self, painter, option, widget):
            """Overrides QGraphicsSimpleTextItem.paint

            If label text is too long, it is elided
            to fit into the allowed region
            """
            if self.__max_width is None:
                width = option.rect.width()
            else:
                width = self.__max_width

            if width < self.MIN_LABEL_WIDTH:
                # if space is too narrow, no label
                return

            fm = painter.fontMetrics()
            text = fm.elidedText(self.text(), Qt.ElideRight, width)
            painter.drawText(
                option.rect.x(),
                option.rect.y() + self.boundingRect().height() - self.PADDING,
                text)
Exemplo n.º 16
0
class OWRadviz(widget.OWWidget):
    name = "Radviz"
    description = "Radviz"

    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required"
        )
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs
        )
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(
            self.controlArea,
            self,
            "auto_commit",
            "Send Selection",
            auto_label="Send Automatically",
        )

        self.graph.zoom_actions(self)

        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1.0, -1.0, 2.0, 2.0))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        self._update_points_labels()

    def keyPressEvent(self, event):
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(
                self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10
            )

    def init_attr_values(self):
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        attr_color = self.graph.attr_color
        is_enabled = (
            self.data is not None
            and not self.data.is_sparse()
            and (len(self.model_other) + len(self.model_selected)) > 3
            and len(self.data) > 1
        )
        self.btn_vizrank.setEnabled(
            is_enabled
            and attr_color is not None
            and not np.isnan(
                self.data.get_column_view(attr_color)[0].astype(float)
            ).all()
        )
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """

        def sql(data):
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {
                key: (source_ind, np.inf) for key, (source_ind, _) in state.items()
            }

            self.openContext(data.domain)
            selected_keys = [
                key for key, (sind, _) in self.variable_state.items() if sind == 0
            ]

            if set(selected_keys).issubset(set(state.keys())):
                pass

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)]
            )
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            vars = [
                var
                for var in chain(domain.class_vars, domain.metas, domain.attributes)
                if var.is_primitive()
            ]
            if len(vars) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        for f in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = f(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            vars = [
                v for v in chain(domain.metas, domain.attributes) if v.is_primitive()
            ]
            self.model_selected[:] = vars[:5]
            self.model_other[:] = vars[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = settings(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                self._subset_mask = np.in1d(dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        ec, points, valid_mask = radviz(self.data, variables, self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def setup_plot(self, reset_view=True):
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(
            attributes=domain.attributes, class_vars=domain.class_vars, metas=new_metas
        )
        mask = self.plotdata.valid_mask
        array = np.zeros((len(self.data), 2), dtype=np.float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = (
            data[self._subset_mask & mask]
            if self._subset_mask is not None and len(self._subset_mask)
            else None
        )
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def randomize_indices(self):
        ec = self.plotdata.embedding_coords
        self.plotdata.rand = (
            np.random.choice(len(ec), MAX_POINTS, replace=False)
            if len(ec) > MAX_POINTS
            else None
        )

    def manual_move(self):
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(
                data, list(self.model_selected), self.plotdata.points
            )
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        attributes = (self.variable_x, self.variable_y) + self.data.domain.attributes
        domain = Domain(
            attributes=attributes,
            class_vars=self.data.domain.class_vars,
            metas=self.data.domain.metas,
        )
        data = Table.from_numpy(
            domain, X=np.hstack((ec, data_x)), Y=data_y, metas=data_metas
        )
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=True)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def _update_points_labels(self):
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width()) / 2.0 - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            width = (
                metrics.width(name) if metrics.width(name) < br.width() else br.width()
            )
            width = sx * (width + 5)
            height = sy * br.height()
            ti.setPos(row[0] - (row[0] < 0) * width, row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)

    def update_density(self):
        self._update_graph(reset_view=True)

    def selection_changed(self):
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = (
                graph.selection if graph.selection is not None else [False * len(mask)]
            )
            selection = (
                np.array([], dtype=np.uint8) if mask is None else np.flatnonzero(mask)
            )
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            comp_domain = Domain(
                self.plotdata.points[:, 2], metas=[StringVariable(name="component")]
            )

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(
                np.array(self.plotdata.points[:, 1].T, dtype=float),
                np.array(self.plotdata.points[:, 0].T, dtype=float),
            )
            components = Table.from_numpy(
                comp_domain,
                X=np.row_stack((self.plotdata.points[:, :2].T, angle)),
                metas=metas,
            )
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert(
            (
                ("Color", name(self.graph.attr_color)),
                ("Label", name(self.graph.attr_label)),
                ("Shape", name(self.graph.attr_shape)),
                ("Size", name(self.graph.attr_size)),
                (
                    "Jittering",
                    self.graph.jitter_size != 0
                    and "{} %".format(self.graph.jitter_size),
                ),
            )
        )
        self.report_plot()
        if caption:
            self.report_caption(caption)
Exemplo n.º 17
0
class OWRadviz(widget.OWWidget):
    name = "Radviz"
    description = "Radviz"

    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg("At least 3 numeric or categorical variables are required")
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot", view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs)
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection",
                        auto_label="Send Automatically")

        self.graph.zoom_actions(self)

        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1., -1., 2., 2.))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        self._update_points_labels()

    def keyPressEvent(self, event):
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10)

    def init_attr_values(self):
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        attr_color = self.graph.attr_color
        is_enabled = self.data is not None and not self.data.is_sparse() and \
                     (len(self.model_other) + len(self.model_selected)) > 3 and len(self.data) > 1
        self.btn_vizrank.setEnabled(
            is_enabled and attr_color is not None
            and not np.isnan(self.data.get_column_view(attr_color)[0].astype(float)).all())
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {key: (source_ind, np.inf) for key, (source_ind, _) in state.items()}

            self.openContext(data.domain)
            selected_keys = [key
                             for key, (sind, _) in self.variable_state.items()
                             if sind == 0]

            if set(selected_keys).issubset(set(state.keys())):
                pass

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            vars = [var for var in chain(domain.class_vars, domain.metas, domain.attributes)
                    if var.is_primitive()]
            if len(vars) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        for f in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = f(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            vars = [v for v in chain(domain.metas, domain.attributes)
                    if v.is_primitive()]
            self.model_selected[:] = vars[:5]
            self.model_other[:] = vars[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = settings(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                self._subset_mask = np.in1d(
                    dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        ec, points, valid_mask = radviz(self.data, variables, self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def setup_plot(self, reset_view=True):
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes,
                        class_vars=domain.class_vars,
                        metas=new_metas)
        mask = self.plotdata.valid_mask
        array = np.zeros((len(self.data), 2), dtype=np.float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = data[self._subset_mask & mask]\
            if self._subset_mask is not None and len(self._subset_mask) else None
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0],
            y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def randomize_indices(self):
        ec = self.plotdata.embedding_coords
        self.plotdata.rand = np.random.choice(len(ec), MAX_POINTS, replace=False) \
            if len(ec) > MAX_POINTS else None

    def manual_move(self):
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(data, list(self.model_selected), self.plotdata.points)
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        attributes = (self.variable_x, self.variable_y) + self.data.domain.attributes
        domain = Domain(attributes=attributes,
                        class_vars=self.data.domain.class_vars,
                        metas=self.data.domain.metas)
        data = Table.from_numpy(domain, X=np.hstack((ec, data_x)), Y=data_y, metas=data_metas)
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=True)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1])
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def _update_points_labels(self):
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width())/2. - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            width = metrics.width(name) if metrics.width(name) < br.width() else br.width()
            width = sx * (width + 5)
            height = sy * br.height()
            ti.setPos(row[0] - (row[0] < 0) * width, row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view=reset_view)

    def update_density(self):
        self._update_graph(reset_view=True)

    def selection_changed(self):
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = graph.selection if graph.selection is not None \
                else [False * len(mask)]
            selection = np.array([], dtype=np.uint8) if mask is None else np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            comp_domain = Domain(
                self.plotdata.points[:, 2],
                metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(np.array(self.plotdata.points[:, 1].T, dtype=float),
                               np.array(self.plotdata.points[:, 0].T, dtype=float))
            components = Table.from_numpy(
                comp_domain,
                X=np.row_stack((self.plotdata.points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)
Exemplo n.º 18
0
class OWUnique(widget.OWWidget):
    name = 'Unique'
    icon = 'icons/Unique.svg'
    description = 'Filter instances unique by specified key attribute(s).'

    class Inputs:
        data = widget.Input("Data", Table)

    class Outputs:
        data = widget.Output("Data", Table)

    want_main_area = False

    TIEBREAKERS = {
        'Last instance':
        itemgetter(-1),
        'First instance':
        itemgetter(0),
        'Middle instance':
        lambda seq: seq[len(seq) // 2],
        'Random instance':
        np.random.choice,
        'Discard instances with non-unique keys':
        lambda seq: seq[0] if len(seq) == 1 else None
    }

    settingsHandler = settings.DomainContextHandler()
    grouping_attrs = settings.ContextSetting([])
    tiebreaker = settings.Setting(next(iter(TIEBREAKERS)))
    autocommit = settings.Setting(True)

    def __init__(self):
        # Commit is thunked because autocommit redefines it
        # pylint: disable=unnecessary-lambda
        super().__init__()
        self.data = None

        list_args = dict(alternatingRowColors=True,
                         dragEnabled=True,
                         dragDropMode=QListView.DragDrop,
                         acceptDrops=True,
                         defaultDropAction=Qt.MoveAction,
                         showDropIndicator=True,
                         selectionMode=QListView.ExtendedSelection,
                         selectionBehavior=QListView.SelectRows)

        hbox = gui.hBox(self.controlArea)

        listview_avail = QListView(self, **list_args)
        self.model_avail = VariableListModel(parent=self, enable_dnd=True)
        listview_avail.setModel(self.model_avail)
        gui.vBox(hbox,
                 'Available Variables').layout().addWidget(listview_avail)

        listview_key = QListView(self, **list_args)
        self.model_key = VariableListModel(parent=self, enable_dnd=True)
        listview_key.setModel(self.model_key)
        self.model_key.rowsInserted.connect(lambda: self.commit())
        self.model_key.rowsRemoved.connect(lambda: self.commit())
        gui.vBox(hbox, 'Group by Variables').layout().addWidget(listview_key)

        gui.comboBox(self.controlArea,
                     self,
                     'tiebreaker',
                     box="Item Selection",
                     label='Instance to select in each group:',
                     orientation=Qt.Horizontal,
                     items=tuple(self.TIEBREAKERS),
                     callback=lambda: self.commit(),
                     sendSelectedValue=True,
                     sizeAdjustPolicy=QComboBox.AdjustToContents,
                     minimumContentsLength=20,
                     sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Preferred))
        gui.auto_commit(self.controlArea,
                        self,
                        'autocommit',
                        'Commit',
                        orientation=Qt.Horizontal)

    def storeSpecificSettings(self):
        self.grouping_attrs = list(self.model_key)

    @Inputs.data
    def set_data(self, data):
        self.closeContext()
        self.data = data
        if data:
            self.openContext(data.domain)
            self.model_key[:] = self.grouping_attrs
            self.model_avail[:] = \
                [var for var in data.domain.variables + data.domain.metas
                 if var not in self.model_key]
        else:
            self.grouping_attrs = []
            self.model_key.clear()
            self.model_avail.clear()
        self.unconditional_commit()

    def commit(self):
        if self.data is None:
            self.Outputs.data.send(None)
        else:
            self.Outputs.data.send(self._compute_unique_data())

    def _compute_unique_data(self):
        uniques = {}
        keys = zip(
            *[self.data.get_column_view(attr)[0] for attr in self.model_key])
        for i, key in enumerate(keys):
            uniques.setdefault(key, []).append(i)

        choose = self.TIEBREAKERS[self.tiebreaker]
        selection = sorted(x
                           for x in (choose(inds) for inds in uniques.values())
                           if x is not None)
        if selection:
            return self.data[selection]
        else:
            return None