class OWRadviz(widget.OWWidget):
    """Widget that plots data instances in a Radviz projection.

    The selected variables act as anchors on a unit circle (see
    ``self._circle``); the embedding itself is computed by ``radviz``.
    The widget outputs the selected rows, the annotated input data and a
    table describing the anchor positions ("Components").
    """

    name = "Radviz"
    description = "Radviz"
    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    # Encoded (selected / other) variable lists, stored per domain context.
    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    # Custom event type used to coalesce replot requests.
    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required")
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Build the plot area and the control-area widgets."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot",
                                   view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features",
            self.vizrank_set_attrs)
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)

        g.add_widgets(
            [g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", auto_label="Send Automatically")

        self.graph.zoom_actions(self)

        # Circle of radius 1 centred at the origin, drawn behind the anchors.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1., -1., 2., 2.))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        """Re-layout the anchor labels after the widget has been resized."""
        # Let the base class run its own resize handling first; the original
        # override silently dropped it.
        super().resizeEvent(event)
        self._update_points_labels()

    def keyPressEvent(self, event):
        """Forward the event and refresh the tooltip for the modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward the event and refresh the tooltip for the modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        """Make `attrs` the selected variables; do nothing if it is empty."""
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        """Reset `self.plotdata` to an empty namespace of plot state."""
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        """React to a change of the color attribute."""
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        """Drop the cached plot state and clear the plot widget."""
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        # Only one ReplotRequest is kept in flight at a time.
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest),
                                   Qt.LowEventPriority - 10)

    def init_attr_values(self):
        """Pass the current data to the graph so it can set up its domain."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """Enable or disable the VizRank button and re-initialize VizRank."""
        attr_color = self.graph.attr_color
        is_enabled = (
            self.data is not None
            and not self.data.is_sparse()
            and (len(self.model_other) + len(self.model_selected)) > 3
            and len(self.data) > 1
        )
        # The color column is only inspected when all cheaper checks pass.
        self.btn_vizrank.setEnabled(
            is_enabled and attr_color is not None
            and not np.isnan(
                self.data.get_column_view(attr_color)[0].astype(float)).all())
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            # Materialize SQL tables; larger ones are sampled first.
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def restore_state(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {key: (source_ind, np.inf)
                     for key, (source_ind, _) in state.items()}

            self.openContext(data.domain)

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            primitive = [
                var for var in chain(domain.class_vars, domain.metas,
                                     domain.attributes)
                if var.is_primitive()
            ]
            if len(primitive) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        # Run the checks in order; the first one that rejects the data
        # (by returning None) stops the chain.
        for check in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = check(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            primitive = [v for v in chain(domain.metas, domain.attributes)
                         if v.is_primitive()]
            # Defaults: first five primitive variables are selected.
            self.model_selected[:] = primitive[:5]
            self.model_other[:] = primitive[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = restore_state(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Rebuild the plot once all pending inputs have been delivered."""
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                self._subset_mask = np.in1d(
                    dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        """Handle a posted ReplotRequest; delegate every other event."""
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        """Store the current variable lists before closing the context."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        """Run ``radviz`` for `variables` and cache its results in plotdata."""
        ec, points, valid_mask = radviz(
            self.data, variables, self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def setup_plot(self, reset_view=True):
        """Compute the embedding and draw data points, circle and anchors.

        Args:
            reset_view (bool): forwarded to ``graph.update_data``.
        """
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes,
                        class_vars=domain.class_vars,
                        metas=new_metas)
        mask = self.plotdata.valid_mask
        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        if self._subset_mask is not None and len(self._subset_mask):
            subset_data = data[self._subset_mask & mask]
        else:
            subset_data = None
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0],
            y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def randomize_indices(self):
        """Pick at most MAX_POINTS random row indices of the embedding."""
        ec = self.plotdata.embedding_coords
        if len(ec) > MAX_POINTS:
            self.plotdata.rand = np.random.choice(
                len(ec), MAX_POINTS, replace=False)
        else:
            self.plotdata.rand = None

    def manual_move(self):
        """Redraw the plot for the current anchor positions.

        When ``plotdata.rand`` holds a sample of indices, only those rows
        are drawn.
        """
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(
                data, list(self.model_selected), self.plotdata.points)
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        # Here the projected coordinates are prepended as attributes.
        attributes = (self.variable_x, self.variable_y) \
            + self.data.domain.attributes
        domain = Domain(attributes=attributes,
                        class_vars=self.data.domain.class_vars,
                        metas=self.data.domain.metas)
        data = Table.from_numpy(domain, X=np.hstack((ec, data_x)),
                                Y=data_y, metas=data_metas)
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=True)
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1])
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def _update_points_labels(self):
        """Recreate the text labels next to the anchor points."""
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width()) / 2. - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            # Use the narrower of the wrapped box and the raw text width.
            width = min(br.width(), metrics.width(name))
            width = sx * (width + 5)
            height = sy * br.height()
            # Shift labels on the left side / upper half by their own size.
            ti.setPos(row[0] - (row[0] < 0) * width,
                      row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        """Request a replot after the jitter setting changed."""
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        """Rescale the graph data and refresh the plot if data is present."""
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        """Reset the zoom stack and redraw the graph if it has data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)

    def update_density(self):
        """Redraw the graph after the class-density option changed."""
        self._update_graph(reset_view=True)

    def selection_changed(self):
        """Copy the graph selection into `_selection` and commit."""
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        """Send selected data, annotated data and anchor components."""
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            # Expand the graph selection (valid rows only) to all rows.
            mask = self.plotdata.valid_mask.astype(int)
            # The original fallback `[False * len(mask)]` evaluated to `[0]`;
            # write the intended zero fill explicitly.
            mask[mask == 1] = graph.selection \
                if graph.selection is not None else 0
            selection = np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            # Selection values above 1 mean several selection groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            comp_domain = Domain(
                self.plotdata.points[:, 2],
                metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(
                np.array(self.plotdata.points[:, 1].T, dtype=float),
                np.array(self.plotdata.points[:, 0].T, dtype=float))
            # np.vstack is the non-deprecated spelling of np.row_stack.
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((self.plotdata.points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption with the point settings to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0
             and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)
class OWLinearProjection(widget.OWWidget):
    """Widget that projects data onto a plane using a linear projection.

    The projection axes come from one of four placements (see
    ``Placement``): circular anchors, LDA, PCA, or a projection table
    supplied on the "Projection" input.
    """

    name = "Linear Projection"
    description = "A multi-axis projection of data onto " \
                  "a two-dimensional plane."
    icon = "icons/LinearProjection.svg"
    priority = 240
    keywords = []

    selection_indices = settings.Setting(None, schema_only=True)

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        projection = Input("Projection", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    Placement = Enum("Placement",
                     dict(Circular=0, LDA=1, PCA=2, Projection=3),
                     type=int, qualname="OWLinearProjection.Placement")

    # Prefix of the component labels in the "Components" output.
    Component_name = {
        Placement.Circular: "C",
        Placement.LDA: "LD",
        Placement.PCA: "PC"
    }
    # Prefix of the names of the projected x/y meta variables.
    Variable_name = {
        Placement.Circular: "circular",
        Placement.LDA: "lda",
        Placement.PCA: "pca",
        Placement.Projection: "projection"
    }

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    settings_version = 3
    settingsHandler = settings.DomainContextHandler()

    # Encoded (selected / other) variable lists, stored per domain context.
    variable_state = settings.ContextSetting({})
    placement = settings.Setting(Placement.Circular)
    radius = settings.Setting(0)
    auto_commit = settings.Setting(True)

    resolution = 256

    graph = settings.SettingProvider(OWLinProjGraph)
    # Custom event type used to coalesce replot requests.
    ReplotRequest = QEvent.registerEventType()
    vizrank = settings.SettingProvider(LinearProjectionVizRank)
    graph_name = "graph.plot_widget.plotItem"

    class Warning(widget.OWWidget.Warning):
        no_cont_features = widget.Msg("Plotting requires numeric features")
        not_enough_components = widget.Msg(
            "Input projection has less than 2 components")
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        proj_and_domain_match = widget.Msg(
            "Projection and Data domains do not match")
        no_valid_data = widget.Msg("No projection due to invalid data")

    def __init__(self):
        """Build the plot area and the control-area widgets."""
        super().__init__()

        self.data = None
        self.projection = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None
        self.__replot_requested = False
        self.n_cont_var = 0
        #: Remember the saved state to restore
        self.__pending_selection_restore = self.selection_indices
        self.selection_indices = None

        self.variable_x = None
        self.variable_y = None

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWLinProjGraph(self, box, "Plot",
                                    view_box=LinProjInteractiveViewBox)
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank(
            self.controlArea, self, "Suggest Features", self._vizrank)
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        box = gui.widgetBox(self.controlArea, "Placement",
                            sizePolicy=SIZE_POLICY)
        # Button order must match the integer values of `Placement`.
        self.radio_placement = gui.radioButtonsInBox(
            box, self, "placement",
            btnLabels=[
                "Circular Placement",
                "Linear Discriminant Analysis",
                "Principal Component Analysis",
                "Use input projection"
            ],
            callback=self._change_placement)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        box = g.point_properties_box(self.controlArea)
        self.models = g.points_models
        g.add_widget(g.JitterSizeSlider, box)
        box.setSizePolicy(*SIZE_POLICY)

        box = gui.widgetBox(self.controlArea, "Hide axes",
                            sizePolicy=SIZE_POLICY)
        self.rslider = gui.hSlider(
            box, self, "radius", minValue=0, maxValue=100, step=5,
            label="Radius", createLabel=False, ticks=True,
            callback=self.update_radius)
        self.rslider.setTickInterval(0)
        self.rslider.setPageStep(10)

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)

        g.add_widgets([
            g.ShowLegend,
            g.ToolTipShowsAll,
            g.ClassDensity,
            g.LabelOnlySelected
        ], box)

        box = self.graph.box_zoom_select(self.controlArea)
        box.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", auto_label="Send Automatically")
        self.graph.zoom_actions(self)

        self._new_plotdata()
        self._change_placement()
        self.graph.jitter_continuous = True

    def reset_graph_data(self):
        """Rescale the graph data and refresh the plot if data is present."""
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph(reset_view=True)

    def keyPressEvent(self, event):
        """Forward the event and refresh the tooltip for the modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward the event and refresh the tooltip for the modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def _vizrank(self, attrs):
        """Make `attrs` (as suggested by VizRank) the selected variables."""
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [
            var for var in self.model_other if var not in attrs
        ]

    def _change_placement(self):
        """Update the controls for the current placement and replot."""
        placement = self.placement
        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        self.variables_selection.set_enabled(placement in [p_Circular, p_LDA])
        self._vizrank_color_change()
        self.rslider.setEnabled(placement != p_Circular)
        self._setup_plot()
        self.commit()

    def _get_min_radius(self):
        """Return the radius below which axes are hidden.

        It is `self.radius` percent of the longest axis, plus a small
        epsilon.
        """
        return self.radius * np.max(
            np.linalg.norm(self.plotdata.axes, axis=1)) / 100 + 1e-5

    def update_radius(self):
        """Update the anchor/axes visibility after the radius changed."""
        pd = self.plotdata
        assert pd is not None
        if pd.hidecircle is None:
            return
        min_radius = self._get_min_radius()
        for anchor, item in zip(pd.axes, pd.axisitems):
            item.setVisible(np.linalg.norm(anchor) > min_radius)
        pd.hidecircle.setRect(
            QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

    def _new_plotdata(self):
        """Reset `self.plotdata` to an empty namespace of plot state."""
        self.plotdata = namespace(valid_mask=None,
                                  embedding_coords=None,
                                  axisitems=[],
                                  axes=[],
                                  variables=[],
                                  data=None,
                                  hidecircle=None)

    def _anchor_circle(self, variables):
        """Add one axis item per variable and, if needed, the hide circle."""
        # minimum visible anchor radius (radius)
        min_radius = self._get_min_radius()
        axisitems = []
        for anchor, var in zip(self.plotdata.axes, variables[:]):
            axitem = AnchorItem(
                line=QLineF(0, 0, *anchor),
                text=var.name,
            )
            axitem.setVisible(np.linalg.norm(anchor) > min_radius)
            axitem.setPen(pg.mkPen((100, 100, 100)))
            axitem.setArrowVisible(True)
            self.viewbox.addItem(axitem)
            axisitems.append(axitem)

        self.plotdata.axisitems = axisitems
        # The hide circle is not drawn for circular placement.
        if self.placement == self.Placement.Circular:
            return

        hidecircle = QGraphicsEllipseItem()
        hidecircle.setRect(
            QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius))

        _pen = QPen(Qt.lightGray, 1)
        _pen.setCosmetic(True)
        hidecircle.setPen(_pen)

        self.viewbox.addItem(hidecircle)
        self.plotdata.hidecircle = hidecircle

    def update_colors(self):
        """React to a change of the color attribute."""
        self._vizrank_color_change()

    def clear(self):
        """Clear/reset the widget state."""
        self.data = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()
        self.selection_indices = None

    def _clear_plot(self):
        """Remove axis items and the hide circle; reset the plot state."""
        self.Warning.trivial_components.clear()
        for axisitem in self.plotdata.axisitems:
            self.viewbox.removeItem(axisitem)
        if self.plotdata.hidecircle:
            self.viewbox.removeItem(self.plotdata.hidecircle)
        self._new_plotdata()
        self.graph.hide_axes()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        # Only one ReplotRequest is kept in flight at a time.
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest),
                                   Qt.LowEventPriority - 10)

    def init_attr_values(self):
        """Pass the current data to the graph so it can set up its domain."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """Enable or disable the VizRank button and set its tooltip."""
        is_enabled = False
        if self.data is None:
            self.btn_vizrank.setToolTip("There is no data.")
            return
        # `is_primitive` must be called: the bare bound method is always
        # truthy, so the original filter let every variable through.
        candidates = [
            v for v in chain(self.data.domain.variables,
                             self.data.domain.metas)
            if v.is_primitive() and v is not self.graph.attr_color
        ]
        self.n_cont_var = len(candidates)
        if self.placement not in [self.Placement.Circular,
                                  self.Placement.LDA]:
            msg = "Suggest Features works only for Circular and " \
                  "Linear Discriminant Analysis Projection"
        elif self.graph.attr_color is None:
            msg = "Color variable has to be selected"
        elif self.graph.attr_color.is_continuous \
                and self.placement == self.Placement.LDA:
            msg = "Suggest Features does not work for Linear " \
                  "Discriminant Analysis Projection " \
                  "when continuous color variable is selected."
        elif len(candidates) < 3:
            msg = "Not enough available continuous variables"
        else:
            is_enabled = True
            msg = ""
        self.btn_vizrank.setToolTip(msg)
        self.btn_vizrank.setEnabled(is_enabled)
        self.vizrank.stop_and_reset(is_enabled)

    @Inputs.projection
    def set_projection(self, projection):
        """Set the input projection table.

        A table with fewer than two rows is rejected with a warning; a
        usable table switches the placement to ``Placement.Projection``.
        """
        self.Warning.not_enough_components.clear()
        # Test against None explicitly: the original truthiness check let
        # an empty table skip this warning.
        if projection is not None and len(projection) < 2:
            self.Warning.not_enough_components()
            projection = None
        if projection is not None:
            self.placement = self.Placement.Projection
        self.projection = projection

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset.

        Args:
            data (Orange.data.table): data instances
        """
        def sql(data):
            # Materialize SQL tables; larger ones are sampled first.
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.information("Data has been sampled")
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def restore_state(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)])
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in state.items()
            }

            self.openContext(data.domain)

            # Restore a selection saved with the workflow, once.
            if self.__pending_selection_restore is not None:
                self._selection = np.array(
                    self.__pending_selection_restore, dtype=int)
                self.__pending_selection_restore = None

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)])
            return selected, other

        self.closeContext()
        self.clear()
        self.Warning.no_cont_features.clear()
        self.information()
        data = sql(data)
        if data is not None:
            domain = data.domain
            cont_vars = [
                var for var in chain(domain.variables, domain.metas)
                if var.is_continuous
            ]
            if not cont_vars:
                self.Warning.no_cont_features()
                data = None
        self.data = data
        self.init_attr_values()
        if data is not None and len(data):
            self._initialize(data)
            self.model_selected[:], self.model_other[:] = restore_state(data)
            self.vizrank.stop_and_reset()
            self.vizrank.attrs = self.data.domain.attributes \
                if self.data is not None else []

    def _check_possible_opt(self):
        """Enable only the placements usable with the current inputs."""
        def set_enabled(is_enabled):
            for btn in self.radio_placement.buttons:
                btn.setEnabled(is_enabled)
            self.variables_selection.set_enabled(is_enabled)

        p_Circular = self.Placement.Circular
        p_LDA = self.Placement.LDA
        p_Input = self.Placement.Projection
        if self.data:
            set_enabled(True)
            domain = self.data.domain
            # LDA needs a discrete class with at least two values.
            if not domain.has_discrete_class \
                    or len(domain.class_var.values) < 2:
                self.radio_placement.buttons[p_LDA].setEnabled(False)
                if self.placement == p_LDA:
                    self.placement = p_Circular
            if not self.projection:
                self.radio_placement.buttons[p_Input].setEnabled(False)
                if self.placement == p_Input:
                    self.placement = p_Circular
            self._setup_plot()
        else:
            self.graph.new_data(None)
            self.rslider.setEnabled(False)
            set_enabled(False)
        self.commit()

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Refresh the widget once all pending inputs have been delivered."""
        if self.data is not None and self.subset_data is not None:
            # Update the plot's highlight items
            dataids = self.data.ids.ravel()
            subsetids = np.unique(self.subset_data.ids)
            self._subset_mask = np.in1d(
                dataids, subsetids, assume_unique=True)
        self._check_possible_opt()
        self._change_placement()
        self.commit()

    def customEvent(self, event):
        """Handle a posted ReplotRequest; delegate every other event."""
        if event.type() == OWLinearProjection.ReplotRequest:
            self.__replot_requested = False
            self._setup_plot()
            self.commit()
        else:
            super().customEvent(event)

    def closeContext(self):
        """Store the current variable lists before closing the context."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)])
        super().closeContext()

    def _initialize(self, data):
        """Initialize the GUI controls from data's domain."""
        continuous = [
            v for v in chain(data.domain.metas, data.domain.attributes)
            if v.is_continuous
        ]
        # Defaults: the first three continuous variables are selected.
        self.model_other[:] = continuous[3:]
        self.model_selected[:] = continuous[:3]

    def prepare_plot_data(self, variables):
        """Project the data columns of `variables` onto two axes.

        Returns:
            tuple: ``(valid_mask, coords, axes)`` where `valid_mask` marks
            rows without NaN, `coords` has one (x, y) row per valid data
            row and `axes` has one row per variable; or
            ``(None, None, None)`` if no axes are available.
        """
        def projection(variables):
            # Match projection columns by variable first, then by name.
            if set(self.projection.domain.attributes).issuperset(variables):
                axes = self.projection[:2, variables].X
            elif set(f.name for f in self.projection.domain.attributes
                     ).issuperset(f.name for f in variables):
                axes = self.projection[:2, [f.name for f in variables]].X
            else:
                self.Error.proj_and_domain_match()
                axes = None
            return axes

        def get_axes(variables):
            self.Error.proj_and_domain_match.clear()
            axes = None
            if self.placement == self.Placement.Circular:
                axes = LinProj.defaultaxes(len(variables))
            elif self.placement == self.Placement.LDA:
                axes = self._get_lda(self.data, variables)
            elif self.placement == self.Placement.Projection \
                    and self.projection:
                axes = projection(variables)
            return axes

        coords = [
            column_data(self.data, var, dtype=float) for var in variables
        ]
        coords = np.vstack(coords)
        p, N = coords.shape
        # Originally `assert a, b`, which used the second comparison as the
        # failure message instead of checking it.
        assert N == len(self.data) and p == len(variables)

        axes = get_axes(variables)
        if axes is None:
            return None, None, None
        assert axes.shape == (2, p)

        valid_mask = ~np.isnan(coords).any(axis=0)
        coords = coords[:, valid_mask]

        X, Y = np.dot(axes, coords)
        if X.size and Y.size:
            X = normalized(X)
            Y = normalized(Y)

        return valid_mask, np.stack((X, Y), axis=1), axes.T

    def _setup_plot(self):
        """Compute the projection for the current placement and plot it."""
        self._clear_plot()
        if self.data is None:
            return
        self.__replot_requested = False
        # Names for the projected coordinates that do not clash with the
        # names already used in the data's domain.
        names = get_unique_names([
            v.name for v in chain(self.data.domain.variables,
                                  self.data.domain.metas)
        ], [
            "{}-x".format(self.Variable_name[self.placement]),
            "{}-y".format(self.Variable_name[self.placement])
        ])
        self.variable_x = ContinuousVariable(names[0])
        self.variable_y = ContinuousVariable(names[1])
        if self.placement in [self.Placement.Circular, self.Placement.LDA]:
            variables = list(self.model_selected)
        elif self.placement == self.Placement.Projection:
            variables = self.model_selected[:] + self.model_other[:]
        elif self.placement == self.Placement.PCA:
            variables = [
                var for var in self.data.domain.attributes
                if var.is_continuous
            ]
        if not variables:
            self.graph.new_data(None)
            return
        if self.placement == self.Placement.PCA:
            valid_mask, ec, axes = self._get_pca()
            variables = self._pca.orig_domain.attributes
        else:
            valid_mask, ec, axes = self.prepare_plot_data(variables)

        self.plotdata.variables = variables
        self.plotdata.valid_mask = valid_mask
        self.plotdata.embedding_coords = ec
        self.plotdata.axes = axes
        if any(e is None for e in (valid_mask, ec, axes)):
            return
        if not sum(valid_mask):
            self.Error.no_valid_data()
            self.graph.new_data(None, None)
            return
        self.Error.no_valid_data.clear()

        self._anchor_circle(variables=variables)
        self._plot()

    def _plot(self):
        """Append the projected coordinates as metas and update the graph."""
        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(attributes=domain.attributes,
                        class_vars=domain.class_vars,
                        metas=new_metas)
        valid_mask = self.plotdata.valid_mask
        # `np.float` was removed in NumPy 1.24; the builtin is equivalent.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[valid_mask] = self.plotdata.embedding_coords
        self.plotdata.data = data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        if self._subset_mask is not None and len(self._subset_mask):
            subset_data = data[self._subset_mask & valid_mask]
        else:
            subset_data = None
        self.plotdata.data = data
        self.graph.new_data(data[valid_mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y, False)

    def _get_lda(self, data, variables):
        """Fit LDA on `variables` and return the first two scalings as rows."""
        domain = Domain(attributes=variables,
                        class_vars=data.domain.class_vars)
        data = data.transform(domain)
        lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        lda.fit(data.X, data.Y)
        scalings = lda.scalings_[:, :2].T
        # A single 1x1 scaling is replaced by a fixed two-row axis.
        if scalings.shape == (1, 1):
            scalings = np.array([[1.], [0.]])
        return scalings

    def _get_pca(self):
        """Fit a two-component PCA; return ``(valid_mask, coords, axes)``.

        The fitted model is kept in ``self._pca``.
        """
        data = self.data
        MAX_COMPONENTS = 2
        ncomponents = 2
        DECOMPOSITIONS = [PCA]  # TruncatedSVD
        cls = DECOMPOSITIONS[0]
        pca_projector = cls(n_components=MAX_COMPONENTS)
        pca_projector.component = ncomponents
        pca_projector.preprocessors = cls.preprocessors + [Normalize()]

        pca = pca_projector(data)
        variance_ratio = pca.explained_variance_ratio_
        cumulative = np.cumsum(variance_ratio)

        self._pca = pca
        if not np.isfinite(cumulative[-1]):
            self.Warning.trivial_components()

        # NOTE(review): `coords` is returned for all rows, not only those
        # in `valid_mask` -- confirm the PCA output never contains NaN.
        coords = pca(data).X
        valid_mask = ~np.isnan(coords).any(axis=1)
        # scale axes
        max_radius = np.min(
            [np.abs(np.min(coords, axis=0)), np.max(coords, axis=0)])
        axes = pca.components_.T.copy()
        axes *= max_radius / np.max(np.linalg.norm(axes, axis=1))
        return valid_mask, coords, axes

    def _update_graph(self, reset_view=False):
        """Reset the zoom stack and redraw the graph if it has data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y, reset_view)

    def update_density(self):
        """Redraw the graph after the class-density option changed."""
        self._update_graph(reset_view=False)

    def selection_changed(self):
        """Store the graph selection (also as a setting) and commit."""
        if self.graph.selection is not None:
            self._selection = np.zeros(len(self.data), dtype=np.uint8)
            self._selection[self.plotdata.valid_mask] = self.graph.selection
            self.selection_indices = self._selection.tolist()
        else:
            self._selection = self.selection_indices = None
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        """Send selected data, annotated data and projection components."""
        def prepare_components():
            if self.placement in [self.Placement.Circular,
                                  self.Placement.LDA]:
                attrs = list(self.model_selected)
                axes = self.plotdata.axes
            elif self.placement == self.Placement.PCA:
                axes = self._pca.components_.T
                attrs = list(self._pca.orig_domain.attributes)
            if self.placement != self.Placement.Projection:
                domain = Domain([
                    ContinuousVariable(a.name, compute_value=lambda _: None)
                    for a in attrs
                ], metas=[StringVariable(name='component')])
                metas = np.array([[
                    "{}{}".format(self.Component_name[self.placement], i + 1)
                    for i in range(axes.shape[1])
                ]], dtype=object).T
                components = Table(domain, axes.T, metas=metas)
                components.name = 'components'
            else:
                # The input projection is passed through unchanged.
                components = self.projection
            return components

        selected = annotated = components = None
        if self.data is not None and self.plotdata.data is not None:
            components = prepare_components()

            graph = self.graph
            # Expand the graph selection (valid rows only) to all rows.
            mask = self.plotdata.valid_mask.astype(int)
            # The original fallback `[False * len(mask)]` evaluated to `[0]`;
            # write the intended zero fill explicitly.
            mask[mask == 1] = graph.selection \
                if graph.selection is not None else 0

            selection = np.flatnonzero(mask)
            name = self.data.name
            data = self.plotdata.data
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            # Selection values above 1 mean several selection groups.
            if graph.selection is not None and np.max(graph.selection) > 1:
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption with the settings to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        def projection_name():
            # Indexed by the integer value of the placement.
            name = ("Circular Placement",
                    "Linear Discriminant Analysis",
                    "Principal Component Analysis",
                    "Input projection")
            return name[self.placement]

        caption = report.render_items_vert((
            ("Projection", projection_name()),
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", self.graph.jitter_size != 0
             and "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored settings to the current layout, in place.

        Version 2 renamed ``point_size`` to ``point_width``; version 3
        copies the plot options into the nested ``graph`` settings.
        """
        if version < 2:
            settings_["point_width"] = settings_["point_size"]
        if version < 3:
            settings_graph = {}
            settings_graph["jitter_size"] = settings_["jitter_value"]
            settings_graph["point_width"] = settings_["point_width"]
            settings_graph["alpha_value"] = settings_["alpha_value"]
            settings_graph["class_density"] = settings_["class_density"]
            settings_["graph"] = settings_graph

    @classmethod
    def migrate_context(cls, context, version):
        """Upgrade a stored context to the current layout, in place.

        Version 2 replaced the ``*_index`` values with ``attr_*`` entries;
        version 3 copies those entries into the nested ``graph`` values.
        """
        if version < 2:
            domain = context.ordered_domain
            c_domain = [t for t in context.ordered_domain if t[1] == 2]
            d_domain = [t for t in context.ordered_domain if t[1] == 1]
            for d, old_val, new_val in (
                    (domain, "color_index", "attr_color"),
                    (d_domain, "shape_index", "attr_shape"),
                    (c_domain, "size_index", "attr_size")):
                # The stored index is shifted down by one; an index outside
                # the list yields None.
                index = context.values[old_val][0] - 1
                context.values[new_val] = (d[index][0], d[index][1] + 100) \
                    if 0 <= index < len(d) else None
        if version < 3:
            context.values["graph"] = {
                "attr_color": context.values["attr_color"],
                "attr_shape": context.values["attr_shape"],
                "attr_size": context.values["attr_size"]
            }
class OWRadviz(OWProjectionWidget):
    """Widget that displays a Radviz projection of the input data.

    The selected variables are placed as anchors on a circle (computed by
    ``radviz``); the widget outputs the selected rows, the annotated table
    and a table describing the anchor positions.
    """

    name = "Radviz"
    description = "Display Radviz projection"
    icon = "icons/Radviz.svg"
    priority = 241
    keywords = ["viz"]

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 2
    settingsHandler = settings.DomainContextHandler()
    variable_state = settings.ContextSetting({})
    auto_commit = settings.Setting(True)

    vizrank = settings.SettingProvider(RadvizVizRank)
    graph = settings.SettingProvider(OWRadvizGraph)
    graph_name = "graph.plot_widget.plotItem"

    ReplotRequest = QEvent.registerEventType()

    class Information(OWProjectionWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(OWProjectionWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")
        invalid_embedding = widget.Msg("No projection for selected features")

    class Error(OWProjectionWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required")
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Build the plot, the variable selection lists and the controls."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self.subset_indices = None
        self._embedding_coords = None
        self._rand_indices = None

        self.__replot_requested = False

        # Meta variables that carry the projected coordinates on output.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box)
        box.layout().addWidget(self.graph.plot_widget)

        self.variables_selection = VariablesSelection()
        self.model_selected = selected = VariableListModel(enable_dnd=True)
        self.model_other = other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, selected, other, self.controlArea)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            None, self, "Suggest features", self.vizrank_set_attrs)
        # Todo: this button introduces some margin at the bottom?!
        self.variables_selection.add_remove.layout().addWidget(
            self.btn_vizrank)

        g = self.graph.gui
        g.point_properties_box(self.controlArea)
        g.effects_box(self.controlArea)
        g.plot_properties_box(self.controlArea)

        self.graph.box_zoom_select(self.controlArea)

        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.graph.view_box.started.connect(self._randomize_indices)
        self.graph.view_box.moved.connect(self._manual_move)
        self.graph.view_box.finished.connect(self._finish_manual_move)

    def vizrank_set_attrs(self, attrs):
        """Move the variables suggested by VizRank into the selected list."""
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def update_colors(self):
        """React to a change of the color attribute."""
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.can_draw_density())

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(self, QEvent(self.ReplotRequest),
                                   Qt.LowEventPriority - 10)

    def _vizrank_color_change(self):
        """Enable the VizRank button only when ranking can be computed."""
        is_enabled = (
            self.data is not None
            and not self.data.is_sparse()
            and len(self.model_other) + len(self.model_selected) > 3
            # valid_data stays None when no projection has been computed yet
            # (e.g. setup_plot returned early); indexing with it would fail.
            and self.valid_data is not None
            and len(self.data[self.valid_data]) > 1
            and np.all(np.nan_to_num(np.nanstd(self.data.X, 0)) != 0)
        )
        self.btn_vizrank.setEnabled(
            is_enabled and self.attr_color is not None
            and not np.isnan(
                self.data.get_column_view(
                    self.attr_color)[0].astype(float)).all())
        self.vizrank.initialize()

    def clear(self):
        """Reset data, projection state, variable lists and the graph."""
        self.data = None
        self.valid_data = None
        self._embedding_coords = None
        self._rand_indices = None
        self.model_selected.clear()
        self.model_other.clear()

        self.graph.set_attributes(())
        self.graph.set_points(None)
        self.graph.update_coordinates()
        self.graph.clear()

    @Inputs.data
    def set_data(self, data):
        """Accept a new input table, validate it and restore the context."""
        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.data = data
        self._check_data()
        self.init_attr_values()
        self.openContext(self.data)
        if self.data is not None:
            self.model_selected[:], self.model_other[:] = self._load_settings()

    def _check_data(self):
        """Validate ``self.data``; set it to None and show an error if unusable.

        SQL tables are downloaded (fully when small, otherwise sampled) and
        replaced by an in-memory ``Table``.
        """
        if self.data is None:
            return
        domain = self.data.domain
        if self.data.is_sparse():
            self.Error.sparse_data()
            self.data = None
        elif isinstance(self.data, SqlTable):
            if self.data.approx_len() < 4000:
                self.data = Table(self.data)
            else:
                self.Information.sql_sampled_data()
                data_sample = self.data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                self.data = Table(data_sample)
        elif len(self.data) < 2:
            self.Error.no_instances()
            self.data = None
        elif sum(1 for v in domain.variables + domain.metas
                 if v.is_primitive()) < 3:
            self.Error.no_features()
            self.data = None

    def _load_settings(self):
        """Return ``(selected, other)`` variable lists for the current data.

        Defaults (first five primitive variables selected) are overridden by
        the positions stored in ``variable_state``.
        """
        domain = self.data.domain
        variables = [
            v for v in domain.attributes + domain.metas if v.is_primitive()
        ]
        self.model_selected[:] = variables[:5]
        self.model_other[:] = variables[5:] + list(domain.class_vars)

        state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)])
        # Default positions are +inf so that saved positions take precedence.
        state = {key: (ind, np.inf) for key, (ind, _) in state.items()}
        state.update(self.variable_state)
        return VariablesSelection.decode_var_state(
            state, [list(self.model_selected), list(self.model_other)])

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """Store the subset table and the ids of its rows."""
        self.subset_data = subset
        # An empty *set* (not a dict literal) when there is no subset.
        self.subset_indices = {e.id for e in subset} \
            if subset is not None else set()
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Replot and resend outputs after all inputs have been set."""
        self.setup_plot()
        self._vizrank_color_change()
        self.commit()

    def get_coordinates_data(self):
        """Return the x and y embedding columns, or ``(None, None)``."""
        ec = self._embedding_coords
        if ec is None or np.any(np.isnan(ec)):
            return None, None
        return ec[:, 0], ec[:, 1]

    def get_subset_mask(self):
        """Return a boolean array marking valid rows that are in the subset.

        Returns None when no subset is given.
        """
        if not self.subset_indices:
            return None
        return np.array([
            ex.id in self.subset_indices
            for ex in self.data[self.valid_data]
        ])

    def customEvent(self, event):
        """Handle the delayed replot request posted by invalidate_plot."""
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self.setup_plot()
        else:
            super().customEvent(event)

    def closeContext(self):
        """Save the variable lists into the context before closing it."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)])
        super().closeContext()

    def setup_plot(self):
        """Compute the Radviz embedding and redraw the graph."""
        if self.data is None:
            return
        self.__replot_requested = False

        self.clear_messages()
        if len(self.model_selected) < 2:
            self.Warning.no_features()
            self.graph.clear()
            return

        coords, points, valid = radviz(self.data, self.model_selected)
        self._embedding_coords = coords
        self.graph.set_points(points)
        self.valid_data = valid
        if self._embedding_coords is None or \
                np.any(np.isnan(self._embedding_coords)):
            self.Warning.invalid_embedding()
        self.graph.reset_graph()

    def _randomize_indices(self):
        """Pick a sorted random sample of rows to draw while dragging."""
        n = len(self._embedding_coords)
        if n > MAX_POINTS:
            self._rand_indices = np.random.choice(n, MAX_POINTS, replace=False)
            self._rand_indices = sorted(self._rand_indices)

    def _manual_move(self):
        """Recompute the embedding while an anchor is being dragged."""
        self.__replot_requested = False

        res = radviz(self.data, self.model_selected, self.graph.get_points())
        self._embedding_coords = res[0]
        if self._rand_indices is not None:
            # save widget state
            selection = self.graph.selection
            valid_data = self.valid_data.copy()
            data = self.data.copy()
            ec = self._embedding_coords.copy()

            # plot subset
            self.__plot_random_subset(selection)

            # restore widget state
            self.graph.selection = selection
            self.valid_data = valid_data
            self.data = data
            self._embedding_coords = ec
        else:
            self.graph.update_coordinates()

    def __plot_random_subset(self, selection):
        """Temporarily reduce the widget state to the random sample and plot."""
        self._embedding_coords = self._embedding_coords[self._rand_indices]
        self.data = self.data[self._rand_indices]
        self.valid_data = self.valid_data[self._rand_indices]
        self.graph.reset_graph()
        if selection is not None:
            self.graph.selection = selection[self._rand_indices]
            self.graph.update_selection_colors()

    def _finish_manual_move(self):
        """Redraw all points (and the selection) once dragging has ended."""
        if self._rand_indices is not None:
            selection = self.graph.selection
            self.graph.reset_graph()
            if selection is not None:
                self.graph.selection = selection
                self.graph.select_by_index(self.graph.get_selection())

    def selection_changed(self):
        """Resend outputs when the selection in the graph changes."""
        self.commit()

    def commit(self):
        """Send selected data, annotated data and the anchor components."""
        selected = annotated = components = None
        if self.data is not None and np.sum(self.valid_data):
            name = self.data.name
            domain = self.data.domain
            metas = domain.metas + (self.variable_x, self.variable_y)
            domain = Domain(domain.attributes, domain.class_vars, metas)
            # ``np.float`` was removed from NumPy; the builtin is equivalent.
            embedding_coords = np.zeros((len(self.data), 2), dtype=float)
            embedding_coords[self.valid_data] = self._embedding_coords

            data = self.data.transform(domain)
            data[:, self.variable_x] = embedding_coords[:, 0][:, None]
            data[:, self.variable_y] = embedding_coords[:, 1][:, None]

            selection = self.graph.get_selection()
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            if self.graph.selection is not None and \
                    np.max(self.graph.selection) > 1:
                # More than one selection group is present.
                annotated = create_groups_table(data, self.graph.selection)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            # Each row of ``points`` is (x, y, variable).
            points = self.graph.get_points()
            comp_domain = Domain(
                points[:, 2],
                metas=[StringVariable(name='component')])

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(np.array(points[:, 1].T, dtype=float),
                               np.array(points[:, 0].T, dtype=float))
            # ``np.vstack`` is the non-deprecated name of ``np.row_stack``.
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((points[:, :2].T, angle)),
                metas=metas)
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption describing its settings to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.attr_color)),
            ("Label", name(self.attr_label)),
            ("Shape", name(self.attr_shape)),
            ("Size", name(self.attr_size)),
            ("Jittering", self.graph.jitter_size != 0 and
             "{} %".format(self.graph.jitter_size))))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    @classmethod
    def migrate_context(cls, context, version):
        """Move attribute settings from the legacy ``graph`` dict to top level.

        Contexts that do not carry the legacy ``graph`` entry (or lack some of
        its keys) are left untouched instead of raising ``KeyError``.
        """
        if version < 3:
            values = context.values
            legacy = values.get("graph") or {}
            for key in ("attr_color", "attr_size", "attr_shape", "attr_label"):
                if key in legacy:
                    values[key] = legacy[key]
class OWRadviz(widget.OWWidget):
    """Widget that displays a Radviz projection of the input data.

    The selected variables are placed as anchors on the unit circle; the
    widget outputs the selected rows, the annotated table and a table
    describing the anchor positions.
    """

    name = "Radviz"
    description = "Radviz"
    icon = "icons/Radviz.svg"
    priority = 240

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)
        components = Output("Components", Table)

    settings_version = 1
    settingsHandler = settings.DomainContextHandler()

    variable_state = settings.ContextSetting({})

    auto_commit = settings.Setting(True)
    graph = settings.SettingProvider(OWRadvizGraph)
    vizrank = settings.SettingProvider(RadvizVizRank)

    jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0]

    ReplotRequest = QEvent.registerEventType()

    graph_name = "graph.plot_widget.plotItem"

    class Information(widget.OWWidget.Information):
        sql_sampled_data = widget.Msg("Data has been sampled")

    class Warning(widget.OWWidget.Warning):
        no_features = widget.Msg("At least 2 features have to be chosen")

    class Error(widget.OWWidget.Error):
        sparse_data = widget.Msg("Sparse data is not supported")
        no_features = widget.Msg(
            "At least 3 numeric or categorical variables are required"
        )
        no_instances = widget.Msg("At least 2 data instances are required")

    def __init__(self):
        """Build the plot, the variable selection lists and the controls."""
        super().__init__()

        self.data = None
        self.subset_data = None
        self._subset_mask = None
        self._selection = None  # np.array
        self.__replot_requested = False
        self._new_plotdata()

        # Meta variables that carry the projected coordinates.
        self.variable_x = ContinuousVariable("radviz-x")
        self.variable_y = ContinuousVariable("radviz-y")

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWRadvizGraph(self, box, "Plot",
                                   view_box=RadvizInteractiveViewBox)
        self.graph.hide_axes()

        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum)

        self.variables_selection = VariablesSelection()
        self.model_selected = VariableListModel(enable_dnd=True)
        self.model_other = VariableListModel(enable_dnd=True)
        self.variables_selection(self, self.model_selected, self.model_other)

        self.vizrank, self.btn_vizrank = RadvizVizRank.add_vizrank(
            self.controlArea, self, "Suggest features", self.vizrank_set_attrs
        )
        self.btn_vizrank.setSizePolicy(*SIZE_POLICY)
        self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank)

        self.viewbox = plot.getViewBox()
        self.replot = None

        g = self.graph.gui
        pp_box = g.point_properties_box(self.controlArea)
        pp_box.setSizePolicy(*SIZE_POLICY)
        self.models = g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        box.setSizePolicy(*SIZE_POLICY)
        g.add_widget(g.JitterSizeSlider, box)
        g.add_widgets([g.ShowLegend, g.ClassDensity, g.LabelOnlySelected], box)

        zoom_select = self.graph.box_zoom_select(self.controlArea)
        zoom_select.setSizePolicy(*SIZE_POLICY)

        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(
            self.controlArea,
            self,
            "auto_commit",
            "Send Selection",
            auto_label="Send Automatically",
        )

        self.graph.zoom_actions(self)

        # Unit circle on which the anchors are drawn.
        self._circle = QGraphicsEllipseItem()
        self._circle.setRect(QRectF(-1.0, -1.0, 2.0, 2.0))
        self._circle.setPen(pg.mkPen(QColor(0, 0, 0), width=2))

    def resizeEvent(self, event):
        """Reposition anchor labels after the widget has been resized."""
        # Let the base class run its own resize handling first.
        super().resizeEvent(event)
        self._update_points_labels()

    def keyPressEvent(self, event):
        """Forward modifier changes to the graph tooltip."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward modifier changes to the graph tooltip."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def vizrank_set_attrs(self, attrs):
        """Move the variables suggested by VizRank into the selected list."""
        if not attrs:
            return
        self.variables_selection.display_none()
        self.model_selected[:] = attrs[:]
        self.model_other[:] = [v for v in self.model_other if v not in attrs]

    def _new_plotdata(self):
        """Reset ``self.plotdata`` to an empty namespace."""
        self.plotdata = namespace(
            valid_mask=None,
            embedding_coords=None,
            points=None,
            arcarrows=[],
            point_labels=[],
            rand=None,
            data=None,
        )

    def update_colors(self):
        """React to a change of the color attribute."""
        self._vizrank_color_change()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(800, 500)

    def clear(self):
        """
        Clear/reset the widget state
        """
        self.data = None
        # The subset mask is computed against a specific table; drop it so
        # that it is rebuilt for the next data instead of being reused stale.
        self._subset_mask = None
        self.model_selected.clear()
        self.model_other.clear()
        self._clear_plot()

    def _clear_plot(self):
        """Drop plot data and remove all items from the plot widget."""
        self._new_plotdata()
        self.graph.plot_widget.clear()

    def invalidate_plot(self):
        """
        Schedule a delayed replot.
        """
        if not self.__replot_requested:
            self.__replot_requested = True
            QApplication.postEvent(
                self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10
            )

    def init_attr_values(self):
        """Pass the current data to the graph to set up its domain."""
        self.graph.set_domain(self.data)

    def _vizrank_color_change(self):
        """Enable the VizRank button only when ranking can be computed."""
        attr_color = self.graph.attr_color
        is_enabled = (
            self.data is not None
            and not self.data.is_sparse()
            and (len(self.model_other) + len(self.model_selected)) > 3
            and len(self.data) > 1
        )
        self.btn_vizrank.setEnabled(
            is_enabled
            and attr_color is not None
            and not np.isnan(
                self.data.get_column_view(attr_color)[0].astype(float)
            ).all()
        )
        self.vizrank.initialize()

    @Inputs.data
    def set_data(self, data):
        """
        Set the input dataset and check if data is valid.

        Args:
            data (Orange.data.table): data instances
        """

        def sql(data):
            # Download SQL tables: fully when small, otherwise a sample.
            self.Information.sql_sampled_data.clear()
            if isinstance(data, SqlTable):
                if data.approx_len() < 4000:
                    data = Table(data)
                else:
                    self.Information.sql_sampled_data()
                    data_sample = data.sample_time(1, no_cache=True)
                    data_sample.download_data(2000, partial=True)
                    data = Table(data_sample)
            return data

        def restore_settings(data):
            # get the default encoded state, replacing the position with Inf
            state = VariablesSelection.encode_var_state(
                [list(self.model_selected), list(self.model_other)]
            )
            state = {
                key: (source_ind, np.inf)
                for key, (source_ind, _) in state.items()
            }

            self.openContext(data.domain)

            # update the defaults state (the encoded state must contain
            # all variables in the input domain)
            state.update(self.variable_state)
            # ... and restore it with saved positions taking precedence over
            # the defaults
            selected, other = VariablesSelection.decode_var_state(
                state, [list(self.model_selected), list(self.model_other)]
            )
            return selected, other

        def is_sparse(data):
            if data.is_sparse():
                self.Error.sparse_data()
                data = None
            return data

        def are_features(data):
            domain = data.domain
            primitive = [
                var
                for var in chain(domain.class_vars, domain.metas,
                                 domain.attributes)
                if var.is_primitive()
            ]
            if len(primitive) < 3:
                self.Error.no_features()
                data = None
            return data

        def are_instances(data):
            if len(data) < 2:
                self.Error.no_instances()
                data = None
            return data

        self.clear_messages()
        self.btn_vizrank.setEnabled(False)
        self.closeContext()
        self.clear()
        self.information()
        self.Error.clear()
        # Run the checks in order; the first failing one sets data to None.
        for check in [sql, is_sparse, are_features, are_instances]:
            if data is None:
                break
            data = check(data)

        if data is not None:
            self.data = data
            self.init_attr_values()
            domain = data.domain
            primitive = [
                v for v in chain(domain.metas, domain.attributes)
                if v.is_primitive()
            ]
            self.model_selected[:] = primitive[:5]
            self.model_other[:] = primitive[5:] + list(domain.class_vars)
            self.model_selected[:], self.model_other[:] = \
                restore_settings(data)
            self._selection = np.zeros(len(data), dtype=np.uint8)
            self.invalidate_plot()
        else:
            self.data = None

    @Inputs.data_subset
    def set_subset_data(self, subset):
        """
        Set the supplementary input subset dataset.

        Args:
            subset (Orange.data.table): subset of data instances
        """
        self.subset_data = subset
        self._subset_mask = None
        self.controls.graph.alpha_value.setEnabled(subset is None)

    def handleNewSignals(self):
        """Replot and resend outputs after all inputs have been set."""
        if self.data is not None:
            self._clear_plot()
            if self.subset_data is not None and self._subset_mask is None:
                dataids = self.data.ids.ravel()
                subsetids = np.unique(self.subset_data.ids)
                # ``np.isin`` is the non-deprecated replacement of ``np.in1d``.
                self._subset_mask = np.isin(
                    dataids, subsetids, assume_unique=True)
            self.setup_plot(reset_view=True)
            self.cb_class_density.setEnabled(self.graph.can_draw_density())
        else:
            self.init_attr_values()
            self.graph.new_data(None)
        self._vizrank_color_change()
        self.commit()

    def customEvent(self, event):
        """Handle the delayed replot request posted by invalidate_plot."""
        if event.type() == OWRadviz.ReplotRequest:
            self.__replot_requested = False
            self._clear_plot()
            self.setup_plot(reset_view=True)
        else:
            super().customEvent(event)

    def closeContext(self):
        """Save the variable lists into the context before closing it."""
        self.variable_state = VariablesSelection.encode_var_state(
            [list(self.model_selected), list(self.model_other)]
        )
        super().closeContext()

    def prepare_radviz_data(self, variables):
        """Compute the embedding for ``variables`` and store it in plotdata."""
        ec, points, valid_mask = radviz(self.data, variables,
                                        self.plotdata.points)
        self.plotdata.embedding_coords = ec
        self.plotdata.points = points
        self.plotdata.valid_mask = valid_mask

    def _add_anchor_items(self):
        """Add the unit circle, anchor points and anchor labels to the plot."""
        self.graph.plot_widget.addItem(self._circle)
        self.graph.scatterplot_points = ScatterPlotItem(
            x=self.plotdata.points[:, 0], y=self.plotdata.points[:, 1]
        )
        self._update_points_labels()
        self.graph.plot_widget.addItem(self.graph.scatterplot_points)

    def setup_plot(self, reset_view=True):
        """Compute the Radviz embedding and draw it.

        Args:
            reset_view (bool): passed on to ``graph.update_data``
        """
        if self.data is None:
            return
        self.graph.jitter_continuous = True
        self.__replot_requested = False

        variables = list(self.model_selected)
        if len(variables) < 2:
            self.Warning.no_features()
            self.graph.new_data(None)
            return

        self.Warning.clear()
        self.prepare_radviz_data(variables)

        if self.plotdata.embedding_coords is None:
            return

        domain = self.data.domain
        new_metas = domain.metas + (self.variable_x, self.variable_y)
        domain = Domain(
            attributes=domain.attributes,
            class_vars=domain.class_vars,
            metas=new_metas,
        )
        mask = self.plotdata.valid_mask
        # ``np.float`` was removed from NumPy; the builtin is equivalent.
        array = np.zeros((len(self.data), 2), dtype=float)
        array[mask] = self.plotdata.embedding_coords
        data = self.data.transform(domain)
        data[:, self.variable_x] = array[:, 0].reshape(-1, 1)
        data[:, self.variable_y] = array[:, 1].reshape(-1, 1)
        subset_data = (
            data[self._subset_mask & mask]
            if self._subset_mask is not None and len(self._subset_mask)
            else None
        )
        self.plotdata.data = data
        self.graph.new_data(data[mask], subset_data)
        if self._selection is not None:
            self.graph.selection = self._selection[self.plotdata.valid_mask]
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)
        self._add_anchor_items()

    def randomize_indices(self):
        """Pick a random sample of rows to draw while an anchor is dragged."""
        ec = self.plotdata.embedding_coords
        self.plotdata.rand = (
            np.random.choice(len(ec), MAX_POINTS, replace=False)
            if len(ec) > MAX_POINTS
            else None
        )

    def manual_move(self):
        """Recompute and redraw the embedding while an anchor is dragged."""
        self.__replot_requested = False

        if self.plotdata.rand is not None:
            # Draw only the random sample chosen by randomize_indices.
            rand = self.plotdata.rand
            valid_mask = self.plotdata.valid_mask
            data = self.data[valid_mask]
            selection = self._selection[valid_mask]
            selection = selection[rand]
            ec, _, valid_mask = radviz(
                data, list(self.model_selected), self.plotdata.points
            )
            assert sum(valid_mask) == len(data)
            data = data[rand]
            ec = ec[rand]
            data_x = data.X
            data_y = data.Y
            data_metas = data.metas
        else:
            self.prepare_radviz_data(list(self.model_selected))
            ec = self.plotdata.embedding_coords
            valid_mask = self.plotdata.valid_mask
            data_x = self.data.X[valid_mask]
            data_y = self.data.Y[valid_mask]
            data_metas = self.data.metas[valid_mask]
            selection = self._selection[valid_mask]

        attributes = (self.variable_x, self.variable_y) \
            + self.data.domain.attributes
        domain = Domain(
            attributes=attributes,
            class_vars=self.data.domain.class_vars,
            metas=self.data.domain.metas,
        )
        data = Table.from_numpy(
            domain, X=np.hstack((ec, data_x)), Y=data_y, metas=data_metas
        )
        self.graph.new_data(data, None)
        self.graph.selection = selection
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=True)
        self._add_anchor_items()

    def _update_points_labels(self):
        """Recreate the text labels next to the anchor points."""
        if self.plotdata.points is None:
            return
        for point_label in self.plotdata.point_labels:
            self.graph.plot_widget.removeItem(point_label)
        self.plotdata.point_labels = []
        sx, sy = self.graph.view_box.viewPixelSize()

        # Each row of ``points`` is (x, y, variable).
        for row in self.plotdata.points:
            ti = TextItem()
            metrics = QFontMetrics(ti.textItem.font())
            text_width = ((RANGE.width()) / 2.0 - np.abs(row[0])) / sx
            name = row[2].name
            ti.setText(name)
            ti.setTextWidth(text_width)
            ti.setColor(QColor(0, 0, 0))
            br = ti.boundingRect()
            width = min(metrics.width(name), br.width())
            width = sx * (width + 5)
            height = sy * br.height()
            # Shift the label so that it does not overlap the circle.
            ti.setPos(row[0] - (row[0] < 0) * width,
                      row[1] + (row[1] > 0) * height)
            self.plotdata.point_labels.append(ti)
            self.graph.plot_widget.addItem(ti)

    def _update_jitter(self):
        """Replot after the jitter setting has changed."""
        self.invalidate_plot()

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw."""
        if self.data is not None:
            self.graph.rescale_data()
            self._update_graph()

    def _update_graph(self, reset_view=True, **_):
        """Redraw the graph from its current data."""
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.variable_x, self.variable_y,
                               reset_view=reset_view)

    def update_density(self):
        """Redraw after the class density setting has changed."""
        self._update_graph(reset_view=True)

    def selection_changed(self):
        """Store the graph selection and resend outputs."""
        if self.graph.selection is not None:
            self._selection[self.plotdata.valid_mask] = self.graph.selection
        self.commit()

    def prepare_data(self):
        pass

    def commit(self):
        """Send selected data, annotated data and the anchor components."""
        selected = annotated = components = None
        graph = self.graph
        if self.plotdata.data is not None:
            name = self.data.name
            data = self.plotdata.data
            # Expand the selection of valid rows to all rows of the data.
            mask = self.plotdata.valid_mask.astype(int)
            mask[mask == 1] = (
                graph.selection if graph.selection is not None else 0
            )
            selection = np.flatnonzero(mask)
            if len(selection):
                selected = data[selection]
                selected.name = name + ": selected"
                selected.attributes = self.data.attributes
            if graph.selection is not None and np.max(graph.selection) > 1:
                # More than one selection group is present.
                annotated = create_groups_table(data, mask)
            else:
                annotated = create_annotated_table(data, selection)
            annotated.attributes = self.data.attributes
            annotated.name = name + ": annotated"

            comp_domain = Domain(
                self.plotdata.points[:, 2],
                metas=[StringVariable(name="component")],
            )

            metas = np.array([["RX"], ["RY"], ["angle"]])
            angle = np.arctan2(
                np.array(self.plotdata.points[:, 1].T, dtype=float),
                np.array(self.plotdata.points[:, 0].T, dtype=float),
            )
            # ``np.vstack`` is the non-deprecated name of ``np.row_stack``.
            components = Table.from_numpy(
                comp_domain,
                X=np.vstack((self.plotdata.points[:, :2].T, angle)),
                metas=metas,
            )
            components.name = name + ": components"

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
        self.Outputs.components.send(components)

    def send_report(self):
        """Add the plot and a caption describing its settings to the report."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert(
            (
                ("Color", name(self.graph.attr_color)),
                ("Label", name(self.graph.attr_label)),
                ("Shape", name(self.graph.attr_shape)),
                ("Size", name(self.graph.attr_size)),
                (
                    "Jittering",
                    self.graph.jitter_size != 0
                    and "{} %".format(self.graph.jitter_size),
                ),
            )
        )
        self.report_plot()
        if caption:
            self.report_caption(caption)
class OWLinearProjection(widget.OWWidget): name = "Linear Projection" description = "A multi-axis projection of data onto " \ "a two-dimensional plane." icon = "icons/LinearProjection.svg" priority = 240 selection_indices = settings.Setting(None, schema_only=True) class Inputs: data = Input("Data", Table, default=True) data_subset = Input("Data Subset", Table) projection = Input("Projection", Table) class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) components = Output("Components", Table) Placement = Enum("Placement", dict(Circular=0, LDA=1, PCA=2, Projection=3), type=int, qualname="OWLinearProjection.Placement") Component_name = {Placement.Circular: "C", Placement.LDA: "LD", Placement.PCA: "PC"} Variable_name = {Placement.Circular: "circular", Placement.LDA: "lda", Placement.PCA: "pca", Placement.Projection: "projection"} jitter_sizes = [0, 0.1, 0.5, 1.0, 2.0] settings_version = 3 settingsHandler = settings.DomainContextHandler() variable_state = settings.ContextSetting({}) placement = settings.Setting(Placement.Circular) radius = settings.Setting(0) auto_commit = settings.Setting(True) resolution = 256 graph = settings.SettingProvider(OWLinProjGraph) ReplotRequest = QEvent.registerEventType() vizrank = settings.SettingProvider(LinearProjectionVizRank) graph_name = "graph.plot_widget.plotItem" class Warning(widget.OWWidget.Warning): no_cont_features = widget.Msg("Plotting requires numeric features") not_enough_components = widget.Msg("Input projection has less than 2 components") trivial_components = widget.Msg( "All components of the PCA are trivial (explain 0 variance). 
" "Input data is constant (or near constant).") class Error(widget.OWWidget.Error): proj_and_domain_match = widget.Msg("Projection and Data domains do not match") no_valid_data = widget.Msg("No projection due to invalid data") def __init__(self): super().__init__() self.data = None self.projection = None self.subset_data = None self._subset_mask = None self._selection = None self.__replot_requested = False self.n_cont_var = 0 #: Remember the saved state to restore self.__pending_selection_restore = self.selection_indices self.selection_indices = None self.variable_x = None self.variable_y = None box = gui.vBox(self.mainArea, True, margin=0) self.graph = OWLinProjGraph(self, box, "Plot", view_box=LinProjInteractiveViewBox) box.layout().addWidget(self.graph.plot_widget) plot = self.graph.plot_widget SIZE_POLICY = (QSizePolicy.Minimum, QSizePolicy.Maximum) self.variables_selection = VariablesSelection() self.model_selected = VariableListModel(enable_dnd=True) self.model_other = VariableListModel(enable_dnd=True) self.variables_selection(self, self.model_selected, self.model_other) self.vizrank, self.btn_vizrank = LinearProjectionVizRank.add_vizrank( self.controlArea, self, "Suggest Features", self._vizrank) self.variables_selection.add_remove.layout().addWidget(self.btn_vizrank) box = gui.widgetBox( self.controlArea, "Placement", sizePolicy=SIZE_POLICY) self.radio_placement = gui.radioButtonsInBox( box, self, "placement", btnLabels=["Circular Placement", "Linear Discriminant Analysis", "Principal Component Analysis", "Use input projection"], callback=self._change_placement ) self.viewbox = plot.getViewBox() self.replot = None g = self.graph.gui box = g.point_properties_box(self.controlArea) self.models = g.points_models g.add_widget(g.JitterSizeSlider, box) box.setSizePolicy(*SIZE_POLICY) box = gui.widgetBox(self.controlArea, "Hide axes", sizePolicy=SIZE_POLICY) self.rslider = gui.hSlider( box, self, "radius", minValue=0, maxValue=100, step=5, label="Radius", 
createLabel=False, ticks=True, callback=self.update_radius) self.rslider.setTickInterval(0) self.rslider.setPageStep(10) box = gui.vBox(self.controlArea, "Plot Properties") box.setSizePolicy(*SIZE_POLICY) g.add_widgets([g.ShowLegend, g.ToolTipShowsAll, g.ClassDensity, g.LabelOnlySelected], box) box = self.graph.box_zoom_select(self.controlArea) box.setSizePolicy(*SIZE_POLICY) self.icons = gui.attributeIconDict p = self.graph.plot_widget.palette() self.graph.set_palette(p) gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection", auto_label="Send Automatically") self.graph.zoom_actions(self) self._new_plotdata() self._change_placement() self.graph.jitter_continuous = True def reset_graph_data(self): if self.data is not None: self.graph.rescale_data() self._update_graph(reset_view=True) def keyPressEvent(self, event): super().keyPressEvent(event) self.graph.update_tooltip(event.modifiers()) def keyReleaseEvent(self, event): super().keyReleaseEvent(event) self.graph.update_tooltip(event.modifiers()) def _vizrank(self, attrs): self.variables_selection.display_none() self.model_selected[:] = attrs[:] self.model_other[:] = [var for var in self.model_other if var not in attrs] def _change_placement(self): placement = self.placement p_Circular = self.Placement.Circular p_LDA = self.Placement.LDA self.variables_selection.set_enabled(placement in [p_Circular, p_LDA]) self._vizrank_color_change() self.rslider.setEnabled(placement != p_Circular) self._setup_plot() self.commit() def _get_min_radius(self): return self.radius * np.max(np.linalg.norm(self.plotdata.axes, axis=1)) / 100 + 1e-5 def update_radius(self): # Update the anchor/axes visibility pd = self.plotdata assert pd is not None if pd.hidecircle is None: return min_radius = self._get_min_radius() for anchor, item in zip(pd.axes, pd.axisitems): item.setVisible(np.linalg.norm(anchor) > min_radius) pd.hidecircle.setRect(QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius)) def 
_new_plotdata(self): self.plotdata = namespace( valid_mask=None, embedding_coords=None, axisitems=[], axes=[], variables=[], data=None, hidecircle=None ) def _anchor_circle(self, variables): # minimum visible anchor radius (radius) min_radius = self._get_min_radius() axisitems = [] for anchor, var in zip(self.plotdata.axes, variables[:]): axitem = AnchorItem(line=QLineF(0, 0, *anchor), text=var.name,) axitem.setVisible(np.linalg.norm(anchor) > min_radius) axitem.setPen(pg.mkPen((100, 100, 100))) axitem.setArrowVisible(True) self.viewbox.addItem(axitem) axisitems.append(axitem) self.plotdata.axisitems = axisitems if self.placement == self.Placement.Circular: return hidecircle = QGraphicsEllipseItem() hidecircle.setRect(QRectF(-min_radius, -min_radius, 2 * min_radius, 2 * min_radius)) _pen = QPen(Qt.lightGray, 1) _pen.setCosmetic(True) hidecircle.setPen(_pen) self.viewbox.addItem(hidecircle) self.plotdata.hidecircle = hidecircle def update_colors(self): self._vizrank_color_change() def clear(self): # Clear/reset the widget state self.data = None self.model_selected.clear() self.model_other.clear() self._clear_plot() self.selection_indices = None def _clear_plot(self): self.Warning.trivial_components.clear() for axisitem in self.plotdata.axisitems: self.viewbox.removeItem(axisitem) if self.plotdata.hidecircle: self.viewbox.removeItem(self.plotdata.hidecircle) self._new_plotdata() self.graph.hide_axes() def invalidate_plot(self): """ Schedule a delayed replot. 
""" if not self.__replot_requested: self.__replot_requested = True QApplication.postEvent(self, QEvent(self.ReplotRequest), Qt.LowEventPriority - 10) def init_attr_values(self): self.graph.set_domain(self.data) def _vizrank_color_change(self): is_enabled = False if self.data is None: self.btn_vizrank.setToolTip("There is no data.") return vars = [v for v in chain(self.data.domain.variables, self.data.domain.metas) if v.is_primitive and v is not self.graph.attr_color] self.n_cont_var = len(vars) if self.placement not in [self.Placement.Circular, self.Placement.LDA]: msg = "Suggest Features works only for Circular and " \ "Linear Discriminant Analysis Projection" elif self.graph.attr_color is None: msg = "Color variable has to be selected" elif self.graph.attr_color.is_continuous and self.placement == self.Placement.LDA: msg = "Suggest Features does not work for Linear Discriminant Analysis Projection " \ "when continuous color variable is selected." elif len(vars) < 3: msg = "Not enough available continuous variables" else: is_enabled = True msg = "" self.btn_vizrank.setToolTip(msg) self.btn_vizrank.setEnabled(is_enabled) self.vizrank.stop_and_reset(is_enabled) @Inputs.projection def set_projection(self, projection): self.Warning.not_enough_components.clear() if projection and len(projection) < 2: self.Warning.not_enough_components() projection = None if projection is not None: self.placement = self.Placement.Projection self.projection = projection @Inputs.data def set_data(self, data): """ Set the input dataset. 
Args: data (Orange.data.table): data instances """ def sql(data): if isinstance(data, SqlTable): if data.approx_len() < 4000: data = Table(data) else: self.information("Data has been sampled") data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(2000, partial=True) data = Table(data_sample) return data def settings(data): # get the default encoded state, replacing the position with Inf state = VariablesSelection.encode_var_state( [list(self.model_selected), list(self.model_other)] ) state = {key: (source_ind, np.inf) for key, (source_ind, _) in state.items()} self.openContext(data.domain) selected_keys = [key for key, (sind, _) in self.variable_state.items() if sind == 0] if set(selected_keys).issubset(set(state.keys())): pass if self.__pending_selection_restore is not None: self._selection = np.array(self.__pending_selection_restore, dtype=int) self.__pending_selection_restore = None # update the defaults state (the encoded state must contain # all variables in the input domain) state.update(self.variable_state) # ... 
and restore it with saved positions taking precedence over # the defaults selected, other = VariablesSelection.decode_var_state( state, [list(self.model_selected), list(self.model_other)]) return selected, other self.closeContext() self.clear() self.Warning.no_cont_features.clear() self.information() data = sql(data) if data is not None: domain = data.domain vars = [var for var in chain(domain.variables, domain.metas) if var.is_continuous] if not len(vars): self.Warning.no_cont_features() data = None self.data = data self.init_attr_values() if data is not None and len(data): self._initialize(data) self.model_selected[:], self.model_other[:] = settings(data) self.vizrank.stop_and_reset() self.vizrank.attrs = self.data.domain.attributes if self.data is not None else [] def _check_possible_opt(self): def set_enabled(is_enabled): for btn in self.radio_placement.buttons: btn.setEnabled(is_enabled) self.variables_selection.set_enabled(is_enabled) p_Circular = self.Placement.Circular p_LDA = self.Placement.LDA p_Input = self.Placement.Projection if self.data: set_enabled(True) domain = self.data.domain if not domain.has_discrete_class or len(domain.class_var.values) < 2: self.radio_placement.buttons[p_LDA].setEnabled(False) if self.placement == p_LDA: self.placement = p_Circular if not self.projection: self.radio_placement.buttons[p_Input].setEnabled(False) if self.placement == p_Input: self.placement = p_Circular self._setup_plot() else: self.graph.new_data(None) self.rslider.setEnabled(False) set_enabled(False) self.commit() @Inputs.data_subset def set_subset_data(self, subset): """ Set the supplementary input subset dataset. 
Args: subset (Orange.data.table): subset of data instances """ self.subset_data = subset self._subset_mask = None self.controls.graph.alpha_value.setEnabled(subset is None) def handleNewSignals(self): if self.data is not None and self.subset_data is not None: # Update the plot's highlight items dataids = self.data.ids.ravel() subsetids = np.unique(self.subset_data.ids) self._subset_mask = np.in1d(dataids, subsetids, assume_unique=True) self._check_possible_opt() self._change_placement() self.commit() def customEvent(self, event): if event.type() == OWLinearProjection.ReplotRequest: self.__replot_requested = False self._setup_plot() self.commit() else: super().customEvent(event) def closeContext(self): self.variable_state = VariablesSelection.encode_var_state( [list(self.model_selected), list(self.model_other)] ) super().closeContext() def _initialize(self, data): # Initialize the GUI controls from data's domain. vars = [v for v in chain(data.domain.metas, data.domain.attributes) if v.is_continuous] self.model_other[:] = vars[3:] self.model_selected[:] = vars[:3] def prepare_plot_data(self, variables): def projection(variables): if set(self.projection.domain.attributes).issuperset(variables): axes = self.projection[:2, variables].X elif set(f.name for f in self.projection.domain.attributes).issuperset(f.name for f in variables): axes = self.projection[:2, [f.name for f in variables]].X else: self.Error.proj_and_domain_match() axes = None return axes def get_axes(variables): self.Error.proj_and_domain_match.clear() axes = None if self.placement == self.Placement.Circular: axes = LinProj.defaultaxes(len(variables)) elif self.placement == self.Placement.LDA: axes = self._get_lda(self.data, variables) elif self.placement == self.Placement.Projection and self.projection: axes = projection(variables) return axes coords = [column_data(self.data, var, dtype=float) for var in variables] coords = np.vstack(coords) p, N = coords.shape assert N == len(self.data), p == 
len(variables) axes = get_axes(variables) if axes is None: return None, None, None assert axes.shape == (2, p) valid_mask = ~np.isnan(coords).any(axis=0) coords = coords[:, valid_mask] X, Y = np.dot(axes, coords) if X.size and Y.size: X = normalized(X) Y = normalized(Y) return valid_mask, np.stack((X, Y), axis=1), axes.T def _setup_plot(self): self._clear_plot() if self.data is None: return self.__replot_requested = False names = get_unique_names([v.name for v in chain(self.data.domain.variables, self.data.domain.metas)], ["{}-x".format(self.Variable_name[self.placement]), "{}-y".format(self.Variable_name[self.placement])]) self.variable_x = ContinuousVariable(names[0]) self.variable_y = ContinuousVariable(names[1]) if self.placement in [self.Placement.Circular, self.Placement.LDA]: variables = list(self.model_selected) elif self.placement == self.Placement.Projection: variables = self.model_selected[:] + self.model_other[:] elif self.placement == self.Placement.PCA: variables = [var for var in self.data.domain.attributes if var.is_continuous] if not variables: self.graph.new_data(None) return if self.placement == self.Placement.PCA: valid_mask, ec, axes = self._get_pca() variables = self._pca.orig_domain.attributes else: valid_mask, ec, axes = self.prepare_plot_data(variables) self.plotdata.variables = variables self.plotdata.valid_mask = valid_mask self.plotdata.embedding_coords = ec self.plotdata.axes = axes if any(e is None for e in (valid_mask, ec, axes)): return if not sum(valid_mask): self.Error.no_valid_data() self.graph.new_data(None, None) return self.Error.no_valid_data.clear() self._anchor_circle(variables=variables) self._plot() def _plot(self): domain = self.data.domain new_metas = domain.metas + (self.variable_x, self.variable_y) domain = Domain(attributes=domain.attributes, class_vars=domain.class_vars, metas=new_metas) valid_mask = self.plotdata.valid_mask array = np.zeros((len(self.data), 2), dtype=np.float) array[valid_mask] = 
self.plotdata.embedding_coords self.plotdata.data = data = self.data.transform(domain) data[:, self.variable_x] = array[:, 0].reshape(-1, 1) data[:, self.variable_y] = array[:, 1].reshape(-1, 1) subset_data = data[self._subset_mask & valid_mask]\ if self._subset_mask is not None and len(self._subset_mask) else None self.plotdata.data = data self.graph.new_data(data[valid_mask], subset_data) if self._selection is not None: self.graph.selection = self._selection[valid_mask] self.graph.update_data(self.variable_x, self.variable_y, False) def _get_lda(self, data, variables): domain = Domain(attributes=variables, class_vars=data.domain.class_vars) data = data.transform(domain) lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2) lda.fit(data.X, data.Y) scalings = lda.scalings_[:, :2].T if scalings.shape == (1, 1): scalings = np.array([[1.], [0.]]) return scalings def _get_pca(self): data = self.data MAX_COMPONENTS = 2 ncomponents = 2 DECOMPOSITIONS = [PCA] # TruncatedSVD cls = DECOMPOSITIONS[0] pca_projector = cls(n_components=MAX_COMPONENTS) pca_projector.component = ncomponents pca_projector.preprocessors = cls.preprocessors + [Normalize()] pca = pca_projector(data) variance_ratio = pca.explained_variance_ratio_ cumulative = np.cumsum(variance_ratio) self._pca = pca if not np.isfinite(cumulative[-1]): self.Warning.trivial_components() coords = pca(data).X valid_mask = ~np.isnan(coords).any(axis=1) # scale axes max_radius = np.min([np.abs(np.min(coords, axis=0)), np.max(coords, axis=0)]) axes = pca.components_.T.copy() axes *= max_radius / np.max(np.linalg.norm(axes, axis=1)) return valid_mask, coords, axes def _update_graph(self, reset_view=False): self.graph.zoomStack = [] if self.graph.data is None: return self.graph.update_data(self.variable_x, self.variable_y, reset_view) def update_density(self): self._update_graph(reset_view=False) def selection_changed(self): if self.graph.selection is not None: self._selection = np.zeros(len(self.data), 
dtype=np.uint8) self._selection[self.plotdata.valid_mask] = self.graph.selection self.selection_indices = self._selection.tolist() else: self._selection = self.selection_indices = None self.commit() def prepare_data(self): pass def commit(self): def prepare_components(): if self.placement in [self.Placement.Circular, self.Placement.LDA]: attrs = [a for a in self.model_selected[:]] axes = self.plotdata.axes elif self.placement == self.Placement.PCA: axes = self._pca.components_.T attrs = [a for a in self._pca.orig_domain.attributes] if self.placement != self.Placement.Projection: domain = Domain([ContinuousVariable(a.name, compute_value=lambda _: None) for a in attrs], metas=[StringVariable(name='component')]) metas = np.array([["{}{}".format(self.Component_name[self.placement], i + 1) for i in range(axes.shape[1])]], dtype=object).T components = Table(domain, axes.T, metas=metas) components.name = 'components' else: components = self.projection return components selected = annotated = components = None if self.data is not None and self.plotdata.data is not None: components = prepare_components() graph = self.graph mask = self.plotdata.valid_mask.astype(int) mask[mask == 1] = graph.selection if graph.selection is not None \ else [False * len(mask)] selection = np.array([], dtype=np.uint8) if mask is None else np.flatnonzero(mask) name = self.data.name data = self.plotdata.data if len(selection): selected = data[selection] selected.name = name + ": selected" selected.attributes = self.data.attributes if graph.selection is not None and np.max(graph.selection) > 1: annotated = create_groups_table(data, mask) else: annotated = create_annotated_table(data, selection) annotated.attributes = self.data.attributes annotated.name = name + ": annotated" self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send(annotated) self.Outputs.components.send(components) def send_report(self): if self.data is None: return def name(var): return var and var.name def 
projection_name(): name = ("Circular Placement", "Linear Discriminant Analysis", "Principal Component Analysis", "Input projection") return name[self.placement] caption = report.render_items_vert(( ("Projection", projection_name()), ("Color", name(self.graph.attr_color)), ("Label", name(self.graph.attr_label)), ("Shape", name(self.graph.attr_shape)), ("Size", name(self.graph.attr_size)), ("Jittering", self.graph.jitter_size != 0 and "{} %".format(self.graph.jitter_size)))) self.report_plot() if caption: self.report_caption(caption) @classmethod def migrate_settings(cls, settings_, version): if version < 2: settings_["point_width"] = settings_["point_size"] if version < 3: settings_graph = {} settings_graph["jitter_size"] = settings_["jitter_value"] settings_graph["point_width"] = settings_["point_width"] settings_graph["alpha_value"] = settings_["alpha_value"] settings_graph["class_density"] = settings_["class_density"] settings_["graph"] = settings_graph @classmethod def migrate_context(cls, context, version): if version < 2: domain = context.ordered_domain c_domain = [t for t in context.ordered_domain if t[1] == 2] d_domain = [t for t in context.ordered_domain if t[1] == 1] for d, old_val, new_val in ((domain, "color_index", "attr_color"), (d_domain, "shape_index", "attr_shape"), (c_domain, "size_index", "attr_size")): index = context.values[old_val][0] - 1 context.values[new_val] = (d[index][0], d[index][1] + 100) \ if 0 <= index < len(d) else None if version < 3: context.values["graph"] = { "attr_color": context.values["attr_color"], "attr_shape": context.values["attr_shape"], "attr_size": context.values["attr_size"] }