class OWMDS(widget.OWWidget):
    """
    Widget projecting data into two dimensions by multidimensional scaling.

    The embedding is computed from the "Distances" input when present,
    otherwise from Euclidean distances of the (preprocessed) "Data" input.
    The optimization is run incrementally from the Qt event loop (see
    `__set_update_loop` and `customEvent`) so the GUI stays responsive.
    """
    name = "MDS"
    description = "Two-dimensional data projection by multidimensional " \
                  "scaling constructed from a distance matrix."
    icon = "icons/MDS.svg"

    inputs = [("Data", Orange.data.Table, "set_data", widget.Default),
              ("Distances", Orange.misc.DistMatrix, "set_disimilarity"),
              ("Data Subset", Orange.data.Table, "set_subset_data")]

    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Data", Orange.data.Table)]

    #: Initialization type
    PCA, Random = 0, 1

    #: Refresh rate; (label, iterations per GUI update) pairs. -1 means
    #: "run all iterations in a single step" (see `__start`).
    RefreshRate = [
        ("Every iteration", 1),
        ("Every 5 steps", 5),
        ("Every 10 steps", 10),
        ("Every 25 steps", 25),
        ("Every 50 steps", 50),
        ("None", -1)
    ]

    #: Jitter amount; (label, percent of the coordinate span) pairs.
    JitterAmount = [
        ("None", 0),
        ("0.1 %", 0.1),
        ("0.5 %", 0.5),
        ("1 %", 1.0),
        ("2 %", 2.0)
    ]

    #: Runtime state
    Running, Finished, Waiting = 1, 2, 3

    settingsHandler = settings.DomainContextHandler()

    max_iter = settings.Setting(300)
    initialization = settings.Setting(PCA)
    #: Index into `RefreshRate`
    refresh_rate = settings.Setting(3)

    # output embedding role.
    NoRole, AttrRole, AddAttrRole, MetaRole = 0, 1, 2, 3

    output_embedding_role = settings.Setting(2)
    autocommit = settings.Setting(True)

    color_value = settings.ContextSetting("")
    shape_value = settings.ContextSetting("")
    size_value = settings.ContextSetting("")
    label_value = settings.ContextSetting("")
    label_only_selected = settings.Setting(False)

    symbol_size = settings.Setting(8)
    symbol_opacity = settings.Setting(230)
    connected_pairs = settings.Setting(5)
    #: Index into `JitterAmount`
    jitter = settings.Setting(0)

    legend_anchor = settings.Setting(((1, 0), (1, 0)))

    graph_name = "plot.plotItem"

    def __init__(self):
        super().__init__()
        self.matrix = None
        self.data = None
        self.subset_data = None  # type: Optional[Orange.data.Table]
        self.matrix_data = None
        self.signal_data = None
        # Current (N, 2) point embedding; (re)set by `_initialize`.
        self.embedding = None

        # Cached per-point plot attributes; None means "needs recomputing".
        self._pen_data = None
        self._brush_data = None
        self._shape_data = None
        self._size_data = None
        self._label_data = None
        self._similar_pairs = None
        self._scatter_item = None
        self._legend_item = None
        self._selection_mask = None
        self._subset_mask = None  # type: Optional[numpy.ndarray]
        self._invalidated = False
        self._effective_matrix = None

        self.__update_loop = None
        self.__state = OWMDS.Waiting
        self.__in_next_step = False
        self.__draw_similar_pairs = False

        box = gui.vBox(self.controlArea, "MDS Optimization")
        form = QtGui.QFormLayout(
            labelAlignment=Qt.AlignLeft,
            formAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QtGui.QFormLayout.AllNonFixedFieldsGrow,
            verticalSpacing=10
        )

        form.addRow("Max iterations:",
                    gui.spin(box, self, "max_iter", 10, 10 ** 4, step=1))

        form.addRow("Initialization:",
                    gui.comboBox(box, self, "initialization",
                                 items=["PCA (Torgerson)", "Random"],
                                 callback=self.__invalidate_embedding))

        box.layout().addLayout(form)
        form.addRow("Refresh:",
                    gui.comboBox(
                        box, self, "refresh_rate",
                        items=[t for t, _ in OWMDS.RefreshRate],
                        callback=self.__invalidate_refresh))
        gui.separator(box, 10)
        self.runbutton = gui.button(
            box, self, "Run", callback=self._toggle_run)

        box = gui.vBox(self.controlArea, "Graph")
        self.colorvar_model = itemmodels.VariableListModel()

        common_options = dict(
            sendSelectedValue=True, valueType=str, orientation=Qt.Horizontal,
            labelWidth=50, contentsLength=12)

        self.cb_color_value = gui.comboBox(
            box, self, "color_value", label="Color:",
            callback=self._on_color_index_changed, **common_options)
        self.cb_color_value.setModel(self.colorvar_model)

        self.shapevar_model = itemmodels.VariableListModel()
        self.cb_shape_value = gui.comboBox(
            box, self, "shape_value", label="Shape:",
            callback=self._on_shape_index_changed, **common_options)
        self.cb_shape_value.setModel(self.shapevar_model)

        self.sizevar_model = itemmodels.VariableListModel()
        self.cb_size_value = gui.comboBox(
            box, self, "size_value", label="Size:",
            callback=self._on_size_index_changed, **common_options)
        self.cb_size_value.setModel(self.sizevar_model)

        self.labelvar_model = itemmodels.VariableListModel()
        self.cb_label_value = gui.comboBox(
            box, self, "label_value", label="Label:",
            callback=self._on_label_index_changed, **common_options)
        self.cb_label_value.setModel(self.labelvar_model)
        gui.checkBox(
            gui.indentedBox(box), self, 'label_only_selected',
            'Label only selected points',
            callback=self._on_label_index_changed)

        form = QtGui.QFormLayout(
            labelAlignment=Qt.AlignLeft,
            formAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QtGui.QFormLayout.AllNonFixedFieldsGrow,
            verticalSpacing=10
        )
        form.addRow("Symbol size:",
                    gui.hSlider(box, self, "symbol_size",
                                minValue=1, maxValue=20,
                                callback=self._on_size_index_changed,
                                createLabel=False))
        form.addRow("Symbol opacity:",
                    gui.hSlider(box, self, "symbol_opacity",
                                minValue=100, maxValue=255, step=100,
                                callback=self._on_color_index_changed,
                                createLabel=False))
        form.addRow("Show similar pairs:",
                    gui.hSlider(
                        gui.hBox(self.controlArea), self, "connected_pairs",
                        minValue=0, maxValue=20, createLabel=False,
                        callback=self._on_connected_changed))
        form.addRow("Jitter:",
                    gui.comboBox(
                        box, self, "jitter",
                        items=[text for text, _ in self.JitterAmount],
                        callback=self._update_plot))
        box.layout().addLayout(form)

        gui.rubber(self.controlArea)
        box = QtGui.QGroupBox("Zoom/Select", )
        box.setLayout(QtGui.QHBoxLayout())
        box.layout().setContentsMargins(2, 2, 2, 2)

        group = QtGui.QActionGroup(self, exclusive=True)

        def icon(name):
            path = "icons/Dlg_{}.png".format(name)
            path = pkg_resources.resource_filename(widget.__name__, path)
            return QtGui.QIcon(path)

        action_select = QtGui.QAction(
            "Select", self, checkable=True, checked=True, icon=icon("arrow"),
            shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_1))
        action_zoom = QtGui.QAction(
            "Zoom", self, checkable=True, checked=False, icon=icon("zoom"),
            shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_2))
        action_pan = QtGui.QAction(
            "Pan", self, checkable=True, checked=False, icon=icon("pan_hand"),
            shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_3))

        action_reset_zoom = QtGui.QAction(
            "Zoom to fit", self, icon=icon("zoom_reset"),
            shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_0))
        action_reset_zoom.triggered.connect(
            lambda: self.plot.autoRange(padding=0.1,
                                        items=[self._scatter_item]))
        group.addAction(action_select)
        group.addAction(action_zoom)
        group.addAction(action_pan)

        self.addActions(group.actions() + [action_reset_zoom])
        action_select.setChecked(True)

        def button(action):
            b = QtGui.QToolButton()
            b.setToolButtonStyle(Qt.ToolButtonIconOnly)
            b.setDefaultAction(action)
            return b

        box.layout().addWidget(button(action_select))
        box.layout().addWidget(button(action_zoom))
        box.layout().addWidget(button(action_pan))
        box.layout().addSpacing(4)
        box.layout().addWidget(button(action_reset_zoom))
        box.layout().addStretch()

        self.controlArea.layout().addWidget(box)

        box = gui.vBox(self.controlArea, "Output")
        self.output_combo = gui.comboBox(
            box, self, "output_embedding_role",
            items=["Original features only",
                   "Coordinates only",
                   "Coordinates as features",
                   "Coordinates as meta attributes"],
            callback=self._invalidate_output, addSpace=4)
        gui.auto_commit(box, self, "autocommit", "Send Selected",
                        checkbox_label="Send selected automatically",
                        box=None)

        self.plot = pg.PlotWidget(background="w", enableMenu=False)
        self.plot.setAspectLocked(True)
        self.plot.getPlotItem().hideAxis("bottom")
        self.plot.getPlotItem().hideAxis("left")
        self.plot.getPlotItem().hideButtons()
        self.plot.setRenderHint(QtGui.QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plot)

        self.selection_tool = PlotSelectionTool(parent=self)
        self.zoom_tool = PlotZoomTool(parent=self)
        self.pan_tool = PlotPanTool(parent=self)
        self.pinch_tool = PlotPinchZoomTool(parent=self)
        self.pinch_tool.setViewBox(self.plot.getViewBox())
        self.selection_tool.setViewBox(self.plot.getViewBox())
        self.selection_tool.selectionFinished.connect(self.__selection_end)
        self.current_tool = self.selection_tool

        def activate_tool(action):
            # Detach the previously active tool before attaching the new one.
            self.current_tool.setViewBox(None)

            if action is action_select:
                active, cur = self.selection_tool, Qt.ArrowCursor
            elif action is action_zoom:
                active, cur = self.zoom_tool, Qt.ArrowCursor
            elif action is action_pan:
                active, cur = self.pan_tool, Qt.OpenHandCursor
            self.current_tool = active
            self.current_tool.setViewBox(self.plot.getViewBox())
            self.plot.getViewBox().setCursor(QtGui.QCursor(cur))

        group.triggered[QtGui.QAction].connect(activate_tool)

        self._initialize()

    @check_sql_input
    def set_data(self, data):
        """Set the input data set.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.signal_data = data

        if self.matrix is not None and data is not None and \
                len(self.matrix) == len(data):
            # The data matches the current matrix: keep the embedding and
            # only refresh the controls for the new domain.
            self.closeContext()
            self.data = data
            self.update_controls()
            self.openContext(data)
        else:
            self._invalidated = True
        self._selection_mask = None

    def set_disimilarity(self, matrix):
        """Set the dissimilarity (distance) matrix.

        Parameters
        ----------
        matrix : Optional[Orange.misc.DistMatrix]
        """
        self.matrix = matrix
        # Always reset first so that the row items of a previously connected
        # matrix are never kept when the new matrix carries none.
        self.matrix_data = None
        if matrix is not None and matrix.row_items:
            self.matrix_data = matrix.row_items
        self._invalidated = True
        self._selection_mask = None

    def set_subset_data(self, subset_data):
        """Set a subset of `data` input to highlight in the plot.

        Parameters
        ----------
        subset_data: Optional[Orange.data.Table]
        """
        self.subset_data = subset_data
        # invalidate the pen/brush when the subset is changed
        self._pen_data = self._brush_data = None
        self._subset_mask = None  # type: Optional[numpy.ndarray]

    def _clear(self):
        """Reset cached plot data, the combo box models and the run state."""
        self._pen_data = None
        self._brush_data = None
        self._shape_data = None
        self._size_data = None
        self._label_data = None
        self._similar_pairs = None

        self.colorvar_model[:] = ["Same color"]
        self.shapevar_model[:] = ["Same shape"]
        self.sizevar_model[:] = ["Same size"]
        self.labelvar_model[:] = ["No labels"]

        self.color_value = self.colorvar_model[0]
        self.shape_value = self.shapevar_model[0]
        self.size_value = self.sizevar_model[0]
        self.label_value = self.labelvar_model[0]

        self.__set_update_loop(None)
        self.__state = OWMDS.Waiting

    def _clear_plot(self):
        """Remove all plot items, remembering the legend anchor position."""
        self.plot.clear()
        self._scatter_item = None
        if self._legend_item is not None:
            anchor = legend_anchor_pos(self._legend_item)
            if anchor is not None:
                self.legend_anchor = anchor
            if self._legend_item.scene() is not None:
                self._legend_item.scene().removeItem(self._legend_item)
            self._legend_item = None

    def update_controls(self):
        """Populate the color/shape/size/label models for the current input.

        Without data, and with anything other than a column-wise (axis 0)
        matrix, there is nothing to offer, so the models are left as they
        are.
        """
        if self.data is None:
            if getattr(self.matrix, 'axis', 1) == 0:
                # Column-wise distances
                attr = "Attribute names"
                self.labelvar_model[:] = ["No labels", attr]
                self.shapevar_model[:] = ["Same shape", attr]
                self.colorvar_model[:] = ["Same color", attr]

                self.color_value = attr
                self.shape_value = attr
            return

        # initialize the graph state from data
        domain = self.data.domain
        all_vars = list(filter_visible(domain.variables + domain.metas))
        cd_vars = [var for var in all_vars if var.is_primitive()]
        disc_vars = [var for var in all_vars if var.is_discrete]
        cont_vars = [var for var in all_vars if var.is_continuous]
        # Only variables with few enough values for the available symbols
        # (the last symbol is used for missing values in `_setup_plot`).
        shape_vars = [var for var in disc_vars
                      if len(var.values) <= len(ScatterPlotItem.Symbols) - 1]
        self.colorvar_model[:] = chain(
            ["Same color"],
            [self.colorvar_model.Separator] if cd_vars else [],
            cd_vars)
        self.shapevar_model[:] = chain(
            ["Same shape"],
            [self.shapevar_model.Separator] if shape_vars else [],
            shape_vars)
        self.sizevar_model[:] = chain(
            ["Same size", "Stress"],
            [self.sizevar_model.Separator] if cont_vars else [],
            cont_vars)
        self.labelvar_model[:] = chain(
            ["No labels"],
            [self.labelvar_model.Separator] if all_vars else [],
            all_vars)

        if domain.class_var is not None:
            self.color_value = domain.class_var.name

    def _initialize(self):
        """Rebuild the widget state from the current input signals."""
        # clear everything
        self.closeContext()
        self._clear()
        self.data = None
        self._effective_matrix = None
        self.embedding = None

        # if no data nor matrix is present reset plot
        if self.signal_data is None and self.matrix is None:
            return

        if self.signal_data and self.matrix is not None and \
                len(self.signal_data) != len(self.matrix):
            self.error(1, "Data and distances dimensions do not match.")
            self._update_plot()
            return

        self.error(1)

        if self.signal_data:
            self.data = self.signal_data
        elif self.matrix_data:
            self.data = self.matrix_data

        if self.matrix is not None:
            self._effective_matrix = self.matrix
            # For column-wise distances the row items are not per-point data.
            if self.matrix.axis == 0 and self.data is self.matrix_data:
                self.data = None
        else:
            preprocessed_data = Orange.projection.MDS().preprocess(self.data)
            self._effective_matrix = Orange.distance.Euclidean(
                preprocessed_data)

        self.update_controls()
        self.openContext(self.data)

    def _toggle_run(self):
        """Stop a running optimization, or start/resume one."""
        if self.__state == OWMDS.Running:
            self.stop()
            self._invalidate_output()
        else:
            self.start()

    def start(self):
        """Start (or resume) the optimization unless it is already running."""
        if self.__state == OWMDS.Running:
            return
        elif self.__state == OWMDS.Finished:
            # Resume/continue from a previous run
            self.__start()
        elif self.__state == OWMDS.Waiting and \
                self._effective_matrix is not None:
            self.__start()

    def stop(self):
        """Interrupt a running optimization."""
        if self.__state == OWMDS.Running:
            self.__set_update_loop(None)

    def __start(self):
        """Create the update loop generator and install it."""
        self.__draw_similar_pairs = False
        X = self._effective_matrix

        if self.embedding is not None:
            init = self.embedding
        elif self.initialization == OWMDS.PCA:
            init = torgerson(X, n_components=2)
        else:
            init = None

        # number of iterations per single GUI update step
        _, step_size = OWMDS.RefreshRate[self.refresh_rate]
        if step_size == -1:
            step_size = self.max_iter

        def update_loop(X, max_iter, step, init):
            """
            return an iterator over successive improved MDS point embeddings.
            """
            # NOTE: this code MUST NOT call into QApplication.processEvents
            done = False
            iterations_done = 0
            # `numpy.float` was an alias of the builtin and has been removed
            # from NumPy; the builtin `float` is the same dtype.
            oldstress = numpy.finfo(float).max

            while not done:
                step_iter = min(max_iter - iterations_done, step)
                mds = Orange.projection.MDS(
                    dissimilarity="precomputed", n_components=2,
                    n_init=1, max_iter=step_iter)

                mdsfit = mds.fit(X, init=init)
                iterations_done += step_iter

                embedding, stress = mdsfit.embedding_, mdsfit.stress_
                # Normalize the stress by the summed point norms before
                # comparing it with the previous step.
                stress /= numpy.sqrt(numpy.sum(embedding ** 2, axis=1)).sum()

                if iterations_done >= max_iter:
                    done = True
                elif (oldstress - stress) < mds.params["eps"]:
                    done = True
                init = embedding
                oldstress = stress

                yield embedding, mdsfit.stress_, iterations_done / max_iter

        self.__set_update_loop(update_loop(X, self.max_iter, step_size, init))
        self.progressBarInit(processEvents=None)

    def __set_update_loop(self, loop):
        """
        Set the update `loop` coroutine.

        The `loop` is a generator yielding `(embedding, stress, progress)`
        tuples where `embedding` is a `(N, 2) ndarray` of current updated
        MDS points, `stress` is the current stress and `progress` a float
        ratio (0 <= progress <= 1)

        If an existing update loop is already in place it is interrupted
        (closed).

        .. note::
            The `loop` must not explicitly yield control flow to the event
            loop (i.e. call `QApplication.processEvents`)

        """
        if self.__update_loop is not None:
            self.__update_loop.close()
            self.__update_loop = None
            self.progressBarFinished(processEvents=None)

        self.__update_loop = loop

        if loop is not None:
            self.progressBarInit(processEvents=None)
            self.setStatusMessage("Running")
            self.runbutton.setText("Stop")
            self.__state = OWMDS.Running
            # Kick off the first step; see `customEvent`.
            QtGui.QApplication.postEvent(self, QEvent(QEvent.User))
        else:
            self.setStatusMessage("")
            self.runbutton.setText("Start")
            self.__state = OWMDS.Finished

    def __next_step(self):
        """Advance the update loop by one step and refresh the plot."""
        if self.__update_loop is None:
            return

        loop = self.__update_loop
        try:
            embedding, _stress, progress = next(self.__update_loop)
            assert self.__update_loop is loop
        except StopIteration:
            # The optimization finished: commit and draw the final plot
            # (now including the similar-pairs lines).
            self.__set_update_loop(None)
            self.unconditional_commit()
            self.__draw_similar_pairs = True
            self._update_plot()
            self.plot.autoRange(padding=0.1, items=[self._scatter_item])
        else:
            self.progressBarSet(100.0 * progress, processEvents=None)
            self.embedding = embedding
            self._update_plot()
            self.plot.autoRange(padding=0.1, items=[self._scatter_item])
            # schedule next update
            QtGui.QApplication.postEvent(
                self, QEvent(QEvent.User), Qt.LowEventPriority)

    def customEvent(self, event):
        """Run one update loop step for each posted `QEvent.User` event."""
        if event.type() == QEvent.User and self.__update_loop is not None:
            if not self.__in_next_step:
                self.__in_next_step = True
                try:
                    self.__next_step()
                finally:
                    self.__in_next_step = False
            else:
                warnings.warn(
                    "Re-entry in update loop detected. "
                    "A rogue `proccessEvents` is on the loose.",
                    RuntimeWarning)
                # re-schedule the update iteration.
                QtGui.QApplication.postEvent(self, QEvent(QEvent.User))
        return super().customEvent(event)

    def __invalidate_embedding(self):
        # reset/invalidate the MDS embedding, to the default initialization
        # (Random or PCA), restarting the optimization if necessary.
        if self.embedding is None:
            return
        state = self.__state
        if self.__update_loop is not None:
            self.__set_update_loop(None)

        X = self._effective_matrix

        if self.initialization == OWMDS.PCA:
            self.embedding = torgerson(X)
        else:
            self.embedding = numpy.random.rand(len(X), 2)

        self._update_plot()
        self.plot.autoRange(padding=0.1, items=[self._scatter_item])

        # restart the optimization if it was interrupted.
        if state == OWMDS.Running:
            self.__start()

    def __invalidate_refresh(self):
        state = self.__state

        if self.__update_loop is not None:
            self.__set_update_loop(None)

        # restart the optimization if it was interrupted.
        # TODO: decrease the max iteration count by the already
        # completed iterations count.
        if state == OWMDS.Running:
            self.__start()

    def handleNewSignals(self):
        """Re-initialize on invalidated inputs, then replot and commit."""
        if self._invalidated:
            self._invalidated = False
            self._initialize()
            self.start()
            self.__draw_similar_pairs = False

        if self._subset_mask is None and self.subset_data is not None and \
                self.data is not None:
            self._subset_mask = numpy.in1d(self.data.ids,
                                           self.subset_data.ids)

        self._update_plot()
        self.plot.autoRange(padding=0.1)
        self.unconditional_commit()

    def _invalidate_output(self):
        self.commit()

    def _on_color_index_changed(self):
        self._pen_data = None
        self._update_plot()

    def _on_shape_index_changed(self):
        self._shape_data = None
        self._update_plot()

    def _on_size_index_changed(self):
        self._size_data = None
        self._update_plot()

    def _on_label_index_changed(self):
        self._label_data = None
        self._update_plot()

    def _on_connected_changed(self):
        self._similar_pairs = None
        self._update_plot()

    def _update_plot(self):
        """Clear the plot and redraw it if an embedding is available."""
        self._clear_plot()

        if self.embedding is not None:
            self._setup_plot()

    def _setup_plot(self):
        """Build the scatter plot, pair lines, labels and legend.

        Per-point pens/brushes, shapes, sizes and labels are cached in the
        corresponding `_*_data` attributes and only recomputed when those
        are None.
        """
        have_data = self.data is not None
        have_matrix_transposed = self.matrix is not None and \
            not self.matrix.axis
        plotstyle = mdsplotutils.plotstyle

        size = self._effective_matrix.shape[0]

        def column(data, variable):
            a, _ = data.get_column_view(variable)
            return a.ravel()

        def attributes(matrix):
            return matrix.row_items.domain.attributes

        def scale(a):
            # Min-max scale to [0, 1]; constant input maps to all zeros.
            dmin, dmax = numpy.nanmin(a), numpy.nanmax(a)
            if dmax - dmin > 0:
                return (a - dmin) / (dmax - dmin)
            else:
                return numpy.zeros_like(a)

        def jitter(x, factor=1, rstate=None):
            # Add uniform noise spanning `factor` percent of the range of x.
            if rstate is None:
                rstate = numpy.random.RandomState()
            elif not isinstance(rstate, numpy.random.RandomState):
                rstate = numpy.random.RandomState(rstate)
            span = numpy.nanmax(x) - numpy.nanmin(x)
            if span < numpy.finfo(x.dtype).eps * 100:
                span = 1
            a = factor * span / 100.
            return x + (rstate.random_sample(x.shape) - 0.5) * a

        if self._pen_data is None:
            if self._selection_mask is not None:
                pointflags = numpy.where(
                    self._selection_mask, mdsplotutils.Selected,
                    mdsplotutils.NoFlags)
            else:
                pointflags = None

            color_index = self.cb_color_value.currentIndex()
            if have_data and color_index > 0:
                color_var = self.colorvar_model[color_index]
                if color_var.is_discrete:
                    palette = colorpalette.ColorPaletteGenerator(
                        len(color_var.values)
                    )
                    plotstyle = plotstyle.updated(discrete_palette=palette)

                color_data = mdsplotutils.color_data(
                    self.data, color_var, plotstyle=plotstyle)
                # Append the opacity as an extra (alpha) column.
                color_data = numpy.hstack(
                    (color_data,
                     numpy.full((len(color_data), 1), self.symbol_opacity,
                                dtype=float))
                )
                pen_data = mdsplotutils.pen_data(color_data * 0.8, pointflags)
                brush_data = mdsplotutils.brush_data(color_data)
            elif have_matrix_transposed and \
                    self.colorvar_model[color_index] == 'Attribute names':
                attr = attributes(self.matrix)
                palette = colorpalette.ColorPaletteGenerator(len(attr))
                color_data = [palette.getRGB(i) for i in range(len(attr))]
                color_data = numpy.hstack((
                    color_data,
                    numpy.full((len(color_data), 1), self.symbol_opacity,
                               dtype=float))
                )
                pen_data = mdsplotutils.pen_data(color_data * 0.8, pointflags)
                brush_data = mdsplotutils.brush_data(color_data)
            else:
                pen_data = make_pen(QtGui.QColor(Qt.darkGray), cosmetic=True)
                if self._selection_mask is not None:
                    # Index 0 -> default pen, index 1 -> selected pen.
                    pen_data = numpy.array(
                        [pen_data, plotstyle.selected_pen])
                    pen_data = pen_data[self._selection_mask.astype(int)]
                else:
                    pen_data = numpy.full(size, pen_data, dtype=object)
                brush_data = numpy.full(
                    size, pg.mkColor((192, 192, 192, self.symbol_opacity)),
                    dtype=object)

            if self._subset_mask is not None and have_data and \
                    self._subset_mask.shape == (size, ):
                # clear brush fill for non subset data
                brush_data[~self._subset_mask] = QtGui.QBrush(Qt.NoBrush)

            self._pen_data = pen_data
            self._brush_data = brush_data

        if self._shape_data is None:
            shape_index = self.cb_shape_value.currentIndex()
            if have_data and shape_index > 0:
                Symbols = ScatterPlotItem.Symbols
                symbols = numpy.array(list(Symbols.keys()))

                shape_var = self.shapevar_model[shape_index]
                data = column(self.data, shape_var).astype(float)
                data = data % (len(Symbols) - 1)
                # Missing values get the last symbol.
                data[numpy.isnan(data)] = len(Symbols) - 1
                shape_data = symbols[data.astype(int)]
            elif have_matrix_transposed and \
                    self.shapevar_model[shape_index] == 'Attribute names':
                Symbols = ScatterPlotItem.Symbols
                symbols = numpy.array(list(Symbols.keys()))
                attr = [i % (len(Symbols) - 1)
                        for i, _ in enumerate(attributes(self.matrix))]
                shape_data = symbols[attr]
            else:
                shape_data = "o"
            self._shape_data = shape_data

        if self._size_data is None:
            MinPointSize = 3
            point_size = self.symbol_size + MinPointSize
            size_index = self.cb_size_value.currentIndex()
            if have_data and size_index == 1:
                # size by stress
                size_data = stress(self.embedding, self._effective_matrix)
                size_data = scale(size_data)
                size_data = MinPointSize + size_data * point_size
            elif have_data and size_index > 0:
                size_var = self.sizevar_model[size_index]
                size_data = column(self.data, size_var)
                size_data = scale(size_data)
                size_data = MinPointSize + size_data * point_size
            else:
                size_data = point_size
            self._size_data = size_data

        if self._label_data is None:
            label_index = self.cb_label_value.currentIndex()
            if have_data and label_index > 0:
                label_var = self.labelvar_model[label_index]
                label_data = column(self.data, label_var)
                label_data = [label_var.str_val(val) for val in label_data]
                label_items = [pg.TextItem(text, anchor=(0.5, 0), color=0.0)
                               for text in label_data]
            elif have_matrix_transposed and \
                    self.labelvar_model[label_index] == 'Attribute names':
                attr = attributes(self.matrix)
                label_items = [pg.TextItem(str(text), anchor=(0.5, 0))
                               for text in attr]
            else:
                label_items = None
            self._label_data = label_items

        emb_x, emb_y = self.embedding[:, 0], self.embedding[:, 1]
        if self.jitter > 0:
            _, jitter_factor = self.JitterAmount[self.jitter]
            # Fixed seeds keep the jitter stable between redraws.
            emb_x = jitter(emb_x, jitter_factor, rstate=42)
            emb_y = jitter(emb_y, jitter_factor, rstate=667)

        if self.connected_pairs and self.__draw_similar_pairs:
            if self._similar_pairs is None:
                # This code requires storing lower triangle of X (n x n / 2
                # doubles), n x n / 2 * 2 indices to X, n x n / 2 indices for
                # argsort result. If this becomes an issue, it can be reduced
                # to n x n argsort indices by argsorting the entire X. Then we
                # take the first n + 2 * p indices. We compute their
                # coordinates i, j in the original matrix. We keep those for
                # which i < j. n + 2 * p will suffice to exclude the diagonal
                # (i = j). If the number of those for which i < j is smaller
                # than p, we instead take i > j. Among those that remain, we
                # take the first p.
                # Assuming that MDS can't show so many points that memory
                # could become an issue, I preferred using simpler code.
                m = self._effective_matrix
                n = len(m)
                # `connected_pairs` is a percentage of all n * (n - 1) / 2
                # pairs.
                p = (n * (n - 1) // 2 * self.connected_pairs) // 100
                indcs = numpy.triu_indices(n, 1)
                closest = numpy.argsort(m[indcs])[:p]
                self._similar_pairs = fpairs = numpy.empty(2 * p, dtype=int)
                fpairs[::2] = indcs[0][closest]
                fpairs[1::2] = indcs[1][closest]

            # Endpoints are interleaved: (2 * i, 2 * i + 1) form one pair.
            # Index once instead of re-indexing on every iteration.
            pair_x = emb_x[self._similar_pairs]
            pair_y = emb_y[self._similar_pairs]
            for x1, y1, x2, y2 in zip(pair_x[::2], pair_y[::2],
                                      pair_x[1::2], pair_y[1::2]):
                item = QtGui.QGraphicsLineItem(x1, y1, x2, y2)
                pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(204, 204, 204)), 2)
                pen.setCosmetic(True)
                item.setPen(pen)
                self.plot.addItem(item)

        # Each spot carries its row index; see `select`.
        data = numpy.arange(size)
        self._scatter_item = item = ScatterPlotItem(
            x=emb_x, y=emb_y,
            pen=self._pen_data, brush=self._brush_data,
            symbol=self._shape_data, size=self._size_data,
            data=data, antialias=True
        )
        self.plot.addItem(item)

        if self._label_data is not None:
            if self.label_only_selected:
                if self._selection_mask is not None:
                    for (x, y), text_item, selected \
                            in zip(self.embedding, self._label_data,
                                   self._selection_mask):
                        if selected:
                            self.plot.addItem(text_item)
                            text_item.setPos(x, y)
            else:
                for (x, y), text_item in zip(self.embedding,
                                             self._label_data):
                    self.plot.addItem(text_item)
                    text_item.setPos(x, y)

        self._legend_item = LegendItem()
        viewbox = self.plot.getViewBox()
        self._legend_item.setParentItem(self.plot.getViewBox())
        self._legend_item.setZValue(viewbox.zValue() + 10)
        self._legend_item.restoreAnchor(self.legend_anchor)

        color_var = shape_var = None
        color_index = self.cb_color_value.currentIndex()
        if have_data and 1 <= color_index < len(self.colorvar_model):
            color_var = self.colorvar_model[color_index]
            assert isinstance(color_var, Orange.data.Variable)

        shape_index = self.cb_shape_value.currentIndex()
        if have_data and 1 <= shape_index < len(self.shapevar_model):
            shape_var = self.shapevar_model[shape_index]
            assert isinstance(shape_var, Orange.data.Variable)

        if shape_var is not None or \
                (color_var is not None and color_var.is_discrete):
            legend_data = mdsplotutils.legend_data(
                color_var, shape_var, plotstyle=plotstyle)

            for color, symbol, text in legend_data:
                self._legend_item.addItem(
                    ScatterPlotItem(pen=color, brush=color, symbol=symbol,
                                    size=10),
                    escape(text)
                )
        else:
            self._legend_item.hide()

    def commit(self):
        """Send the "Data" and "Selected Data" outputs.

        The embedding coordinates are placed in the output according to
        `output_embedding_role`. Without input data the bare coordinates
        table is sent.
        """
        if self.embedding is not None:
            output = embedding = Orange.data.Table.from_numpy(
                Orange.data.Domain([Orange.data.ContinuousVariable("X"),
                                    Orange.data.ContinuousVariable("Y")]),
                self.embedding
            )
        else:
            output = embedding = None

        if self.embedding is not None and self.data is not None:
            domain = self.data.domain
            attrs = domain.attributes
            class_vars = domain.class_vars
            metas = domain.metas

            if self.output_embedding_role == OWMDS.AttrRole:
                attrs = embedding.domain.attributes
            elif self.output_embedding_role == OWMDS.AddAttrRole:
                attrs = domain.attributes + embedding.domain.attributes
            elif self.output_embedding_role == OWMDS.MetaRole:
                metas += embedding.domain.attributes

            domain = Orange.data.Domain(attrs, class_vars, metas)
            output = Orange.data.Table.from_table(domain, self.data)

            # The X/Y variables are not in the source table; fill them in.
            if self.output_embedding_role == OWMDS.AttrRole:
                output.X[:] = embedding.X
            if self.output_embedding_role == OWMDS.AddAttrRole:
                output.X[:, -2:] = embedding.X
            elif self.output_embedding_role == OWMDS.MetaRole:
                output.metas[:, -2:] = embedding.X

        self.send("Data", output)
        if output is not None and self._selection_mask is not None and \
                numpy.any(self._selection_mask):
            subset = output[self._selection_mask]
        else:
            subset = None
        self.send("Selected Data", subset)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self._clear_plot()
        self._clear()

    def __selection_end(self, path):
        self.select(path)
        self._pen_data = None
        self._update_plot()
        self._invalidate_output()

    def select(self, region):
        """Select the points whose position lies inside `region`."""
        item = self._scatter_item
        if item is None:
            return

        indices = numpy.array(
            [spot.data() for spot in item.points()
             if region.contains(spot.pos())],
            dtype=int)

        # Without modifier keys the new selection replaces the old one.
        if not QtGui.QApplication.keyboardModifiers():
            self._selection_mask = None

        self.select_indices(indices, QtGui.QApplication.keyboardModifiers())

    def select_indices(self, indices, modifiers=Qt.NoModifier):
        """Update the selection mask with `indices`.

        Alt removes the indices from the selection, Control toggles them,
        anything else (including Shift) adds them.
        """
        if self.data is None:
            return

        if self._selection_mask is None or \
                not modifiers & (Qt.ControlModifier | Qt.ShiftModifier |
                                 Qt.AltModifier):
            self._selection_mask = numpy.zeros(len(self.data), dtype=bool)

        if modifiers & Qt.AltModifier:
            self._selection_mask[indices] = False
        elif modifiers & Qt.ControlModifier:
            self._selection_mask[indices] = ~self._selection_mask[indices]
        else:
            self._selection_mask[indices] = True

    def send_report(self):
        """Add the plot and the current visual settings to the report."""
        if self.data is None:
            return
        self.report_plot()
        caption = report.render_items_vert((
            ("Color", self.color_value != "Same color" and self.color_value),
            ("Shape", self.shape_value != "Same shape" and self.shape_value),
            ("Size", self.size_value != "Same size" and self.size_value),
            ("Labels", self.label_value != "No labels" and self.label_value)))
        if caption:
            self.report_caption(caption)
        self.report_items((("Output", self.output_combo.currentText()),))
class OWDistributions(widget.OWWidget):
    """Widget that plots the value distribution of one variable, optionally
    split by a discrete "group by" variable."""

    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100
    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Set the input data set")]

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    #: Index into `smoothing_facs` (continuous) or `bins` (binned display)
    smoothing_index = settings.Setting(5)
    #: 0 = none, 1..k = one group value, k + 1 = all group values
    show_prob = settings.ContextSetting(0)

    want_graph = True
    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]
    smoothing_facs = list(reversed(
        [0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10]))

    def __init__(self):
        """Build the control area (variable list, precision, group-by) and
        the plot with its linked probability view box and legend."""
        super().__init__()
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.widgetBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        box = gui.widgetBox(self.controlArea, "Precision")

        gui.separator(self.controlArea, 4, 4)

        box2 = gui.widgetBox(box, orientation="horizontal")
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2, self, "smoothing_index",
                    minValue=0, maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing, createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")

        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4),
            self, "disc_cont", "Bin continuous variables",
            callback=self._on_groupvar_idx_changed)

        box = gui.widgetBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(
            box, self, "groupvar_idx",
            callback=self._on_groupvar_idx_changed, valueType=str,
            contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed)
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2, self, "show_prob", label="Show probabilities",
            orientation="horizontal",
            callback=self._on_relative_freq_changed)

        plotview = pg.PlotWidget(background=None)
        self.mainArea.layout().addWidget(plotview)
        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)

        self.plot = pg.PlotItem()
        self.plot.hideButtons()
        plotview.setCentralItem(self.plot)

        # Second view box, drawn above the main plot, for probability curves
        # against the right-hand axis.
        self.plot_prob = pg.ViewBox()
        self.plot.hideAxis('right')
        self.plot.scene().addItem(self.plot_prob)
        self.plot.getAxis("right").linkToView(self.plot_prob)
        self.plot.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.plot)
        self.update_views()
        self.plot.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        def disable_mouse(plot):
            plot.setMouseEnabled(False, False)
            plot.setMenuEnabled(False)

        disable_mouse(self.plot.getViewBox())
        disable_mouse(self.plot_prob)

        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.plot.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot.getViewBox())
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))
        self.graphButton.clicked.connect(self.save_graph)

    def update_views(self):
        """Keep the probability view box aligned with the main view box."""
        self.plot_prob.setGeometry(self.plot.vb.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot.vb, self.plot_prob.XAxis)

    def set_data(self, data):
        """Input handler: replace the data, repopulate the variable and
        group-by lists, restore context settings and redraw."""
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)

            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            if domain.has_discrete_class:
                # Search only the variables so the "(None)" placeholder
                # string is never compared against a Variable.
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            # Clamp restored indices to the valid range for this domain.
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        """Reset plots, models, selection indices and the legend."""
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup_smoothing(self):
        """Update the precision slider labels for the current display mode
        (smoothing of a continuous variable vs. number of bins)."""
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin continuous variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText(
                "Bin continuous variables into {} bins".
                format(self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    def _setup(self):
        """Resolve the selected variables, compute the distribution or
        contingency and display it."""
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                                 len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            data = self.data[:, (self.var, self.cvar)
                             if self.cvar else self.var]
            disc = Orange.preprocess.discretize.EqualWidth(
                n=self.bins[self.smoothing_index])
            data = Orange.preprocess.Discretize(data, method=disc)
            self.var = data.domain.variables[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def display_distribution(self):
        """Draw the distribution of `self.var`: a density curve for a
        continuous variable, grey bars otherwise.

        Returns without drawing when no distribution has been computed yet.
        """
        dist = self.distributions
        var = self.var
        # Guard instead of `assert`: the cached value may still be None and
        # asserts are stripped under -O.
        if dist is None or not len(dist):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self.plot.hideAxis('right')

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            edges, curve = ash_curve(
                dist, None, m=OWDistributions.ASH_HIST,
                smoothing_factor=self.smoothing_facs[self.smoothing_index])
            # Move from bin edges to bin centres.
            edges = edges + (edges[1] - edges[0])/2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QtGui.QPen(QtGui.QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QtGui.QBrush(Qt.gray), pen=pen)
            self.plot.addItem(item)
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)

    def _on_relative_freq_changed(self):
        """Redraw after the relative-frequency or probability option changed."""
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.

        Draws one curve (continuous `self.var`) or one stacked bar (discrete
        `self.var`) per value of `self.cvar`, plus optional probability
        curves / error-bar markers, and fills the legend. Returns without
        drawing when no contingency has been computed yet.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        # Guard instead of `assert` (see display_distribution).
        if cont is None or not len(cont):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()

        if self.show_prob:
            self.plot.showAxis('right')
        else:
            self.plot.hideAxis('right')

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        cvar_values = cvar.values
        palette = colorpalette.ColorPaletteGenerator(len(cvar_values))
        colors = [palette[i].lighter() for i in range(len(cvar_values))]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            # Keep only the group values that actually have data.
            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(numpy.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(ash_curve(
                        dist, cont, m=OWDistributions.ASH_HIST,
                        smoothing_factor=self.smoothing_facs[
                            self.smoothing_index]))
            weights = numpy.array(weights)
            sumw = numpy.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            curvesline = []  # from histograms to lines
            for X, Y in curves:
                X = X + (X[1] - X[0])/2
                X = X[:-1]
                X = numpy.array(X)
                Y = numpy.array(Y)
                curvesline.append((X, Y))

            # Two passes: translucent fills first, outlines on top.
            for t in ["fill", "line"]:
                for (X, Y), color, w in reversed(
                        list(zip(curvesline, colors, weights))):
                    item = pg.PlotCurveItem()
                    pen = QtGui.QPen(QtGui.QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QtGui.QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(
                        X, Y/(w if self.relative_freq else 1),
                        antialias=True, stepMode=False,
                        fillLevel=0 if t == "fill" else None,
                        brush=QtGui.QBrush(color), pen=pen)
                    self.plot.addItem(item)

            # `curvesline` is empty when no group has data; numpy.hstack([])
            # would raise, so skip the probability curves in that case.
            if self.show_prob and curvesline:
                all_X = numpy.array(numpy.unique(
                    numpy.hstack([X for X, _ in curvesline])))
                inter_X = numpy.array(numpy.linspace(
                    all_X[0], all_X[-1], len(all_X)*2))
                curvesinterp = [numpy.interp(inter_X, X, Y)
                                for (X, Y) in curvesline]
                sumprob = numpy.sum(curvesinterp, axis=0)
                # Only show probabilities where the total density is at
                # least 5 % of its maximum.
                legal = sumprob > 0.05 * numpy.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color in reversed(list(zip(curvesinterp, colors))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QtGui.QPen(QtGui.QBrush(color), 3,
                                         style=QtCore.Qt.DotLine)
                        pen.setCosmetic(True)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(
                            inter_X[legal], prob, antialias=True,
                            stepMode=False, fillLevel=None, brush=None,
                            pen=pen)
                        self.plot_prob.addItem(item)

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)

            maxh = 0  # maximal column height
            maxrh = 0  # maximal relative column height
            scvar = cont.sum(axis=1)
            # a cvar with sum=0 will always have distribution counts 0,
            # therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist/scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QtCore.QRectF(i - 0.333, 0, 0.666, maxrh
                                     if self.relative_freq else maxh)
                item = DistributionBarItem(geom, dist/scvar/maxrh
                                           if self.relative_freq
                                           else dist/maxh, colors)
                self.plot.addItem(item)

                if self.show_prob:
                    for ic, a in enumerate(dist):
                        # Draw only the chosen group value, or every value
                        # when "(All)" is selected.
                        if self.show_prob - 1 != ic and \
                                self.show_prob - 1 != len(dist):
                            continue
                        position = -0.333 + ((ic+0.5)*0.666/len(dist))
                        if dsum < 1e-6:
                            continue
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        mark = pg.ScatterPlotItem()
                        bar = pg.ErrorBarItem()
                        pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0)), 1)
                        pen.setCosmetic(True)
                        # Clip the interval so it stays inside [0, 1].
                        bar.setData(x=[i+position], y=[prob],
                                    bottom=min(numpy.array([ci]), prob),
                                    top=min(numpy.array([ci]), 1 - prob),
                                    beam=numpy.array([0.05]),
                                    brush=QtGui.QColor(1), pen=pen)
                        mark.setData([i+position], [prob], antialias=True,
                                     symbol="o", fillLevel=None, pxMode=True,
                                     size=10, brush=QtGui.QColor(colors[ic]),
                                     pen=pen)
                        self.plot_prob.addItem(bar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name)
            )
        self._legend.show()

    def set_left_axis_name(self):
        """Label the left axis according to variable type and whether
        relative frequencies are shown for a grouped display."""
        set_label = self.plot.getAxis("left").setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"]
                      [self.cvar is not None and self.relative_freq])
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])

    def enable_disable_rel_freq(self):
        """Disable the group-dependent controls unless both a variable and
        a group variable are selected."""
        self.cb_prob.setDisabled(self.var is None or self.cvar is None)
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None)

    def _on_variable_idx_changed(self):
        """Selection handler of the variable list view."""
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        """Callback of the group-by combo and the binning check box."""
        self._setup()

    def _on_set_smoothing(self):
        """Callback of the precision slider."""
        self._setup()

    def onDeleteWidget(self):
        """Clear the plot before the base-class teardown."""
        self.plot.clear()
        super().onDeleteWidget()

    def save_graph(self):
        """Open the save dialog for the current plot."""
        from Orange.widgets.data.owsave import OWSave

        save_img = OWSave(data=self.plot,
                          file_formats=FileFormat.img_writers)
        save_img.exec_()
def _setup_plot(self):
    """Populate `self.plot` with the current embedding.

    Rebuilds whichever of the cached pen/brush, shape, size and label data
    are None, then adds (in this order) the optional similar-pair lines,
    the scatter item, the text labels and the legend.
    """
    have_data = self.data is not None
    have_matrix_transposed = self.matrix is not None and not self.matrix.axis
    plotstyle = mdsplotutils.plotstyle

    size = self._effective_matrix.shape[0]

    def column(data, variable):
        # Flat view of one column of `data`.
        a, _ = data.get_column_view(variable)
        return a.ravel()

    def attributes(matrix):
        return matrix.row_items.domain.attributes

    def scale(a):
        # Min-max scale to [0, 1] ignoring NaNs; constant input maps to 0.
        dmin, dmax = numpy.nanmin(a), numpy.nanmax(a)
        if dmax - dmin > 0:
            return (a - dmin) / (dmax - dmin)
        else:
            return numpy.zeros_like(a)

    def jitter(x, factor=1, rstate=None):
        # Add uniform noise of `factor` percent of the data span.
        if rstate is None:
            rstate = numpy.random.RandomState()
        elif not isinstance(rstate, numpy.random.RandomState):
            rstate = numpy.random.RandomState(rstate)
        span = numpy.nanmax(x) - numpy.nanmin(x)
        if span < numpy.finfo(x.dtype).eps * 100:
            span = 1
        a = factor * span / 100.
        return x + (rstate.random_sample(x.shape) - 0.5) * a

    if self._pen_data is None:
        if self._selection_mask is not None:
            pointflags = numpy.where(
                self._selection_mask, mdsplotutils.Selected,
                mdsplotutils.NoFlags)
        else:
            pointflags = None

        color_index = self.cb_color_value.currentIndex()
        if have_data and color_index > 0:
            color_var = self.colorvar_model[color_index]
            if color_var.is_discrete:
                palette = colorpalette.ColorPaletteGenerator(
                    len(color_var.values)
                )
                plotstyle = plotstyle.updated(discrete_palette=palette)
            else:
                palette = None

            color_data = mdsplotutils.color_data(
                self.data, color_var, plotstyle=plotstyle)
            # Append the opacity as a fourth (alpha) column.
            color_data = numpy.hstack(
                (color_data,
                 numpy.full((len(color_data), 1), self.symbol_opacity,
                            dtype=float))
            )
            pen_data = mdsplotutils.pen_data(color_data * 0.8, pointflags)
            brush_data = mdsplotutils.brush_data(color_data)
        elif have_matrix_transposed and \
                self.colorvar_model[color_index] == 'Attribute names':
            attr = attributes(self.matrix)
            palette = colorpalette.ColorPaletteGenerator(len(attr))
            color_data = [palette.getRGB(i) for i in range(len(attr))]
            color_data = numpy.hstack((
                color_data,
                numpy.full((len(color_data), 1), self.symbol_opacity,
                           dtype=float))
            )
            pen_data = mdsplotutils.pen_data(color_data * 0.8, pointflags)
            brush_data = mdsplotutils.brush_data(color_data)
        else:
            pen_data = make_pen(QtGui.QColor(Qt.darkGray), cosmetic=True)
            if self._selection_mask is not None:
                # Index 0 -> default pen, index 1 -> selected pen.
                pen_data = numpy.array(
                    [pen_data, plotstyle.selected_pen])
                pen_data = pen_data[self._selection_mask.astype(int)]
            else:
                pen_data = numpy.full(self._effective_matrix.dim, pen_data,
                                      dtype=object)
            brush_data = numpy.full(
                size, pg.mkColor((192, 192, 192, self.symbol_opacity)),
                dtype=object)

        if self._subset_mask is not None and have_data and \
                self._subset_mask.shape == (size, ):
            # clear brush fill for non subset data
            brush_data[~self._subset_mask] = QtGui.QBrush(Qt.NoBrush)

        self._pen_data = pen_data
        self._brush_data = brush_data

    if self._shape_data is None:
        shape_index = self.cb_shape_value.currentIndex()
        if have_data and shape_index > 0:
            Symbols = ScatterPlotItem.Symbols
            symbols = numpy.array(list(Symbols.keys()))

            shape_var = self.shapevar_model[shape_index]
            # Builtin `float`: the `numpy.float` alias was removed in
            # NumPy 1.24.
            data = column(self.data, shape_var).astype(float)
            data = data % (len(Symbols) - 1)
            # Missing values get the last symbol.
            data[numpy.isnan(data)] = len(Symbols) - 1
            shape_data = symbols[data.astype(int)]
        elif have_matrix_transposed and \
                self.shapevar_model[shape_index] == 'Attribute names':
            Symbols = ScatterPlotItem.Symbols
            symbols = numpy.array(list(Symbols.keys()))
            attr = [i % (len(Symbols) - 1)
                    for i, _ in enumerate(attributes(self.matrix))]
            shape_data = symbols[attr]
        else:
            shape_data = "o"
        self._shape_data = shape_data

    if self._size_data is None:
        MinPointSize = 3
        point_size = self.symbol_size + MinPointSize
        size_index = self.cb_size_value.currentIndex()
        if have_data and size_index == 1:
            # size by stress
            size_data = stress(self.embedding, self._effective_matrix)
            size_data = scale(size_data)
            size_data = MinPointSize + size_data * point_size
        elif have_data and size_index > 0:
            size_var = self.sizevar_model[size_index]
            size_data = column(self.data, size_var)
            size_data = scale(size_data)
            size_data = MinPointSize + size_data * point_size
        else:
            size_data = point_size
        self._size_data = size_data

    if self._label_data is None:
        label_index = self.cb_label_value.currentIndex()
        if have_data and label_index > 0:
            label_var = self.labelvar_model[label_index]
            label_data = column(self.data, label_var)
            label_data = [label_var.str_val(val) for val in label_data]
            label_items = [pg.TextItem(text, anchor=(0.5, 0), color=0.0)
                           for text in label_data]
        elif have_matrix_transposed and \
                self.labelvar_model[label_index] == 'Attribute names':
            attr = attributes(self.matrix)
            label_items = [pg.TextItem(str(text), anchor=(0.5, 0))
                           for text in attr]
        else:
            label_items = None
        self._label_data = label_items

    emb_x, emb_y = self.embedding[:, 0], self.embedding[:, 1]
    if self.jitter > 0:
        _, jitter_factor = self.JitterAmount[self.jitter]
        # Fixed seeds: the same jitter is produced on every redraw.
        emb_x = jitter(emb_x, jitter_factor, rstate=42)
        emb_y = jitter(emb_y, jitter_factor, rstate=667)

    if self.connected_pairs and self.__draw_similar_pairs:
        if self._similar_pairs is None:
            # This code requires storing lower triangle of X (n x n / 2
            # doubles), n x n / 2 * 2 indices to X, n x n / 2 indices for
            # argsort result. If this becomes an issue, it can be reduced to
            # n x n argsort indices by argsorting the entire X. Then we
            # take the first n + 2 * p indices. We compute their coordinates
            # i, j in the original matrix. We keep those for which i < j.
            # n + 2 * p will suffice to exclude the diagonal (i = j). If the
            # number of those for which i < j is smaller than p, we instead
            # take i > j. Among those that remain, we take the first p.
            # Assuming that MDS can't show so many points that memory could
            # become an issue, I preferred using simpler code.
            m = self._effective_matrix
            n = len(m)
            p = (n * (n - 1) // 2 * self.connected_pairs) // 100
            indcs = numpy.triu_indices(n, 1)
            closest = numpy.argsort(m[indcs])[:p]
            # Flat array of endpoints: [i0, j0, i1, j1, ...].
            self._similar_pairs = fpairs = numpy.empty(2 * p, dtype=int)
            fpairs[::2] = indcs[0][closest]
            fpairs[1::2] = indcs[1][closest]
        # Gather the endpoint coordinates once rather than re-indexing the
        # embedding four times per line.
        pair_x = emb_x[self._similar_pairs]
        pair_y = emb_y[self._similar_pairs]
        for i in range(len(pair_x) // 2):
            item = QtGui.QGraphicsLineItem(
                pair_x[i * 2], pair_y[i * 2],
                pair_x[i * 2 + 1], pair_y[i * 2 + 1]
            )
            pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(204, 204, 204)), 2)
            pen.setCosmetic(True)
            item.setPen(pen)
            self.plot.addItem(item)

    # Each spot carries its row index; `select` reads it via spot.data().
    data = numpy.arange(size)
    self._scatter_item = item = ScatterPlotItem(
        x=emb_x, y=emb_y,
        pen=self._pen_data, brush=self._brush_data, symbol=self._shape_data,
        size=self._size_data, data=data, antialias=True
    )
    self.plot.addItem(item)

    if self._label_data is not None:
        if self.label_only_selected:
            if self._selection_mask is not None:
                for (x, y), text_item, selected \
                        in zip(self.embedding, self._label_data,
                               self._selection_mask):
                    if selected:
                        self.plot.addItem(text_item)
                        text_item.setPos(x, y)
        else:
            for (x, y), text_item in zip(self.embedding, self._label_data):
                self.plot.addItem(text_item)
                text_item.setPos(x, y)

    self._legend_item = LegendItem()
    viewbox = self.plot.getViewBox()
    self._legend_item.setParentItem(self.plot.getViewBox())
    self._legend_item.setZValue(viewbox.zValue() + 10)
    self._legend_item.restoreAnchor(self.legend_anchor)

    color_var = shape_var = None
    color_index = self.cb_color_value.currentIndex()
    if have_data and 1 <= color_index < len(self.colorvar_model):
        color_var = self.colorvar_model[color_index]
        assert isinstance(color_var, Orange.data.Variable)

    shape_index = self.cb_shape_value.currentIndex()
    if have_data and 1 <= shape_index < len(self.shapevar_model):
        shape_var = self.shapevar_model[shape_index]
        assert isinstance(shape_var, Orange.data.Variable)

    if shape_var is not None or \
            (color_var is not None and color_var.is_discrete):
        legend_data = mdsplotutils.legend_data(
            color_var, shape_var, plotstyle=plotstyle)
        for color, symbol, text in legend_data:
            self._legend_item.addItem(
                ScatterPlotItem(pen=color, brush=color, symbol=symbol,
                                size=10),
                escape(text)
            )
    else:
        self._legend_item.hide()
class OWDistributions(widget.OWWidget):
    """Widget that plots the value distribution of one variable, optionally
    split by a discrete "group by" variable, with hover tooltips."""

    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 120

    class Inputs:
        data = Input("Data", Orange.data.Table, doc="Set the input data set")

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    #: Index into `smoothing_facs` (continuous) or `bins` (binned display)
    smoothing_index = settings.Setting(5)
    #: 0 = none, 1..k = one group value, k + 1 = all group values
    show_prob = settings.ContextSetting(0)

    graph_name = "plot"

    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]
    smoothing_facs = list(reversed(
        [0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10]))

    def __init__(self):
        """Build the control area (variable list, precision, group-by) and
        the plot with its linked probability view box, tooltips and legend."""
        super().__init__()
        self.data = None

        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None
        varbox = gui.vBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []

        self.varview = QListView(
            selectionMode=QListView.SingleSelection)
        self.varview.setSizePolicy(
            QSizePolicy.Minimum, QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        box = gui.vBox(self.controlArea, "Precision")

        gui.separator(self.controlArea, 4, 4)

        box2 = gui.hBox(box)
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2, self, "smoothing_index",
                    minValue=0, maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing, createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")

        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4),
            self, "disc_cont", "Bin numeric variables",
            callback=self._on_groupvar_idx_changed,
            tooltip="Show numeric variables as categorical.")

        box = gui.vBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(
            box, self, "groupvar_idx",
            callback=self._on_groupvar_idx_changed,
            valueType=str, contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed,
            tooltip="Normalize probabilities so that probabilities "
                    "for each group-by value sum to 1.")
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2, self, "show_prob", label="Show probabilities:",
            orientation=Qt.Horizontal,
            callback=self._on_relative_freq_changed,
            tooltip="Show probabilities for a chosen group-by value "
                    "(at each point probabilities for all group-by values sum to 1).")

        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QLabel()
        w.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)
        self.ploti = pg.PlotItem()
        # `self.plot` is the view box of the plot item, not the item itself.
        self.plot = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        # Second view box, drawn above the main one, for probability curves
        # against the right-hand axis.
        self.plot_prob = pg.ViewBox()
        self.ploti.hideAxis('right')
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        def disable_mouse(plot):
            plot.setMouseEnabled(False, False)
            plot.setMenuEnabled(False)

        disable_mouse(self.plot)
        disable_mouse(self.plot_prob)

        # (view box, item) pairs consulted by help_event for tooltips.
        self.tooltip_items = []

        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        pen = QPen(self.palette().color(QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def update_views(self):
        """Keep the probability view box aligned with the main view box."""
        self.plot_prob.setGeometry(self.plot.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot, self.plot_prob.XAxis)

    @Inputs.data
    def set_data(self, data):
        """Input handler: replace the data, repopulate the variable and
        group-by lists, restore context settings and redraw.

        Empty data only raises a warning; nothing is drawn.
        """
        self.closeContext()
        self.clear()
        self.warning()
        self.data = data
        self.distributions = None
        self.contingencies = None
        if self.data is not None:
            if not self.data:
                self.warning("Empty input data cannot be visualized")
                return
            domain = self.data.domain
            self.varmodel[:] = list(domain.variables) + \
                               [meta for meta in domain.metas
                                if meta.is_continuous or meta.is_discrete]
            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain.variables if var.is_discrete] + \
                [meta for meta in domain.metas if meta.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            if domain.has_discrete_class:
                # Skip the "(None)" placeholder when looking up the class.
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            # Clamp restored indices to the valid range for this domain.
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        """Reset plots, models, selection indices, legend and combos."""
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()
        self.groupvarview.clear()
        self.cb_prob.clear()

    def _setup_smoothing(self):
        """Update the precision slider labels for the current display mode
        (smoothing of a numeric variable vs. number of bins)."""
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin numeric variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText("Bin numeric variables into {} bins".
                                      format(self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    @property
    def smoothing_factor(self):
        """Smoothing factor selected by the precision slider."""
        return self.smoothing_facs[self.smoothing_index]

    def _setup(self):
        """Resolve the selected variables, compute the distribution or
        contingency and display it."""
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                                 len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            domain = Orange.data.Domain(
                [self.var, self.cvar] if self.cvar else [self.var])
            data = Orange.data.Table(domain, data)
            disc = EqualWidth(n=self.bins[self.smoothing_index])
            data = Discretize(method=disc, remove_const=False)(data)
            self.var = data.domain[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def help_event(self, ev):
        """Show the tooltips of all registered items under the cursor.

        Returns True when a tooltip was shown, False otherwise.
        """
        ctooltip = []
        for vb, item in self.tooltip_items:
            mouse_over_curve = isinstance(item, pg.PlotCurveItem) \
                and item.mouseShape().contains(vb.mapSceneToView(ev.scenePos()))
            mouse_over_bar = isinstance(item, DistributionBarItem) \
                and item.boundingRect().contains(vb.mapSceneToView(ev.scenePos()))
            if mouse_over_curve or mouse_over_bar:
                ctooltip.append(item.tooltip)
        if ctooltip:
            QToolTip.showText(ev.screenPos(), "\n\n".join(ctooltip),
                              widget=self.plotview)
            return True
        return False

    def display_distribution(self):
        """Draw the distribution of `self.var`: a density curve for a
        numeric variable, grey bars otherwise. Returns without drawing when
        there is no (or an empty) distribution."""
        dist = self.distributions
        var = self.var
        if dist is None or not len(dist):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(dist, None, m=OWDistributions.ASH_HIST,
                                     smoothing_factor=self.smoothing_factor)
            # Move from bin edges to bin centres.
            edges = edges + (edges[1] - edges[0])/2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QPen(QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QBrush(Qt.gray), pen=pen)
            self.plot.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.plot, item))
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QColor(128, 128, 128)])
                self.plot.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.plot, item))

    def _on_relative_freq_changed(self):
        """Redraw after the relative-frequency or probability option changed."""
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.

        Draws one curve (numeric `self.var`) or one stacked bar (discrete
        `self.var`) per value of `self.cvar`, plus optional probability
        curves / error-bar markers, registers tooltips and fills the legend.
        Returns without drawing when there is no contingency.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        if cont is None or not len(cont):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            # Keep only the group values that actually have data.
            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(numpy.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(ash_curve(
                        dist, cont, m=OWDistributions.ASH_HIST,
                        smoothing_factor=self.smoothing_factor))
            weights = numpy.array(weights)
            sumw = numpy.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            curvesline = []  # from histograms to lines
            for X, Y in curves:
                X = X + (X[1] - X[0])/2
                X = X[:-1]
                X = numpy.array(X)
                Y = numpy.array(Y)
                curvesline.append((X, Y))

            # Two passes: translucent fills first, outlines on top.
            for t in ["fill", "line"]:
                curve_data = list(zip(curvesline, colors, weights, cvar_values))
                for (X, Y), color, w, cval in reversed(curve_data):
                    item = pg.PlotCurveItem()
                    pen = QPen(QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X, Y/(w if self.relative_freq else 1),
                                 antialias=True, stepMode=False,
                                 fillLevel=0 if t == "fill" else None,
                                 brush=QBrush(color), pen=pen)
                    self.plot.addItem(item)
                    if t == "line":
                        item.tooltip = "{}\n{}={}".format(
                            "Normalized density " if self.relative_freq else "Density ",
                            cvar.name, cval)
                        self.tooltip_items.append((self.plot, item))

            # `curvesline` is empty when no group has data; numpy.hstack([])
            # would raise, so skip the probability curves in that case.
            if self.show_prob and curvesline:
                all_X = numpy.array(
                    numpy.unique(numpy.hstack([X for X, _ in curvesline])))
                inter_X = numpy.array(
                    numpy.linspace(all_X[0], all_X[-1], len(all_X)*2))
                curvesinterp = [numpy.interp(inter_X, X, Y)
                                for (X, Y) in curvesline]
                sumprob = numpy.sum(curvesinterp, axis=0)
                # Only show probabilities where the total density is at
                # least 5 % of its maximum.
                legal = sumprob > 0.05 * numpy.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color, cval in reversed(
                        list(zip(curvesinterp, colors, cvar_values))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QPen(QBrush(color), 3, style=Qt.DotLine)
                        pen.setCosmetic(True)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(
                            inter_X[legal], prob, antialias=True, stepMode=False,
                            fillLevel=None, brush=None, pen=pen)
                        self.plot_prob.addItem(item)
                        item.tooltip = "Probability that \n" + cvar.name + "=" + cval
                        self.tooltip_items.append((self.plot_prob, item))

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)

            maxh = 0  # maximal column height
            maxrh = 0  # maximal relative column height
            scvar = cont.sum(axis=1)
            # a cvar with sum=0 will always have distribution counts 0,
            # therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist/scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QRectF(i - 0.333, 0, 0.666,
                              maxrh if self.relative_freq else maxh)
                item = DistributionBarItem(geom, dist/scvar/maxrh
                                           if self.relative_freq
                                           else dist/maxh, colors)
                self.plot.addItem(item)
                tooltip = "\n".join(
                    "%s: %.*f" % (n, 3 if self.relative_freq else 1, v)
                    for n, v in zip(cvar_values,
                                    dist/scvar if self.relative_freq else dist))
                item.tooltip = "{} ({}={}):\n{}".format(
                    "Normalized frequency " if self.relative_freq else "Frequency ",
                    cvar.name, value, tooltip)
                self.tooltip_items.append((self.plot, item))

                if self.show_prob:
                    item.tooltip += "\n\nProbabilities:"
                    for ic, a in enumerate(dist):
                        # Draw only the chosen group value, or every value
                        # when "(All)" is selected.
                        if self.show_prob - 1 != ic and \
                                self.show_prob - 1 != len(dist):
                            continue
                        position = -0.333 + ((ic+0.5)*0.666/len(dist))
                        if dsum < 1e-6:
                            continue
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        item.tooltip += "\n%s: %.3f ± %.3f" % (cvar_values[ic], prob, ci)
                        mark = pg.ScatterPlotItem()
                        errorbar = pg.ErrorBarItem()
                        pen = QPen(QBrush(QColor(0)), 1)
                        pen.setCosmetic(True)
                        # Clip the interval so it stays inside [0, 1].
                        errorbar.setData(x=[i+position], y=[prob],
                                         bottom=min(numpy.array([ci]), prob),
                                         top=min(numpy.array([ci]), 1 - prob),
                                         beam=numpy.array([0.05]),
                                         brush=QColor(1), pen=pen)
                        mark.setData([i+position], [prob], antialias=True, symbol="o",
                                     fillLevel=None, pxMode=True, size=10,
                                     brush=QColor(colors[ic]), pen=pen)
                        self.plot_prob.addItem(errorbar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name)
            )
        self._legend.show()

    def set_left_axis_name(self):
        """Label the left axis according to variable type and whether
        relative frequencies are shown for a grouped display."""
        leftaxis = self.ploti.getAxis("left")
        set_label = leftaxis.setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"]
                      [self.cvar is not None and self.relative_freq])
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])
        leftaxis.resizeEvent()

    def enable_disable_rel_freq(self):
        """Disable the group-dependent controls unless both a variable and
        a group variable are selected."""
        self.cb_prob.setDisabled(self.var is None or self.cvar is None)
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None)

    def _on_variable_idx_changed(self):
        """Selection handler of the variable list view."""
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        """Callback of the group-by combo and the binning check box."""
        self._setup()

    def _on_set_smoothing(self):
        """Callback of the precision slider."""
        self._setup()

    def onDeleteWidget(self):
        """Clear the plot before the base-class teardown."""
        self.plot.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        """Return the selected entry of the variable model, or None when
        nothing is selected."""
        if self.variable_idx >= 0:
            return self.varmodel[self.variable_idx]

    def send_report(self):
        """Add the plot and a caption describing the displayed variable,
        grouping and probability options to the report."""
        self.plotview.scene().setSceneRect(self.plotview.sceneRect())
        if self.variable_idx < 0:
            return
        self.report_plot()
        text = "Distribution of '{}'".format(
            self.varmodel[self.variable_idx])
        if self.groupvar_idx:
            group_var = self.groupvarmodel[self.groupvar_idx]
            prob = self.cb_prob
            # True when one specific group value (neither "(None)" nor
            # "(All)") is chosen in the probability combo.
            indiv_probs = 0 < prob.currentIndex() < prob.count() - 1
            if not indiv_probs or self.relative_freq:
                text += " grouped by '{}'".format(group_var)
                if self.relative_freq:
                    text += " (relative frequencies)"
            if indiv_probs:
                text += "; probabilities for '{}={}'".format(
                    group_var, prob.currentText())
        self.report_caption(text)
def __init__(self):
    """Build the control panel and the plot area of the widget.

    Creates, top to bottom in the control area, the "Variable" list, the
    "Precision" box (smoothing slider and binning check box) and the
    "Group by" box (group combo, relative-frequency check box and
    probability combo).  The main area receives a pyqtgraph plot with a
    second view box, linked to the right axis, for probability overlays.
    """
    super().__init__()

    # State that is filled in once data arrives.
    self.data = None
    self.distributions = None
    self.contingencies = None
    self.var = self.cvar = None

    # --- "Variable" list ---------------------------------------------
    variable_box = gui.widgetBox(self.controlArea, "Variable")
    self.varmodel = itemmodels.VariableListModel()
    self.groupvarmodel = []
    self.varview = QtGui.QListView(
        selectionMode=QtGui.QListView.SingleSelection)
    self.varview.setSizePolicy(
        QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
    self.varview.setModel(self.varmodel)
    selection_model = itemmodels.ListSingleSelectionModel(self.varmodel)
    self.varview.setSelectionModel(selection_model)
    self.varview.selectionModel().selectionChanged.connect(
        self._on_variable_idx_changed)
    variable_box.layout().addWidget(self.varview)

    # --- "Precision" box ---------------------------------------------
    precision_box = gui.widgetBox(self.controlArea, "Precision")
    gui.separator(self.controlArea, 4, 4)
    slider_row = gui.widgetBox(precision_box, orientation="horizontal")
    self.l_smoothing_l = gui.widgetLabel(slider_row, "Smooth")
    gui.hSlider(
        slider_row, self, "smoothing_index",
        minValue=0,
        maxValue=len(self.smoothing_facs) - 1,
        callback=self._on_set_smoothing,
        createLabel=False)
    self.l_smoothing_r = gui.widgetLabel(slider_row, "Precise")
    binning_indent = gui.indentedBox(precision_box, sep=4)
    self.cb_disc_cont = gui.checkBox(
        binning_indent, self, "disc_cont", "Bin continuous variables",
        callback=self._on_groupvar_idx_changed)

    # --- "Group by" box ----------------------------------------------
    group_box = gui.widgetBox(self.controlArea, "Group by")
    self.icons = gui.attributeIconDict
    self.groupvarview = gui.comboBox(
        group_box, self, "groupvar_idx",
        callback=self._on_groupvar_idx_changed,
        valueType=str, contentsLength=12)
    group_indent = gui.indentedBox(group_box, sep=4)
    self.cb_rel_freq = gui.checkBox(
        group_indent, self, "relative_freq", "Show relative frequencies",
        callback=self._on_relative_freq_changed)
    gui.separator(group_indent)
    self.cb_prob = gui.comboBox(
        group_indent, self, "show_prob",
        label="Show probabilities",
        orientation="horizontal",
        callback=self._on_relative_freq_changed)

    # --- Plot area ---------------------------------------------------
    plot_widget = pg.PlotWidget(background=None)
    self.mainArea.layout().addWidget(plot_widget)
    spacer_label = QtGui.QLabel()
    spacer_label.setSizePolicy(
        QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
    self.mainArea.layout().addWidget(spacer_label, Qt.AlignCenter)

    self.plot = pg.PlotItem()
    self.plot.hideButtons()
    plot_widget.setCentralItem(self.plot)

    # Secondary view box drawn over the main plot; it is tied to the
    # right-hand "Probability" axis and shares the x axis with the plot.
    self.plot_prob = pg.ViewBox()
    self.plot.hideAxis('right')
    self.plot.scene().addItem(self.plot_prob)
    self.plot.getAxis("right").linkToView(self.plot_prob)
    self.plot.getAxis("right").setLabel("Probability")
    self.plot_prob.setZValue(10)
    self.plot_prob.setXLink(self.plot)
    self.update_views()
    self.plot.vb.sigResized.connect(self.update_views)
    self.plot_prob.setRange(yRange=[0,1])

    # Neither view box reacts to mouse dragging or offers a context menu.
    for view_box in (self.plot.getViewBox(), self.plot_prob):
        view_box.setMouseEnabled(False, False)
        view_box.setMenuEnabled(False)

    # Draw the left and bottom axes in the palette's text colour.
    text_color = self.palette().color(QtGui.QPalette.Text)
    axis_pen = QtGui.QPen(text_color)
    for axis_name in ("left", "bottom"):
        self.plot.getAxis(axis_name).setPen(axis_pen)

    # Legend anchored in the top-right corner, hidden until needed.
    self._legend = LegendItem()
    self._legend.setParentItem(self.plot.getViewBox())
    self._legend.hide()
    self._legend.anchor((1, 0), (1, 0))

    self.graphButton.clicked.connect(self.save_graph)
class OWDistributions(widget.OWWidget):
    """
    Widget that displays the value distribution of a single variable.

    The variable is chosen in a list view.  Optionally a discrete "group by"
    variable splits the plot into one curve / stacked bar segment per group
    value, and a probability overlay (right axis) can be shown for one or
    all group values.

    ``show_prob`` indexes the "Show probabilities" combo box: ``0`` is
    "(None)", ``1 .. n`` select the n-th group value and ``n + 1`` is
    "(All)".
    """
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100
    inputs = [
        InputSignal("Data", Orange.data.Table, "set_data",
                    doc="Set the input data set")
    ]

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    relative_freq = settings.Setting(False)
    disc_cont = settings.Setting(False)

    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    graph_name = "plot"

    #: Value passed as ``m`` to ``ash_curve``
    ASH_HIST = 50

    #: Bin counts and smoothing factors; both are indexed by
    #: ``smoothing_index`` (the slider position).
    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]
    smoothing_facs = list(
        reversed([0.1, 0.2, 0.4, 0.6, 0.8, 1, 1.5, 2, 4, 6, 10]))

    def __init__(self):
        """Build the control panel, the plot and the probability overlay."""
        super().__init__()

        self.data = None
        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None

        # "Variable" list
        varbox = gui.widgetBox(self.controlArea, "Variable")
        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []
        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(QtGui.QSizePolicy.Minimum,
                                   QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        # "Precision" box: smoothing slider and binning check box
        box = gui.widgetBox(self.controlArea, "Precision")
        gui.separator(self.controlArea, 4, 4)
        box2 = gui.widgetBox(box, orientation="horizontal")
        self.l_smoothing_l = gui.widgetLabel(box2, "Smooth")
        gui.hSlider(box2, self, "smoothing_index",
                    minValue=0,
                    maxValue=len(self.smoothing_facs) - 1,
                    callback=self._on_set_smoothing,
                    createLabel=False)
        self.l_smoothing_r = gui.widgetLabel(box2, "Precise")
        self.cb_disc_cont = gui.checkBox(
            gui.indentedBox(box, sep=4), self, "disc_cont",
            "Bin continuous variables",
            callback=self._on_groupvar_idx_changed,
            tooltip="Show continuous variables as discrete.")

        # "Group by" box: group variable, relative frequencies, probabilities
        box = gui.widgetBox(self.controlArea, "Group by")
        self.icons = gui.attributeIconDict
        self.groupvarview = gui.comboBox(
            box, self, "groupvar_idx",
            callback=self._on_groupvar_idx_changed,
            valueType=str, contentsLength=12)
        box2 = gui.indentedBox(box, sep=4)
        self.cb_rel_freq = gui.checkBox(
            box2, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed,
            tooltip="Normalize probabilities so that probabilities "
                    "for each group-by value sum to 1.")
        gui.separator(box2)
        self.cb_prob = gui.comboBox(
            box2, self, "show_prob", label="Show probabilities",
            orientation="horizontal",
            callback=self._on_relative_freq_changed,
            tooltip="Show probabilities for a chosen group-by value "
                    "(at each point probabilities for all group-by "
                    "values sum to 1).")

        # Plot area
        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QtGui.QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        # NOTE(review): if this layout is a QBoxLayout, the second positional
        # argument of addWidget is `stretch`, not `alignment` -- confirm the
        # intent before changing it; kept as in the original.
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)

        self.ploti = pg.PlotItem()
        self.plot = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        # Second view box for probability items; it is linked to the right
        # axis and shares the x axis with the main plot.
        self.plot_prob = pg.ViewBox()
        self.ploti.hideAxis('right')
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        self.inline_graph_report()

        # No mouse interaction and no context menu on either view box.
        for view_box in (self.plot, self.plot_prob):
            view_box.setMouseEnabled(False, False)
            view_box.setMenuEnabled(False)

        # (view box, item) pairs consulted by `help_event` for tooltips.
        self.tooltip_items = []
        self.plot.scene().installEventFilter(
            HelpEventDelegate(self.help_event, self))

        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.plot)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def update_views(self):
        """Keep the probability view box aligned with the main view box."""
        self.plot_prob.setGeometry(self.plot.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.plot, self.plot_prob.XAxis)

    def set_data(self, data):
        """
        Input handler for the "Data" signal.

        Clears the widget, then (if `data` is not None) fills the variable
        list and the group combo from the data domain, restores the context
        settings, clamps the restored indices to valid ranges and redraws.
        """
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)

            self.groupvarview.clear()
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            self.groupvarview.addItem("(None)")
            for var in self.groupvarmodel[1:]:
                self.groupvarview.addItem(self.icons[var], var.name)
            # Default grouping is the discrete class, when there is one.
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            # Context settings may hold indices that do not fit this domain.
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        """Remove all plot items and reset the variable/group selections."""
        self.plot.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup_smoothing(self):
        """Update the slider labels and check box text for the current mode.

        For an un-binned continuous variable the slider is labelled
        "Smooth" .. "Precise"; otherwise it is labelled with the smallest
        and largest bin counts and the check box shows the chosen count.
        """
        if not self.disc_cont and self.var and self.var.is_continuous:
            self.cb_disc_cont.setText("Bin continuous variables")
            self.l_smoothing_l.setText("Smooth")
            self.l_smoothing_r.setText("Precise")
        else:
            self.cb_disc_cont.setText(
                "Bin continuous variables into {} bins".format(
                    self.bins[self.smoothing_index]))
            self.l_smoothing_l.setText(" " + str(self.bins[0]))
            self.l_smoothing_r.setText(" " + str(self.bins[-1]))

    def _setup(self):
        """Recompute the distribution/contingency for the current selection
        and redraw the plot."""
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
            # Rebuild the probability combo for the new group variable.
            self.cb_prob.clear()
            self.cb_prob.addItem("(None)")
            self.cb_prob.addItems(self.cvar.values)
            self.cb_prob.addItem("(All)")
            self.show_prob = min(max(self.show_prob, 0),
                                 len(self.cvar.values) + 1)
        data = self.data
        self._setup_smoothing()
        if self.var is None:
            return
        if self.disc_cont:
            # Work on a table holding only the plotted (and group) variable,
            # discretized into equal-width bins.
            data = self.data[:, (self.var, self.cvar)
                             if self.cvar else self.var]
            disc = Orange.preprocess.discretize.EqualWidth(
                n=self.bins[self.smoothing_index])
            data = Orange.preprocess.Discretize(
                data, method=disc, remove_const=False)
            self.var = data.domain[0]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.plot.autoRange()

    def help_event(self, ev):
        """
        Show a tooltip for the plot items under the cursor.

        Returns True when a tooltip was shown and False otherwise.
        """
        scene_pos = ev.scenePos()
        ctooltip = []
        for vb, item in self.tooltip_items:
            if isinstance(item, pg.PlotCurveItem) and \
                    item.mouseShape().contains(vb.mapSceneToView(scene_pos)):
                ctooltip.append(item.tooltip)
            elif isinstance(item, DistributionBarItem) and \
                    item.boundingRect().contains(
                        vb.mapSceneToView(scene_pos)):
                ctooltip.append(item.tooltip)
        if ctooltip:
            QToolTip.showText(ev.screenPos(), "\n\n".join(ctooltip),
                              widget=self.plotview)
            return True
        return False

    def display_distribution(self):
        """
        Draw `self.distributions` for `self.var` (no grouping).

        A continuous variable is drawn as a filled density curve, any other
        variable as one grey bar per value.  Does nothing when there is no
        variable or no distribution to show.
        """
        dist = self.distributions
        var = self.var
        # The relative-frequency / probability controls can fire before any
        # data has been received; there is nothing to draw in that case.
        if var is None or dist is None or not len(dist):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(
                dist, None, m=OWDistributions.ASH_HIST,
                smoothing_factor=self.smoothing_facs[self.smoothing_index])
            # Shift the edges by half a step and drop the last one, giving
            # one x per curve value (same conversion as the original code).
            edges = edges + (edges[1] - edges[0]) / 2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QtGui.QPen(QtGui.QBrush(Qt.white), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QtGui.QBrush(Qt.gray), pen=pen)
            self.plot.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.plot, item))
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.plot, item))

    def _on_relative_freq_changed(self):
        """Redraw after the relative-frequency or probability control
        changed."""
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.plot.autoRange()

    def display_contingency(self):
        """
        Draw `self.contingencies` of `self.var` grouped by `self.cvar`.

        Does nothing when a variable, the group variable or the contingency
        is missing or empty.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        # Controls may fire before data is available; nothing to draw then.
        if var is None or cvar is None or cont is None or not len(cont):
            return
        self.plot.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        # The right axis belongs to the probability overlay.
        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QtGui.QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            # Groups without values are dropped, so the legend entries are
            # narrowed to the groups that were actually drawn.
            colors, cvar_values = \
                self._plot_continuous_contingency(cont, cvar, colors)
        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])
            self._plot_discrete_contingency(
                numpy.array(cont), var, cvar, cvar_values, colors)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name))
        self._legend.show()

    def _plot_continuous_contingency(self, cont, cvar, colors):
        """Draw one density curve per non-empty group of a continuous
        variable, plus the optional probability curves.

        Returns the (colors, values) of the groups that were drawn.
        """
        weights, cols, cvar_values, curves = [], [], [], []
        for i, dist in enumerate(cont):
            v, W = dist
            if len(v):
                weights.append(numpy.sum(W))
                cols.append(colors[i])
                cvar_values.append(cvar.values[i])
                curves.append(ash_curve(
                    dist, cont, m=OWDistributions.ASH_HIST,
                    smoothing_factor=self.smoothing_facs[
                        self.smoothing_index]))
        # Float dtype so the normalization below is valid even if the
        # summed weights happen to be integers.
        weights = numpy.array(weights, dtype=float)
        weights = weights / numpy.sum(weights)
        colors = cols
        curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

        # From histograms to lines: shift x by half a step, drop the last.
        curvesline = []
        for X, Y in curves:
            X = X + (X[1] - X[0]) / 2
            X = X[:-1]
            curvesline.append((numpy.array(X), numpy.array(Y)))

        # All fills are added first and all lines second.
        for t in ["fill", "line"]:
            for (X, Y), color, w, cval in reversed(
                    list(zip(curvesline, colors, weights, cvar_values))):
                item = pg.PlotCurveItem()
                pen = QtGui.QPen(QtGui.QBrush(color), 3)
                pen.setCosmetic(True)
                color = QtGui.QColor(color)
                color.setAlphaF(0.2)
                item.setData(X, Y / (w if self.relative_freq else 1),
                             antialias=True, stepMode=False,
                             fillLevel=0 if t == "fill" else None,
                             brush=QtGui.QBrush(color), pen=pen)
                self.plot.addItem(item)
                if t == "line":
                    item.tooltip = ("Normalized density "
                                    if self.relative_freq
                                    else "Density ") \
                        + "\n" + cvar.name + "=" + cval
                    self.tooltip_items.append((self.plot, item))

        # numpy.hstack raises on an empty list, so skip the overlay when no
        # group had any values.
        if self.show_prob and curvesline:
            all_X = numpy.array(
                numpy.unique(numpy.hstack([X for X, _ in curvesline])))
            inter_X = numpy.array(
                numpy.linspace(all_X[0], all_X[-1], len(all_X) * 2))
            curvesinterp = [numpy.interp(inter_X, X, Y)
                            for (X, Y) in curvesline]
            sumprob = numpy.sum(curvesinterp, axis=0)
            # Probabilities are only drawn where the summed density exceeds
            # 5 % of its maximum.
            legal = sumprob > 0.05 * numpy.max(sumprob)

            # NOTE(review): `i` counts the drawn (non-empty) groups while
            # `show_prob` indexes the combo box built from all group values;
            # the two may disagree when some group is empty -- confirm.
            i = len(curvesinterp) + 1
            show_all = self.show_prob == i
            for Y, color, cval in reversed(
                    list(zip(curvesinterp, colors, cvar_values))):
                i -= 1
                if show_all or self.show_prob == i:
                    item = pg.PlotCurveItem()
                    pen = QtGui.QPen(QtGui.QBrush(color), 3,
                                     style=QtCore.Qt.DotLine)
                    pen.setCosmetic(True)
                    prob = Y[legal] / sumprob[legal]
                    item.setData(inter_X[legal], prob,
                                 antialias=True, stepMode=False,
                                 fillLevel=None, brush=None, pen=pen)
                    self.plot_prob.addItem(item)
                    item.tooltip = \
                        "Probability that \n" + cvar.name + "=" + cval
                    self.tooltip_items.append((self.plot_prob, item))

        return colors, cvar_values

    def _plot_discrete_contingency(self, cont, var, cvar, cvar_values,
                                   colors):
        """Draw one stacked bar per value of a discrete variable, plus the
        optional probability marks with error bars.

        `cont` is the contingency converted to a numpy array.
        """
        maxh = 0  # maximal column height
        maxrh = 0  # maximal relative column height
        scvar = cont.sum(axis=1)
        # A cvar with sum=0 will always have distribution counts 0,
        # therefore we can divide it by anything.
        scvar[scvar == 0] = 1
        for _, dist in zip(var.values, cont.T):
            maxh = max(maxh, max(dist))
            maxrh = max(maxrh, max(dist / scvar))

        for i, (value, dist) in enumerate(zip(var.values, cont.T)):
            dsum = sum(dist)
            geom = QtCore.QRectF(i - 0.333, 0, 0.666,
                                 maxrh if self.relative_freq else maxh)
            item = DistributionBarItem(
                geom,
                dist / scvar / maxrh if self.relative_freq
                else dist / maxh,
                colors)
            self.plot.addItem(item)
            tooltip = "\n".join(
                "%s: %.*f" % (n, 3 if self.relative_freq else 1, v)
                for n, v in zip(
                    cvar_values,
                    dist / scvar if self.relative_freq else dist))
            item.tooltip = ("Normalized frequency "
                            if self.relative_freq
                            else "Frequency ") \
                + "(" + cvar.name + "=" + value + "):" \
                + "\n" + tooltip
            self.tooltip_items.append((self.plot, item))

            if not self.show_prob:
                continue

            item.tooltip += "\n\nProbabilities:"
            selected = self.show_prob - 1
            show_all = selected == len(dist)
            for ic, a in enumerate(dist):
                if not show_all and selected != ic:
                    continue
                position = -0.333 + ((ic + 0.5) * 0.666 / len(dist))
                # An (almost) empty column has no meaningful probabilities.
                if dsum < 1e-6:
                    continue
                prob = a / dsum
                if not 1e-6 < prob < 1 - 1e-6:
                    continue
                # 1.96 * sqrt(p * (1 - p) / n), shown as "p ± ci".
                ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                item.tooltip += "\n%s: %.3f ± %.3f" % (
                    cvar_values[ic], prob, ci)
                mark = pg.ScatterPlotItem()
                bar = pg.ErrorBarItem()
                pen = QtGui.QPen(QtGui.QBrush(QtGui.QColor(0)), 1)
                pen.setCosmetic(True)
                # The error bar is clipped so that it stays within [0, 1].
                bar.setData(x=[i + position], y=[prob],
                            bottom=min(numpy.array([ci]), prob),
                            top=min(numpy.array([ci]), 1 - prob),
                            beam=numpy.array([0.05]),
                            brush=QtGui.QColor(1), pen=pen)
                mark.setData([i + position], [prob],
                             antialias=True, symbol="o",
                             fillLevel=None, pxMode=True, size=10,
                             brush=QtGui.QColor(colors[ic]), pen=pen)
                self.plot_prob.addItem(bar)
                self.plot_prob.addItem(mark)

    def set_left_axis_name(self):
        """Label the left axis according to the variable type and to whether
        relative frequencies of a grouped plot are shown."""
        leftaxis = self.ploti.getAxis("left")
        relative = self.cvar is not None and self.relative_freq
        if self.var and self.var.is_continuous:
            leftaxis.setLabel("Relative density" if relative else "Density")
        else:
            leftaxis.setLabel(
                "Relative frequency" if relative else "Frequency")
        leftaxis.resizeEvent()

    def enable_disable_rel_freq(self):
        """Disable the grouping-only controls when there is no variable or
        no group variable."""
        ungrouped = self.var is None or self.cvar is None
        self.cb_prob.setDisabled(ungrouped)
        self.cb_rel_freq.setDisabled(ungrouped)

    def _on_variable_idx_changed(self):
        """Selection in the variable list changed: store it and redraw."""
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        """Group variable or binning option changed: redraw."""
        self._setup()

    def _on_set_smoothing(self):
        """Smoothing slider moved: redraw."""
        self._setup()

    def onDeleteWidget(self):
        """Clear the plot before the widget is deleted."""
        self.plot.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        """Return the selected variable, or None when nothing is selected."""
        if self.variable_idx >= 0:
            return self.varmodel[self.variable_idx]
        return None

    def send_report(self):
        """Add the plot and a caption describing the current selection to
        the report; does nothing when no variable is selected."""
        if self.variable_idx < 0:
            return
        self.report_plot()
        text = "Distribution of '{}'".format(
            self.varmodel[self.variable_idx])
        if self.groupvar_idx:
            group_var = self.groupvarmodel[self.groupvar_idx]
            prob = self.cb_prob
            # True when a single group value (neither "(None)" nor "(All)")
            # is selected in the probability combo.
            indiv_probs = 0 < prob.currentIndex() < prob.count() - 1
            if not indiv_probs or self.relative_freq:
                text += " grouped by '{}'".format(group_var)
                if self.relative_freq:
                    text += " (relative frequencies)"
            if indiv_probs:
                text += "; probabilities for '{}={}'".format(
                    group_var, prob.currentText())
        self.report_caption(text)
def __init__(self):
    """Build the control panel and the plot area of the widget.

    The control area gets the "Variable" list, the "Precision" box
    (smoothing slider and binning check box) and the "Group by" box (group
    combo, relative-frequency check box and probability combo).  The main
    area gets a pyqtgraph plot whose right axis is linked to a second view
    box used for probability overlays.
    """
    super().__init__()

    # State that is filled in once data arrives.
    self.data = None
    self.distributions = None
    self.contingencies = None
    self.var = self.cvar = None

    # --- "Variable" list ---------------------------------------------
    variable_box = gui.vBox(self.controlArea, "Variable")
    self.varmodel = itemmodels.VariableListModel()
    self.groupvarmodel = []
    self.varview = QListView(
        selectionMode=QListView.SingleSelection)
    self.varview.setSizePolicy(
        QSizePolicy.Minimum, QSizePolicy.Expanding)
    self.varview.setModel(self.varmodel)
    selection_model = itemmodels.ListSingleSelectionModel(self.varmodel)
    self.varview.setSelectionModel(selection_model)
    self.varview.selectionModel().selectionChanged.connect(
        self._on_variable_idx_changed)
    variable_box.layout().addWidget(self.varview)

    # --- "Precision" box ---------------------------------------------
    precision_box = gui.vBox(self.controlArea, "Precision")
    gui.separator(self.controlArea, 4, 4)
    slider_row = gui.hBox(precision_box)
    self.l_smoothing_l = gui.widgetLabel(slider_row, "Smooth")
    gui.hSlider(
        slider_row, self, "smoothing_index",
        minValue=0,
        maxValue=len(self.smoothing_facs) - 1,
        callback=self._on_set_smoothing,
        createLabel=False)
    self.l_smoothing_r = gui.widgetLabel(slider_row, "Precise")
    binning_indent = gui.indentedBox(precision_box, sep=4)
    self.cb_disc_cont = gui.checkBox(
        binning_indent, self, "disc_cont", "Bin numeric variables",
        callback=self._on_groupvar_idx_changed,
        tooltip="Show numeric variables as categorical.")

    # --- "Group by" box ----------------------------------------------
    group_box = gui.vBox(self.controlArea, "Group by")
    self.icons = gui.attributeIconDict
    self.groupvarview = gui.comboBox(
        group_box, self, "groupvar_idx",
        callback=self._on_groupvar_idx_changed,
        valueType=str, contentsLength=12)
    group_indent = gui.indentedBox(group_box, sep=4)
    self.cb_rel_freq = gui.checkBox(
        group_indent, self, "relative_freq", "Show relative frequencies",
        callback=self._on_relative_freq_changed,
        tooltip="Normalize probabilities so that probabilities "
                "for each group-by value sum to 1.")
    gui.separator(group_indent)
    self.cb_prob = gui.comboBox(
        group_indent, self, "show_prob",
        label="Show probabilities:",
        orientation=Qt.Horizontal,
        callback=self._on_relative_freq_changed,
        tooltip="Show probabilities for a chosen group-by value "
                "(at each point probabilities for all group-by values sum to 1).")

    # --- Plot area ---------------------------------------------------
    self.plotview = pg.PlotWidget(background=None)
    self.plotview.setRenderHint(QPainter.Antialiasing)
    self.mainArea.layout().addWidget(self.plotview)
    spacer_label = QLabel()
    spacer_label.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
    self.mainArea.layout().addWidget(spacer_label, Qt.AlignCenter)

    self.ploti = pg.PlotItem()
    self.plot = self.ploti.vb
    self.ploti.hideButtons()
    self.plotview.setCentralItem(self.ploti)

    # Secondary view box drawn over the main plot; it is tied to the
    # right-hand "Probability" axis and shares the x axis with the plot.
    self.plot_prob = pg.ViewBox()
    self.ploti.hideAxis('right')
    self.ploti.scene().addItem(self.plot_prob)
    self.ploti.getAxis("right").linkToView(self.plot_prob)
    self.ploti.getAxis("right").setLabel("Probability")
    self.plot_prob.setZValue(10)
    self.plot_prob.setXLink(self.ploti)
    self.update_views()
    self.ploti.vb.sigResized.connect(self.update_views)
    self.plot_prob.setRange(yRange=[0, 1])

    # Neither view box reacts to mouse dragging or offers a context menu.
    for view_box in (self.plot, self.plot_prob):
        view_box.setMouseEnabled(False, False)
        view_box.setMenuEnabled(False)

    # (view box, item) pairs; tooltips are served through the event filter
    # installed on the plot's scene.
    self.tooltip_items = []
    help_delegate = HelpEventDelegate(self.help_event, self)
    self.plot.scene().installEventFilter(help_delegate)

    # Draw the left and bottom axes in the palette's text colour.
    text_color = self.palette().color(QPalette.Text)
    axis_pen = QPen(text_color)
    for axis_name in ("left", "bottom"):
        self.ploti.getAxis(axis_name).setPen(axis_pen)

    # Legend anchored in the top-right corner, hidden until needed.
    self._legend = LegendItem()
    self._legend.setParentItem(self.plot)
    self._legend.hide()
    self._legend.anchor((1, 0), (1, 0))
class OWMDS(widget.OWWidget): name = "MDS" description = "Two-dimensional data projection by multidimensional " \ "scaling constructed from a distance matrix." icon = "icons/MDS.svg" inputs = [("Data", Orange.data.Table, "set_data"), ("Distances", Orange.misc.DistMatrix, "set_disimilarity")] outputs = [("Data", Orange.data.Table, widget.Default), ("Data Subset", Orange.data.Table)] #: Initialization type PCA, Random = 0, 1 #: Refresh rate RefreshRate = [ ("Every iteration", 1), ("Every 5 steps", 5), ("Every 10 steps", 10), ("Every 25 steps", 25), ("Every 50 steps", 50), ("None", -1) ] #: Runtime state Running, Finished, Waiting = 1, 2, 3 settingsHandler = settings.DomainContextHandler() max_iter = settings.Setting(300) initialization = settings.Setting(PCA) refresh_rate = settings.Setting(3) # output embedding role. NoRole, AttrRole, MetaRole = 0, 1, 2 output_embedding_role = settings.Setting(1) autocommit = settings.Setting(True) color_index = settings.ContextSetting(0, not_attribute=True) shape_index = settings.ContextSetting(0, not_attribute=True) size_index = settings.ContextSetting(0, not_attribute=True) label_index = settings.ContextSetting(0, not_attribute=True) symbol_size = settings.Setting(8) symbol_opacity = settings.Setting(230) legend_anchor = settings.Setting(((1, 0), (1, 0))) def __init__(self, parent=None): super().__init__(parent) self.matrix = None self.data = None self.matrix_data = None self.signal_data = None self._pen_data = None self._shape_data = None self._size_data = None self._label_data = None self._scatter_item = None self._legend_item = None self._selection_mask = None self._invalidated = False self._effective_matrix = None self.__update_loop = None self.__state = OWMDS.Waiting self.__in_next_step = False box = gui.widgetBox(self.controlArea, "MDS Optimization") form = QtGui.QFormLayout( labelAlignment=Qt.AlignLeft, formAlignment=Qt.AlignLeft, fieldGrowthPolicy=QtGui.QFormLayout.AllNonFixedFieldsGrow, ) form.addRow("Max iterations:", 
gui.spin(box, self, "max_iter", 10, 10 ** 4, step=1)) form.addRow("Initialization", gui.comboBox(box, self, "initialization", items=["PCA (Torgerson)", "Random"], callback=self.__invalidate_embedding)) box.layout().addLayout(form) form.addRow("Refresh", gui.comboBox( box, self, "refresh_rate", items=[t for t, _ in OWMDS.RefreshRate], callback=self.__invalidate_refresh)) self.runbutton = gui.button( box, self, "Run", callback=self._toggle_run) box = gui.widgetBox(self.controlArea, "Graph") self.colorvar_model = itemmodels.VariableListModel() cb = gui.comboBox(box, self, "color_index", box="Color", callback=self._on_color_index_changed) cb.setModel(self.colorvar_model) cb.box.setFlat(True) self.shapevar_model = itemmodels.VariableListModel() cb = gui.comboBox(box, self, "shape_index", box="Shape", callback=self._on_shape_index_changed) cb.setModel(self.shapevar_model) cb.box.setFlat(True) self.sizevar_model = itemmodels.VariableListModel() cb = gui.comboBox(box, self, "size_index", "Size", callback=self._on_size_index_changed) cb.setModel(self.sizevar_model) cb.box.setFlat(True) self.labelvar_model = itemmodels.VariableListModel() cb = gui.comboBox(box, self, "label_index", "Label", callback=self._on_label_index_changed) cb.setModel(self.labelvar_model) cb.box.setFlat(True) form = QtGui.QFormLayout( labelAlignment=Qt.AlignLeft, formAlignment=Qt.AlignLeft, fieldGrowthPolicy=QtGui.QFormLayout.AllNonFixedFieldsGrow, ) form.addRow("Symbol size", gui.hSlider(box, self, "symbol_size", minValue=1, maxValue=20, callback=self._on_size_index_changed, createLabel=False)) form.addRow("Symbol opacity", gui.hSlider(box, self, "symbol_opacity", minValue=100, maxValue=255, step=100, callback=self._on_color_index_changed, createLabel=False)) box.layout().addLayout(form) box = QtGui.QGroupBox("Zoom/Select", ) box.setLayout(QtGui.QHBoxLayout()) group = QtGui.QActionGroup(self, exclusive=True) def icon(name): path = "icons/Dlg_{}.png".format(name) path = 
pkg_resources.resource_filename(widget.__name__, path) return QtGui.QIcon(path) action_select = QtGui.QAction( "Select", self, checkable=True, checked=True, icon=icon("arrow"), shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_1)) action_zoom = QtGui.QAction( "Zoom", self, checkable=True, checked=False, icon=icon("zoom"), shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_2)) action_pan = QtGui.QAction( "Pan", self, checkable=True, checked=False, icon=icon("pan_hand"), shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_3)) action_reset_zoom = QtGui.QAction( "Zoom to fit", self, icon=icon("zoom_reset"), shortcut=QtGui.QKeySequence(Qt.ControlModifier + Qt.Key_0)) action_reset_zoom.triggered.connect( lambda: self.plot.autoRange()) group.addAction(action_select) group.addAction(action_zoom) group.addAction(action_pan) self.addActions(group.actions() + [action_reset_zoom]) action_select.setChecked(True) def button(action): b = QtGui.QToolButton() b.setToolButtonStyle(Qt.ToolButtonIconOnly) b.setDefaultAction(action) return b box.layout().addWidget(button(action_select)) box.layout().addWidget(button(action_zoom)) box.layout().addWidget(button(action_pan)) box.layout().addSpacing(4) box.layout().addWidget(button(action_reset_zoom)) box.layout().addStretch() self.controlArea.layout().addWidget(box) gui.rubber(self.controlArea) box = gui.widgetBox(self.controlArea, "Output") cb = gui.comboBox(box, self, "output_embedding_role", box="Append coordinates", items=["Do not append", "As attributes", "As metas"], callback=self._invalidate_output) cb.box.setFlat(True) gui.auto_commit(box, self, "autocommit", "Send data", checkbox_label="Send after any change", box=None) self.plot = pg.PlotWidget(background="w", enableMenu=False) self.mainArea.layout().addWidget(self.plot) self.selection_tool = PlotSelectionTool( parent=self, selectionMode=PlotSelectionTool.Lasso) self.zoom_tool = PlotZoomTool(parent=self) self.pan_tool = PlotPanTool(parent=self) self.pinch_tool = 
PlotPinchZoomTool(parent=self) self.pinch_tool.setViewBox(self.plot.getViewBox()) self.selection_tool.setViewBox(self.plot.getViewBox()) self.selection_tool.selectionFinished.connect(self.__selection_end) self.current_tool = self.selection_tool def activate_tool(action): self.current_tool.setViewBox(None) if action is action_select: active, cur = self.selection_tool, Qt.ArrowCursor elif action is action_zoom: active, cur = self.zoom_tool, Qt.ArrowCursor elif action is action_pan: active, cur = self.pan_tool, Qt.OpenHandCursor self.current_tool = active self.current_tool.setViewBox(self.plot.getViewBox()) self.plot.getViewBox().setCursor(QtGui.QCursor(cur)) group.triggered[QtGui.QAction].connect(activate_tool) def set_data(self, data): self.signal_data = data if self.matrix and data is not None and len(self.matrix.X) == len(data): self.closeContext() self.data = data self.update_controls() self.openContext(data) else: self._invalidated = True self._selection_mask = None def set_disimilarity(self, matrix): self.matrix = matrix if matrix and matrix.row_items: self.matrix_data = matrix.row_items if matrix is None: self.matrix_data = None self._invalidated = True self._selection_mask = None def _clear(self): self._pen_data = None self._shape_data = None self._size_data = None self._label_data = None self.colorvar_model[:] = ["Same color"] self.shapevar_model[:] = ["Same shape"] self.sizevar_model[:] = ["Same size"] self.labelvar_model[:] = ["No labels"] self.color_index = 0 self.shape_index = 0 self.size_index = 0 self.label_index = 0 self.__set_update_loop(None) self.__state = OWMDS.Waiting def _clear_plot(self): self.plot.clear() self._scatter_item = None if self._legend_item is not None: anchor = legend_anchor_pos(self._legend_item) if anchor is not None: self.legend_anchor = anchor if self._legend_item.scene() is not None: self._legend_item.scene().removeItem(self._legend_item) self._legend_item = None def update_controls(self): if getattr(self.matrix, 'axis', 1) == 
0: # Column-wise distances attr = "Attribute names" self.labelvar_model[:] = ["No labels", attr] self.shapevar_model[:] = ["Same shape", attr] self.colorvar_model[:] = ["Same color", attr] self.color_index = list(self.colorvar_model).index(attr) self.shape_index = list(self.shapevar_model).index(attr) else: # initialize the graph state from data domain = self.data.domain all_vars = list(domain.variables + domain.metas) cd_vars = [var for var in all_vars if var.is_primitive()] disc_vars = [var for var in all_vars if var.is_discrete] cont_vars = [var for var in all_vars if var.is_continuous] str_vars = [var for var in all_vars if var.is_discrete or var.is_string] self.colorvar_model[:] = chain(["Same color"], [self.colorvar_model.Separator], cd_vars) self.shapevar_model[:] = chain(["Same shape"], [self.shapevar_model.Separator], disc_vars) self.sizevar_model[:] = chain(["Same size", "Stress"], [self.sizevar_model.Separator], cont_vars) self.labelvar_model[:] = chain(["No labels"], [self.labelvar_model.Separator], str_vars) if domain.class_var is not None: self.color_index = list(self.colorvar_model).index(domain.class_var) def _initialize(self): # clear everything self.closeContext() self._clear() self.data = None self._effective_matrix = None self.embedding = None # if no data nor matrix is present reset plot if self.signal_data is None and self.matrix is None: self._update_plot() return if self.signal_data and self.matrix_data and len(self.signal_data) != len(self.matrix_data): self.error(1, "Data and distances dimensions do not match.") self._update_plot() return self.error(1) if self.signal_data: self.data = self.signal_data elif self.matrix_data: self.data = self.matrix_data if self.matrix: self._effective_matrix = self.matrix if self.matrix.axis == 0: self.data = None else: self._effective_matrix = Orange.distance.Euclidean(self.data) self.update_controls() self.openContext(self.data) def _toggle_run(self): if self.__state == OWMDS.Running: self.stop() 
self._invalidate_output() else: self.start() def start(self): if self.__state == OWMDS.Running: return elif self.__state == OWMDS.Finished: # Resume/continue from a previous run self.__start() elif self.__state == OWMDS.Waiting and \ self._effective_matrix is not None: self.__start() def stop(self): if self.__state == OWMDS.Running: self.__set_update_loop(None) def __start(self): X = self._effective_matrix.X if self.embedding is not None: init = self.embedding elif self.initialization == OWMDS.PCA: init = torgerson(X, n_components=2) else: init = None # number of iterations per single GUI update step _, step_size = OWMDS.RefreshRate[self.refresh_rate] if step_size == -1: step_size = self.max_iter def update_loop(X, max_iter, step, init): """ return an iterator over successive improved MDS point embeddings. """ # NOTE: this code MUST NOT call into QApplication.processEvents done = False iterations_done = 0 oldstress = numpy.finfo(numpy.float).max while not done: step_iter = min(max_iter - iterations_done, step) mds = Orange.projection.MDS( dissimilarity="precomputed", n_components=2, n_init=1, max_iter=step_iter) mdsfit = mds.fit(X, init=init) iterations_done += step_iter embedding, stress = mdsfit.embedding_, mdsfit.stress_ stress /= numpy.sqrt(numpy.sum(embedding ** 2, axis=1)).sum() if iterations_done >= max_iter: done = True elif (oldstress - stress) < mds.params["eps"]: done = True init = embedding oldstress = stress yield embedding, mdsfit.stress_, iterations_done / max_iter self.__set_update_loop(update_loop(X, self.max_iter, step_size, init)) self.progressBarInit(processEvents=None) def __set_update_loop(self, loop): """ Set the update `loop` coroutine. The `loop` is a generator yielding `(embedding, stress, progress)` tuples where `embedding` is a `(N, 2) ndarray` of current updated MDS points, `stress` is the current stress and `progress` a float ratio (0 <= progress <= 1) If an existing update loop is already in palace it is interrupted (closed). .. 
note:: The `loop` must not explicitly yield control flow to the event loop (i.e. call `QApplication.proceesEvents`) """ if self.__update_loop is not None: self.__update_loop.close() self.__update_loop = None self.progressBarFinished(processEvents=None) self.__update_loop = loop if loop is not None: self.progressBarInit(processEvents=None) self.setStatusMessage("Running") self.runbutton.setText("Stop") self.__state = OWMDS.Running QtGui.QApplication.postEvent(self, QEvent(QEvent.User)) else: self.setStatusMessage("") self.runbutton.setText("Start") self.__state = OWMDS.Finished def __next_step(self): if self.__update_loop is None: return loop = self.__update_loop try: embedding, stress, progress = next(self.__update_loop) assert self.__update_loop is loop except StopIteration: self.__set_update_loop(None) self.unconditional_commit() else: self.progressBarSet(100.0 * progress, processEvents=None) self.embedding = embedding self._update_plot() # schedule next update QtGui.QApplication.postEvent( self, QEvent(QEvent.User), Qt.LowEventPriority) def customEvent(self, event): if event.type() == QEvent.User and self.__update_loop is not None: if not self.__in_next_step: self.__in_next_step = True try: self.__next_step() finally: self.__in_next_step = False else: warnings.warn( "Re-entry in update loop detected. " "A rogue `proccessEvents` is on the loose.", RuntimeWarning) # re-schedule the update iteration. QtGui.QApplication.postEvent(self, QEvent(QEvent.User)) return super().customEvent(event) def __invalidate_embedding(self): state = self.__state if self.__update_loop is not None: self.__set_update_loop(None) X = self._effective_matrix.X if self.initialization == OWMDS.PCA: self.embedding = torgerson(X) else: self.embedding = numpy.random.rand(len(X), 2) self._update_plot() # restart the optimization if it was interrupted. 
if state == OWMDS.Running: self.__start() def __invalidate_refresh(self): state = self.__state if self.__update_loop is not None: self.__set_update_loop(None) # restart the optimization if it was interrupted. # TODO: decrease the max iteration count by the already # completed iterations count. if state == OWMDS.Running: self.__start() def handleNewSignals(self): if self._invalidated: self._invalidated = False self._initialize() self.start() self._update_plot() self.unconditional_commit() def _invalidate_output(self): self.commit() def _on_color_index_changed(self): self._pen_data = None self._update_plot() def _on_shape_index_changed(self): self._shape_data = None self._update_plot() def _on_size_index_changed(self): self._size_data = None self._update_plot() def _on_label_index_changed(self): self._label_data = None self._update_plot() def _update_plot(self): self._clear_plot() if self.embedding is not None: self._setup_plot() def _setup_plot(self): have_data = self.data is not None have_matrix_transposed = self.matrix is not None and not self.matrix.axis def column(data, variable): a, _ = data.get_column_view(variable) return a.ravel() def attributes(matrix): return matrix.row_items.domain.attributes def scale(a): dmin, dmax = numpy.nanmin(a), numpy.nanmax(a) if dmax - dmin > 0: return (a - dmin) / (dmax - dmin) else: return numpy.zeros_like(a) if self._pen_data is None: if self._selection_mask is not None: pointflags = numpy.where( self._selection_mask, mdsplotutils.Selected, mdsplotutils.NoFlags) else: pointflags = None if have_data and self.color_index > 0: color_var = self.colorvar_model[self.color_index] if color_var.is_discrete: palette = colorpalette.ColorPaletteGenerator( len(color_var.values) ) else: palette = None color_data = mdsplotutils.color_data( self.data, color_var, plotstyle=mdsplotutils.plotstyle) color_data = numpy.hstack( (color_data, numpy.full((len(color_data), 1), self.symbol_opacity)) ) pen_data = mdsplotutils.pen_data(color_data, 
pointflags) elif have_matrix_transposed and self.colorvar_model[self.color_index] == 'Attribute names': attr = attributes(self.matrix) palette = colorpalette.ColorPaletteGenerator(len(attr)) color_data = [palette.getRGB(i) for i in range(len(attr))] color_data = numpy.hstack( color_data, numpy.full((len(color_data), 1), self.symbol_opacity) ) pen_data = mdsplotutils.pen_data(color_data, pointflags) else: pen_data = make_pen(QtGui.QColor(Qt.darkGray), cosmetic=True) pen_data = numpy.full(len(self.data), pen_data, dtype=object) self._pen_data = pen_data if self._shape_data is None: if have_data and self.shape_index > 0: Symbols = ScatterPlotItem.Symbols symbols = numpy.array(list(Symbols.keys())) shape_var = self.shapevar_model[self.shape_index] data = column(self.data, shape_var) data = data % (len(Symbols) - 1) data[numpy.isnan(data)] = len(Symbols) - 1 shape_data = symbols[data.astype(int)] elif have_matrix_transposed and self.shapevar_model[self.shape_index] == 'Attribute names': Symbols = ScatterPlotItem.Symbols symbols = numpy.array(list(Symbols.keys())) attr = [i % (len(Symbols) - 1) for i, _ in enumerate(attributes(self.matrix))] shape_data = symbols[attr] else: shape_data = "o" self._shape_data = shape_data if self._size_data is None: MinPointSize = 3 point_size = self.symbol_size + MinPointSize if have_data and self.size_index == 1: # size by stress size_data = stress(self.embedding, self._effective_matrix.X) size_data = scale(size_data) size_data = MinPointSize + size_data * point_size elif have_data and self.size_index > 0: size_var = self.sizevar_model[self.size_index] size_data = column(self.data, size_var) size_data = scale(size_data) size_data = MinPointSize + size_data * point_size else: size_data = point_size if self._label_data is None: if have_data and self.label_index > 0: label_var = self.labelvar_model[self.label_index] label_data = column(self.data, label_var) label_data = [label_var.repr_val(val) for val in label_data] label_items = 
[pg.TextItem(text, anchor=(0.5, 0)) for text in label_data] elif have_matrix_transposed and self.labelvar_model[self.label_index] == 'Attribute names': attr = attributes(self.matrix) label_items = [pg.TextItem(str(text), anchor=(0.5, 0)) for text in attr] else: label_items = None self._label_data = label_items self._scatter_item = item = ScatterPlotItem( x=self.embedding[:, 0], y=self.embedding[:, 1], pen=self._pen_data, symbol=self._shape_data, brush=QtGui.QBrush(Qt.transparent), size=size_data, data=numpy.arange(len(self.data)), antialias=True ) self.plot.addItem(item) if self._label_data is not None: for (x, y), text_item in zip(self.embedding, self._label_data): self.plot.addItem(text_item) text_item.setPos(x, y) self._legend_item = LegendItem() self._legend_item.setParentItem(self.plot.getViewBox()) self._legend_item.anchor(*self.legend_anchor) color_var = shape_var = None if have_data and 1 <= self.color_index < len(self.colorvar_model): color_var = self.colorvar_model[self.color_index] assert isinstance(color_var, Orange.data.Variable) if have_data and 1 <= self.shape_index < len(self.shapevar_model): shape_var = self.shapevar_model[self.shape_index] assert isinstance(shape_var, Orange.data.Variable) if shape_var is not None or \ (color_var is not None and color_var.is_discrete): legend_data = mdsplotutils.legend_data( color_var, shape_var, plotstyle=mdsplotutils.plotstyle) for color, symbol, text in legend_data: self._legend_item.addItem( ScatterPlotItem(pen=color, brush=color, symbol=symbol, size=10), text ) else: self._legend_item.hide() def commit(self): if self.embedding is not None: output = embedding = Orange.data.Table.from_numpy( Orange.data.Domain([Orange.data.ContinuousVariable("X"), Orange.data.ContinuousVariable("Y")]), self.embedding ) else: output = embedding = None if self.embedding is not None and self.data is not None: domain = self.data.domain attrs = domain.attributes class_vars = domain.class_vars metas = domain.metas if 
self.output_embedding_role == OWMDS.AttrRole: attrs = attrs + embedding.domain.attributes elif self.output_embedding_role == OWMDS.MetaRole: metas = metas + embedding.domain.attributes domain = Orange.data.Domain(attrs, class_vars, metas) output = Orange.data.Table.from_table(domain, self.data) if self.output_embedding_role == OWMDS.AttrRole: output.X[:, -2:] = embedding.X elif self.output_embedding_role == OWMDS.MetaRole: output.metas[:, -2:] = embedding.X self.send("Data", output) if output is not None and self._selection_mask is not None and \ numpy.any(self._selection_mask): subset = output[self._selection_mask] else: subset = None self.send("Data Subset", subset) def onDeleteWidget(self): super().onDeleteWidget() self._clear_plot() self._clear() def __selection_end(self, path): self.select(path) self._pen_data = None self._update_plot() self._invalidate_output() def select(self, region): item = self._scatter_item if item is None: return indices = numpy.array( [spot.data() for spot in item.points() if region.contains(spot.pos())], dtype=int) if not QtGui.QApplication.keyboardModifiers() & Qt.ControlModifier: self._selection_mask = None self.select_indices(indices) def select_indices(self, indices): if self.data is None: return if self._selection_mask is None: self._selection_mask = numpy.zeros(len(self.data), dtype=bool) self._selection_mask[indices] = True
def __init__(self):
    """Build the control area (variable list, precision, grouping) and
    the main-area plot with its linked probability view box."""
    super().__init__()
    self.data = None
    self.distributions = None
    self.contingencies = None
    self.var = self.cvar = None

    # --- "Variable" box: single-selection list of variables
    variable_box = gui.widgetBox(self.controlArea, "Variable")
    self.varmodel = itemmodels.VariableListModel()
    self.groupvarmodel = []

    self.varview = QtGui.QListView(
        selectionMode=QtGui.QListView.SingleSelection)
    self.varview.setSizePolicy(QtGui.QSizePolicy.Minimum,
                               QtGui.QSizePolicy.Expanding)
    self.varview.setModel(self.varmodel)
    self.varview.setSelectionModel(
        itemmodels.ListSingleSelectionModel(self.varmodel))
    self.varview.selectionModel().selectionChanged.connect(
        self._on_variable_idx_changed)
    variable_box.layout().addWidget(self.varview)

    # --- "Precision" box: smoothing slider and binning check box
    precision_box = gui.widgetBox(self.controlArea, "Precision")
    gui.separator(self.controlArea, 4, 4)

    slider_row = gui.widgetBox(precision_box, orientation="horizontal")
    self.l_smoothing_l = gui.widgetLabel(slider_row, "Smooth")
    gui.hSlider(slider_row, self, "smoothing_index",
                minValue=0, maxValue=len(self.smoothing_facs) - 1,
                callback=self._on_set_smoothing, createLabel=False)
    self.l_smoothing_r = gui.widgetLabel(slider_row, "Precise")

    self.cb_disc_cont = gui.checkBox(
        gui.indentedBox(precision_box, sep=4), self, "disc_cont",
        "Bin continuous variables",
        callback=self._on_groupvar_idx_changed,
        tooltip="Show continuous variables as discrete.")

    # --- "Group by" box: grouping variable and probability options
    group_box = gui.widgetBox(self.controlArea, "Group by")
    self.icons = gui.attributeIconDict
    self.groupvarview = gui.comboBox(
        group_box, self, "groupvar_idx",
        callback=self._on_groupvar_idx_changed,
        valueType=str, contentsLength=12)

    group_indent = gui.indentedBox(group_box, sep=4)
    self.cb_rel_freq = gui.checkBox(
        group_indent, self, "relative_freq", "Show relative frequencies",
        callback=self._on_relative_freq_changed,
        tooltip="Normalize probabilities so that probabilities "
                "for each group-by value sum to 1.")
    gui.separator(group_indent)
    self.cb_prob = gui.comboBox(
        group_indent, self, "show_prob", label="Show probabilities",
        orientation="horizontal",
        callback=self._on_relative_freq_changed,
        tooltip="Show probabilities for a chosen group-by value "
                "(at each point probabilities for all group-by "
                "values sum to 1).")

    # --- main area: plot widget followed by a label widget
    self.plotview = pg.PlotWidget(background=None)
    self.plotview.setRenderHint(QtGui.QPainter.Antialiasing)
    self.mainArea.layout().addWidget(self.plotview)

    spacer = QtGui.QLabel()
    spacer.setSizePolicy(QtGui.QSizePolicy.Expanding,
                         QtGui.QSizePolicy.Fixed)
    self.mainArea.layout().addWidget(spacer, Qt.AlignCenter)

    self.ploti = pg.PlotItem()
    self.plot = self.ploti.vb
    self.ploti.hideButtons()
    self.plotview.setCentralItem(self.ploti)

    # second view box, linked to the right axis, for probability curves
    self.plot_prob = pg.ViewBox()
    self.ploti.hideAxis('right')
    self.ploti.scene().addItem(self.plot_prob)
    self.ploti.getAxis("right").linkToView(self.plot_prob)
    self.ploti.getAxis("right").setLabel("Probability")
    self.plot_prob.setZValue(10)
    self.plot_prob.setXLink(self.ploti)
    self.update_views()
    self.ploti.vb.sigResized.connect(self.update_views)
    self.plot_prob.setRange(yRange=[0, 1])

    self.inline_graph_report()

    # no mouse interaction and no context menu on either view box
    for view_box in (self.plot, self.plot_prob):
        view_box.setMouseEnabled(False, False)
        view_box.setMenuEnabled(False)

    self.tooltip_items = []
    self.plot.scene().installEventFilter(
        HelpEventDelegate(self.help_event, self))

    axis_pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
    for side in ("left", "bottom"):
        self.ploti.getAxis(side).setPen(axis_pen)

    self._legend = LegendItem()
    self._legend.setParentItem(self.plot)
    self._legend.hide()
    self._legend.anchor((1, 0), (1, 0))
def _setup_plot(self):
    """Build the scatter plot, labels and legend for `self.embedding`.

    Pen, shape and label data are cached on the instance and only
    recomputed when the corresponding `_*_data` attribute is None.
    """
    have_data = self.data is not None
    have_matrix_transposed = \
        self.matrix is not None and not self.matrix.axis
    # `self.data` may be None when only a (column-wise) distance matrix is
    # shown, so the number of plotted points is taken from the embedding.
    n_points = len(self.embedding)

    def column(data, variable):
        a, _ = data.get_column_view(variable)
        return a.ravel()

    def attributes(matrix):
        return matrix.row_items.domain.attributes

    def scale(a):
        dmin, dmax = numpy.nanmin(a), numpy.nanmax(a)
        if dmax - dmin > 0:
            return (a - dmin) / (dmax - dmin)
        else:
            return numpy.zeros_like(a)

    if self._pen_data is None:
        if self._selection_mask is not None:
            pointflags = numpy.where(
                self._selection_mask, mdsplotutils.Selected,
                mdsplotutils.NoFlags)
        else:
            pointflags = None

        if have_data and self.color_index > 0:
            color_var = self.colorvar_model[self.color_index]
            color_data = mdsplotutils.color_data(
                self.data, color_var, plotstyle=mdsplotutils.plotstyle)
            color_data = numpy.hstack(
                (color_data,
                 numpy.full((len(color_data), 1), self.symbol_opacity))
            )
            pen_data = mdsplotutils.pen_data(color_data, pointflags)
        elif have_matrix_transposed and \
                self.colorvar_model[self.color_index] == 'Attribute names':
            attr = attributes(self.matrix)
            palette = colorpalette.ColorPaletteGenerator(len(attr))
            color_data = [palette.getRGB(i) for i in range(len(attr))]
            # hstack takes ONE sequence of arrays
            color_data = numpy.hstack(
                (color_data,
                 numpy.full((len(color_data), 1), self.symbol_opacity))
            )
            pen_data = mdsplotutils.pen_data(color_data, pointflags)
        else:
            pen_data = make_pen(QtGui.QColor(Qt.darkGray), cosmetic=True)
            pen_data = numpy.full(n_points, pen_data, dtype=object)

        self._pen_data = pen_data

    if self._shape_data is None:
        if have_data and self.shape_index > 0:
            Symbols = ScatterPlotItem.Symbols
            symbols = numpy.array(list(Symbols.keys()))

            shape_var = self.shapevar_model[self.shape_index]
            data = column(self.data, shape_var)
            data = data % (len(Symbols) - 1)
            # missing values get the last symbol
            data[numpy.isnan(data)] = len(Symbols) - 1
            shape_data = symbols[data.astype(int)]
        elif have_matrix_transposed and \
                self.shapevar_model[self.shape_index] == 'Attribute names':
            Symbols = ScatterPlotItem.Symbols
            symbols = numpy.array(list(Symbols.keys()))
            attr = [i % (len(Symbols) - 1)
                    for i, _ in enumerate(attributes(self.matrix))]
            shape_data = symbols[attr]
        else:
            shape_data = "o"
        self._shape_data = shape_data

    # NOTE: `self._size_data` is never assigned in this method, so the
    # sizes are recomputed on every call (original behaviour, kept).
    if self._size_data is None:
        MinPointSize = 3
        point_size = self.symbol_size + MinPointSize
        if have_data and self.size_index == 1:
            # size by stress
            size_data = stress(self.embedding, self._effective_matrix.X)
            size_data = scale(size_data)
            size_data = MinPointSize + size_data * point_size
        elif have_data and self.size_index > 0:
            size_var = self.sizevar_model[self.size_index]
            size_data = column(self.data, size_var)
            size_data = scale(size_data)
            size_data = MinPointSize + size_data * point_size
        else:
            size_data = point_size

    if self._label_data is None:
        if have_data and self.label_index > 0:
            label_var = self.labelvar_model[self.label_index]
            label_data = column(self.data, label_var)
            label_data = [label_var.repr_val(val) for val in label_data]
            label_items = [pg.TextItem(text, anchor=(0.5, 0))
                           for text in label_data]
        elif have_matrix_transposed and \
                self.labelvar_model[self.label_index] == 'Attribute names':
            attr = attributes(self.matrix)
            label_items = [pg.TextItem(str(text), anchor=(0.5, 0))
                           for text in attr]
        else:
            label_items = None
        self._label_data = label_items

    self._scatter_item = item = ScatterPlotItem(
        x=self.embedding[:, 0], y=self.embedding[:, 1],
        pen=self._pen_data, symbol=self._shape_data,
        brush=QtGui.QBrush(Qt.transparent),
        size=size_data, data=numpy.arange(n_points),
        antialias=True
    )
    self.plot.addItem(item)

    if self._label_data is not None:
        for (x, y), text_item in zip(self.embedding, self._label_data):
            self.plot.addItem(text_item)
            text_item.setPos(x, y)

    self._legend_item = LegendItem()
    self._legend_item.setParentItem(self.plot.getViewBox())
    self._legend_item.anchor(*self.legend_anchor)

    color_var = shape_var = None
    if have_data and 1 <= self.color_index < len(self.colorvar_model):
        color_var = self.colorvar_model[self.color_index]
        assert isinstance(color_var, Orange.data.Variable)
    if have_data and 1 <= self.shape_index < len(self.shapevar_model):
        shape_var = self.shapevar_model[self.shape_index]
        assert isinstance(shape_var, Orange.data.Variable)

    if shape_var is not None or \
            (color_var is not None and color_var.is_discrete):
        legend_data = mdsplotutils.legend_data(
            color_var, shape_var, plotstyle=mdsplotutils.plotstyle)
        for color, symbol, text in legend_data:
            self._legend_item.addItem(
                ScatterPlotItem(pen=color, brush=color, symbol=symbol,
                                size=10),
                text
            )
    else:
        self._legend_item.hide()
def __init__(self):
    """Create the test/distribution/column controls and the plot area."""
    super().__init__()
    self.varmodel = itemmodels.VariableListModel()
    self.groupvarmodel = []
    # button labels: the `.value` of every available distribution
    self.distributions = [
        member.value for member in self.available_distributions
    ]

    # --- "Tests" radio buttons
    tests_box = gui.vBox(self.controlArea, 'Tests')
    gui.radioButtonsInBox(
        tests_box, self, 'test_idx',
        btnLabels=[candidate.name for candidate in self.available_tests],
        callback=self.test_changed,
    )

    # --- "Distributions" radio buttons
    distributions_box = gui.vBox(self.controlArea, 'Distributions')
    self.distribution_choose = gui.radioButtonsInBox(
        distributions_box, self, 'distribution_idx',
        btnLabels=self.distributions,
        callback=self.distribution_changed,
    )

    # --- column selector; its model is shared with the
    #     "Own distribution" selector created below
    self.column_chose = gui.comboBox(
        self.controlArea, self, 'column_idx',
        box='Selected column',
        items=[],
        orientation=Qt.Horizontal,
        callback=self.column_changed,
    )
    self.available_columns = itemmodels.VariableListModel(parent=self)
    self.column_chose.setModel(self.available_columns)

    # the p-value label is placed inside the "Distributions" box
    self.infolabel = gui.widgetLabel(
        distributions_box, "<center>p-value: </center>")
    self.mainArea.setMinimumWidth(800)

    self.own_distribution_choose = gui.comboBox(
        self.controlArea, self, 'own_distribution_idx',
        box='Own distribution',
        items=[],
        orientation=Qt.Horizontal,
        callback=self.column_changed,
    )
    self.own_distribution_choose.setModel(self.available_columns)

    self.data = None

    # --- main area: plot widget followed by a label widget
    self.plotview = pg.PlotWidget(background=None)
    self.plotview.setRenderHint(QPainter.Antialiasing)
    self.mainArea.layout().addWidget(self.plotview)

    spacer = QLabel()
    spacer.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
    self.mainArea.layout().addWidget(spacer, Qt.AlignCenter)

    self.ploti = pg.PlotItem()
    self.box_scene = self.ploti.vb
    self.ploti.hideButtons()
    self.plotview.setCentralItem(self.ploti)

    # second view box, linked to the right axis, for probabilities
    self.plot_prob = pg.ViewBox()
    self.ploti.scene().addItem(self.plot_prob)
    self.ploti.getAxis("right").linkToView(self.plot_prob)
    self.ploti.getAxis("right").setLabel("Probability")
    self.plot_prob.setZValue(10)
    self.plot_prob.setXLink(self.ploti)
    self.update_views()
    self.ploti.vb.sigResized.connect(self.update_views)
    self.plot_prob.setRange(yRange=[0, 1])

    # no mouse interaction and no context menu on either view box
    for view_box in (self.box_scene, self.plot_prob):
        view_box.setMouseEnabled(False, False)
        view_box.setMenuEnabled(False)

    self.tooltip_items = []

    axis_pen = QPen(self.palette().color(QPalette.Text))
    for side in ("left", "bottom"):
        self.ploti.getAxis(side).setPen(axis_pen)

    self._legend = LegendItem()
    self._legend.setParentItem(self.box_scene)
    self._legend.hide()
    self._legend.anchor((1, 0), (1, 0))

    # bring the distribution buttons in line with the default test
    self.test_changed()
class DistributionTest(OWWidget):
    """Widget that plots one column of the input data and shows the
    p-value computed by the selected test against the selected
    distribution."""

    name = 'Distribution Test'
    description = 'Check if data is in given distribution.'
    icon = 'icons/disttest.svg'
    want_main_area = True
    buttons_area_orientation = Qt.Vertical
    resizing_enabled = True

    inputs = [('Data', Orange.data.Table, 'set_data')]

    available_tests = (
        KolmogorovSmirnov,
        AndersonDarling,
        ShapiroWilk,
        ChiSquare,
    )

    settingsHandler = settings.DomainContextHandler(
        match_values=settings.DomainContextHandler.MATCH_VALUES_ALL)
    #: Selected variable index
    available_distributions = [d for d in Distribution]
    test_idx = 0
    distribution_idx = 0
    column_idx = 0
    own_distribution_idx = 0

    relative_freq = settings.Setting(False)
    smoothing_index = settings.Setting(5)
    show_prob = settings.ContextSetting(0)

    graph_name = "box_scene"

    ASH_HIST = 50

    bins = [2, 3, 4, 5, 8, 10, 12, 15, 20, 30, 50]

    def __init__(self):
        super().__init__()
        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = []
        # Button labels; note that `_setup` later re-uses this attribute
        # for the computed distribution of the selected column.
        self.distributions = [
            distribution.value for distribution in self.available_distributions
        ]

        box = gui.vBox(self.controlArea, 'Tests')
        gui.radioButtonsInBox(
            box, self, 'test_idx',
            btnLabels=[test.name for test in self.available_tests],
            callback=self.test_changed,
        )

        box = gui.vBox(self.controlArea, 'Distributions')
        self.distribution_choose = gui.radioButtonsInBox(
            box, self, 'distribution_idx',
            btnLabels=self.distributions,
            callback=self.distribution_changed,
        )

        self.column_chose = gui.comboBox(
            self.controlArea, self, 'column_idx',
            box='Selected column',
            items=[],
            orientation=Qt.Horizontal,
            callback=self.column_changed,
        )
        self.available_columns = itemmodels.VariableListModel(parent=self)
        self.column_chose.setModel(self.available_columns)

        self.infolabel = gui.widgetLabel(box, "<center>p-value: </center>")
        self.mainArea.setMinimumWidth(800)

        self.own_distribution_choose = gui.comboBox(
            self.controlArea, self, 'own_distribution_idx',
            box='Own distribution',
            items=[],
            orientation=Qt.Horizontal,
            callback=self.column_changed,
        )
        self.own_distribution_choose.setModel(self.available_columns)

        self.data = None

        self.plotview = pg.PlotWidget(background=None)
        self.plotview.setRenderHint(QPainter.Antialiasing)
        self.mainArea.layout().addWidget(self.plotview)
        w = QLabel()
        w.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)
        self.ploti = pg.PlotItem()
        self.box_scene = self.ploti.vb
        self.ploti.hideButtons()
        self.plotview.setCentralItem(self.ploti)

        self.plot_prob = pg.ViewBox()
        self.ploti.scene().addItem(self.plot_prob)
        self.ploti.getAxis("right").linkToView(self.plot_prob)
        self.ploti.getAxis("right").setLabel("Probability")
        self.plot_prob.setZValue(10)
        self.plot_prob.setXLink(self.ploti)
        self.update_views()
        self.ploti.vb.sigResized.connect(self.update_views)
        self.plot_prob.setRange(yRange=[0, 1])

        def disable_mouse(box_scene):
            box_scene.setMouseEnabled(False, False)
            box_scene.setMenuEnabled(False)

        disable_mouse(self.box_scene)
        disable_mouse(self.plot_prob)

        self.tooltip_items = []

        pen = QPen(self.palette().color(QPalette.Text))
        for axis in ("left", "bottom"):
            self.ploti.getAxis(axis).setPen(pen)

        self._legend = LegendItem()
        self._legend.setParentItem(self.box_scene)
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))
        self.test_changed()

    def update_views(self):
        """
        resize
        """
        self.plot_prob.setGeometry(self.box_scene.sceneBoundingRect())
        self.plot_prob.linkedViewChanged(self.box_scene, self.plot_prob.XAxis)

    def set_data(self, data):
        """Handler for the 'Data' input: reset the widget and plot `data`."""
        self.closeContext()
        self.clear()
        self.warning()
        self.data = data
        self.own_distribution_choose.hide()
        if data is not None:
            self.available_columns[:] = data.domain
            domain = self.data.domain
            self.varmodel[:] = list(domain) + [
                meta for meta in domain.metas
                if meta.is_continuous or meta.is_discrete
            ]
            self.groupvarmodel = \
                ["(None)"] + [var for var in domain if var.is_discrete] + \
                [meta for meta in domain.metas if meta.is_discrete]
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    self.groupvarmodel[1:].index(domain.class_var) + 1
            self.openContext(domain)
            # clamp the indices into the range of the new models
            self.column_idx = min(max(self.column_idx, 0),
                                  len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            self._setup()

    def test_changed(self):
        """
        Management of buttons, depends from type of distribution test.
        Own samples are hidden.
        :return:
        """
        for idx, button in enumerate(self.distribution_choose.buttons):
            if Distribution(button.text()) in self.test.allowed_distribution:
                button.show()
                # switch to an allowed distribution when the current one
                # is not supported by the newly selected test
                if self.distribution not in self.test.allowed_distribution:
                    button.toggle()
                    self.distribution_idx = idx
            else:
                self.own_distribution_choose.hide()
                button.hide()
        self.compute_p_value()

    def distribution_changed(self):
        """
        Control buttons of allowed distributions - show or hide
        :return: compute p-value
        """
        if self.distribution == Distribution.OWN:
            self.own_distribution_choose.show()
        else:
            self.own_distribution_choose.hide()
        self.compute_p_value()

    def clear(self):
        """Clear both view boxes, the models and the legend."""
        self.box_scene.clear()
        self.plot_prob.clear()
        self.varmodel[:] = []
        self.groupvarmodel = []
        self.column_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def column_changed(self):
        """
        compute p-value if column is changed
        """
        self._setup()
        self.compute_p_value()

    def _setup(self):
        """
        set new plot
        """
        self.box_scene.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.column_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]

        data = self.data
        if self.var is None:
            return
        self.set_left_axis_name()
        # NOTE: `self.cvar` is reset to None above and not assigned again
        # in this method, so only the distribution branch runs from here.
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(data, self.var)
            self.display_distribution()
        self.box_scene.autoRange()

    def compute_p_value(self):
        """
        :return: p-value
        """
        if self.data is not None:
            p_value = self.test.compute(self)
            # NOTE(review): plain floats are shown unrounded while other
            # values are rounded - confirm this is the intended split.
            if isinstance(p_value, float):
                self.infolabel.setText('\np-value: {}'.format(p_value))
            else:
                self.infolabel.setText('\np-value: {}'.format(round(
                    p_value, 3)))

    def _on_relative_freq_changed(self):
        self.set_left_axis_name()
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()
        self.box_scene.autoRange()

    def display_contingency(self):
        """
        Set the contingency to display.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.box_scene.clear()
        self.plot_prob.clear()
        self._legend.clear()
        self.tooltip_items = []

        if self.show_prob:
            self.ploti.showAxis('right')
        else:
            self.ploti.hideAxis('right')

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        cvar_values = cvar.values
        colors = [QColor(*col) for col in cvar.colors]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            weights, cols, cvar_values, curves = [], [], [], []
            for i, dist in enumerate(cont):
                v, W = dist
                if len(v):
                    weights.append(np.sum(W))
                    cols.append(colors[i])
                    cvar_values.append(cvar.values[i])
                    curves.append(
                        ash_curve(dist, cont, m=DistributionTest.ASH_HIST))
            weights = np.array(weights)
            sumw = np.sum(weights)
            weights /= sumw
            colors = cols
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            curvesline = []  # from histograms to lines
            for (X, Y) in curves:
                # Shift the edges by half a bin and drop the last one, the
                # same conversion `display_distribution` applies.  The
                # previous code sliced the scalar half-width and raised.
                X = X + (X[1] - X[0]) / 2
                X = np.array(X[:-1])
                Y = np.array(Y)
                curvesline.append((X, Y))

            for t in ["fill", "line"]:
                for (X, Y), color, w, cval in reversed(
                        list(zip(curvesline, colors, weights, cvar_values))):
                    item = pg.PlotCurveItem()
                    pen = QPen(QBrush(color), 3)
                    pen.setCosmetic(True)
                    color = QColor(color)
                    color.setAlphaF(0.2)
                    item.setData(X, Y / (w if self.relative_freq else 1),
                                 antialias=True, stepMode=False,
                                 fillLevel=0 if t == "fill" else None,
                                 brush=QBrush(color), pen=pen)
                    self.box_scene.addItem(item)
                    if t == "line":
                        if self.relative_freq:
                            density = "Normalized density"
                        else:
                            density = "Density"
                        item.tooltip = "{density}\n{name}={value}".format(
                            value=cval, name=cvar.name, density=density)
                        self.tooltip_items.append((self.box_scene, item))

            if self.show_prob:
                all_X = np.array(
                    np.unique(np.hstack([X for X, _ in curvesline])))
                inter_X = np.array(
                    np.linspace(all_X[0], all_X[-1], len(all_X) * 2))
                curvesinterp = [
                    np.interp(inter_X, X, Y) for (X, Y) in curvesline
                ]
                sumprob = np.sum(curvesinterp, axis=0)
                # only draw where the summed density is not negligible
                legal = sumprob > 0.05 * np.max(sumprob)

                i = len(curvesinterp) + 1
                show_all = self.show_prob == i
                for Y, color, cval in reversed(
                        list(zip(curvesinterp, colors, cvar_values))):
                    i -= 1
                    if show_all or self.show_prob == i:
                        item = pg.PlotCurveItem()
                        pen = QPen(QBrush(color), 3, style=Qt.DotLine)
                        pen.setCosmetic(True)
                        prob = Y[legal] / sumprob[legal]
                        item.setData(inter_X[legal], prob,
                                     antialias=True, stepMode=False,
                                     fillLevel=None, brush=None, pen=pen)
                        self.plot_prob.addItem(item)
                        item.tooltip = \
                            "Probability that \n{name}={value}".format(
                                name=cvar.name, value=cval)
                        self.tooltip_items.append((self.plot_prob, item))

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = np.array(cont)

            maxh = 0  # maximal column height
            maxrh = 0  # maximal relative column height
            scvar = cont.sum(axis=1)
            # a cvar with sum=0 with allways have distribution counts 0,
            # therefore we can divide it by anything
            scvar[scvar == 0] = 1
            for dist in cont.T:
                maxh = max(maxh, max(dist))
                maxrh = max(maxrh, max(dist / scvar))

            for i, (value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                geom = QRectF(i - 0.333, 0, 0.666,
                              maxrh if self.relative_freq else maxh)
                item = DistributionBarItem(
                    geom,
                    dist / scvar / maxrh if self.relative_freq
                    else dist / maxh,
                    colors)
                self.box_scene.addItem(item)
                tooltip = "\n".join(
                    "%s: %.*f" % (n, 3 if self.relative_freq else 1, v)
                    for n, v in zip(
                        cvar_values,
                        dist / scvar if self.relative_freq else dist))
                if self.relative_freq:
                    frequency = "Normalized frequency"
                else:
                    frequency = "Frequency"
                item.tooltip = \
                    "{frequency}({name}={value}):\n{tooltip}".format(
                        frequency=frequency,
                        name=cvar.name,
                        value=value,
                        tooltip=tooltip,
                    )
                self.tooltip_items.append((self.box_scene, item))

                if self.show_prob:
                    item.tooltip += "\n\nProbabilities:"
                    for ic, a in enumerate(dist):
                        if (self.show_prob - 1 != ic and
                                self.show_prob - 1 != len(dist)) \
                                or dsum < 1e-6:
                            continue
                        position = -0.333 + ((ic + 0.5) * 0.666 / len(dist))
                        prob = a / dsum
                        if not 1e-6 < prob < 1 - 1e-6:
                            continue
                        ci = 1.96 * sqrt(prob * (1 - prob) / dsum)
                        item.tooltip += "\n%s: %.3f ± %.3f" % (
                            cvar_values[ic], prob, ci)
                        mark = pg.ScatterPlotItem()
                        bar = pg.ErrorBarItem()
                        pen = QPen(QBrush(QColor(0)), 1)
                        pen.setCosmetic(True)
                        bar.setData(x=[i + position], y=[prob],
                                    bottom=min(np.array([ci]), prob),
                                    top=min(np.array([ci]), 1 - prob),
                                    beam=np.array([0.05]),
                                    brush=QColor(1), pen=pen)
                        mark.setData([i + position], [prob],
                                     antialias=True, symbol="o",
                                     fillLevel=None, pxMode=True,
                                     size=10, brush=QColor(colors[ic]),
                                     pen=pen)
                        self.plot_prob.addItem(bar)
                        self.plot_prob.addItem(mark)

        for color, name in zip(colors, cvar_values):
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                escape(name))
        self._legend.show()

    def set_left_axis_name(self):
        """Label the left axis as (relative) density or frequency."""
        leftaxis = self.ploti.getAxis("left")
        set_label = leftaxis.setLabel
        if self.var and self.var.is_continuous:
            set_label(["Density", "Relative density"]
                      [self.cvar is not None and self.relative_freq])
        else:
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])
        leftaxis.resizeEvent()

    def display_distribution(self):
        """Plot `self.distributions` for `self.var` as a curve or as bars."""
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.box_scene.clear()
        self.plot_prob.clear()
        self.ploti.hideAxis('right')
        self.tooltip_items = []

        bottomaxis = self.ploti.getAxis("bottom")
        bottomaxis.setLabel(var.name)
        bottomaxis.resizeEvent()

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            if not len(dist[0]):
                return
            edges, curve = ash_curve(dist, None, m=DistributionTest.ASH_HIST)
            # shift by half a bin and drop the last edge
            edges = edges + (edges[1] - edges[0]) / 2
            edges = edges[:-1]
            item = pg.PlotCurveItem()
            pen = QPen(QBrush(Qt.black), 3)
            pen.setCosmetic(True)
            item.setData(edges, curve, antialias=True, stepMode=False,
                         fillLevel=0, brush=QBrush(Qt.gray), pen=pen)
            self.box_scene.addItem(item)
            item.tooltip = "Density"
            self.tooltip_items.append((self.box_scene, item))
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                geom = QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QColor(128, 128, 128)])
                self.box_scene.addItem(item)
                item.tooltip = "Frequency for %s: %r" % (var.values[i], w)
                self.tooltip_items.append((self.box_scene, item))

    def onDeleteWidget(self):
        self.box_scene.clear()
        super().onDeleteWidget()

    def get_widget_name_extension(self):
        """Return the selected variable, or None when nothing is selected."""
        if self.column_idx >= 0:
            return self.varmodel[self.column_idx]

    @property
    def test(self) -> Test:
        return self.available_tests[self.test_idx]

    @property
    def distribution(self) -> Distribution:
        return self.available_distributions[self.distribution_idx]

    @property
    def column(self):
        return self.data[:, self.column_idx]

    @property
    def own_distribution(self):
        return self.data[:, self.own_distribution_idx]
def __init__(self, parent=None):
    """Create the widget state, the control-area lists and the plot.

    The control area gets a "Variable" box (variable list plus the
    density-estimation combo box) and a "Group by" box (group variable
    list plus the relative-frequency check box).  The main area gets a
    pyqtgraph plot with a hidden legend anchored to its top-right corner.
    """
    super().__init__(parent)

    # Widget state; filled in once data arrives.
    self.data = None
    self.distributions = None
    self.contingencies = None
    self.var = None
    self.cvar = None

    single_selection = QtGui.QListView.SingleSelection
    size_policy = QtGui.QSizePolicy

    # --- "Variable" box ------------------------------------------------
    variable_box = gui.widgetBox(self.controlArea, "Variable")

    self.varmodel = itemmodels.VariableListModel()
    self.groupvarmodel = itemmodels.VariableListModel()

    self.varview = QtGui.QListView(selectionMode=single_selection)
    self.varview.setSizePolicy(size_policy.Minimum, size_policy.Expanding)
    self.varview.setModel(self.varmodel)
    self.varview.setSelectionModel(
        itemmodels.ListSingleSelectionModel(self.varmodel))
    self.varview.selectionModel().selectionChanged.connect(
        self._on_variable_idx_changed)
    variable_box.layout().addWidget(self.varview)

    gui.separator(variable_box, 8, 8)

    estimator_labels = ["Histograms",
                        "Average shifted histograms",
                        "Kernel density estimators"]
    gui.comboBox(
        variable_box, self, "cont_est_type",
        label="Show continuous variables by",
        valueType=int,
        items=estimator_labels,
        callback=self._on_cont_est_type_changed)

    # --- "Group by" box ------------------------------------------------
    group_box = gui.widgetBox(self.controlArea, "Group by")

    self.groupvarview = QtGui.QListView(selectionMode=single_selection)
    self.groupvarview.setFixedHeight(100)
    self.groupvarview.setSizePolicy(size_policy.Minimum,
                                    size_policy.Preferred)
    self.groupvarview.setModel(self.groupvarmodel)
    self.groupvarview.selectionModel().selectionChanged.connect(
        self._on_groupvar_idx_changed)
    group_box.layout().addWidget(self.groupvarview)

    self.cb_rel_freq = gui.checkBox(
        group_box, self, "relative_freq", "Show relative frequencies",
        callback=self._on_relative_freq_changed)

    # --- Main area: plot view plus an empty label below it -------------
    plot_widget = pg.PlotWidget(background=None)
    self.mainArea.layout().addWidget(plot_widget)

    spacer_label = QtGui.QLabel()
    spacer_label.setSizePolicy(size_policy.Expanding, size_policy.Fixed)
    # NOTE(review): the second positional argument of a box layout's
    # addWidget is the stretch factor, not the alignment -- confirm
    # whether alignment=Qt.AlignCenter was intended.
    self.mainArea.layout().addWidget(spacer_label, Qt.AlignCenter)

    self.plot = pg.PlotItem()
    view_box = self.plot.getViewBox()
    view_box.setMenuEnabled(False)
    plot_widget.setCentralItem(self.plot)

    # Draw both axes in the palette's text colour.
    axis_pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
    for axis_name in ("left", "bottom"):
        self.plot.getAxis(axis_name).setPen(axis_pen)

    # Legend lives inside the plot's view box and stays hidden until a
    # grouped plot populates it.
    self._legend = LegendItem()
    self._legend.setParentItem(self.plot.getViewBox())
    self._legend.hide()
    self._legend.anchor((1, 0), (1, 0))
class OWDistributions(widget.OWWidget):
    """Widget that plots the value distribution of one data feature.

    The user picks a variable and, optionally, a discrete "group by"
    variable.  Without a group variable the plain distribution is drawn;
    with one, the contingency of the two variables is drawn (stacked
    curves for a continuous variable, segmented bars for a discrete one).
    """
    name = "Distributions"
    description = "Display value distributions of a data feature in a graph."
    icon = "icons/Distribution.svg"
    priority = 100

    inputs = [InputSignal("Data", Orange.data.Table, "set_data",
                          doc="Set the input data set")]

    settingsHandler = settings.DomainContextHandler()
    #: Selected variable index
    variable_idx = settings.ContextSetting(-1)
    #: Selected group variable
    groupvar_idx = settings.ContextSetting(0)

    Hist, ASH, Kernel = 0, 1, 2
    #: Continuous variable density estimation method
    cont_est_type = settings.Setting(ASH)
    relative_freq = settings.Setting(False)

    def __init__(self, parent=None):
        """Create the widget state, the control-area lists and the plot."""
        super().__init__(parent)

        self.data = None
        self.distributions = None
        self.contingencies = None
        self.var = self.cvar = None

        varbox = gui.widgetBox(self.controlArea, "Variable")

        self.varmodel = itemmodels.VariableListModel()
        self.groupvarmodel = itemmodels.VariableListModel()

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.varview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding)
        self.varview.setModel(self.varmodel)
        self.varview.setSelectionModel(
            itemmodels.ListSingleSelectionModel(self.varmodel))
        self.varview.selectionModel().selectionChanged.connect(
            self._on_variable_idx_changed)
        varbox.layout().addWidget(self.varview)

        gui.separator(varbox, 8, 8)
        gui.comboBox(
            varbox, self, "cont_est_type",
            label="Show continuous variables by",
            valueType=int,
            items=["Histograms", "Average shifted histograms",
                   "Kernel density estimators"],
            callback=self._on_cont_est_type_changed)

        box = gui.widgetBox(self.controlArea, "Group by")
        self.groupvarview = QtGui.QListView(
            selectionMode=QtGui.QListView.SingleSelection)
        self.groupvarview.setFixedHeight(100)
        self.groupvarview.setSizePolicy(
            QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Preferred)
        self.groupvarview.setModel(self.groupvarmodel)
        self.groupvarview.selectionModel().selectionChanged.connect(
            self._on_groupvar_idx_changed)
        box.layout().addWidget(self.groupvarview)

        self.cb_rel_freq = gui.checkBox(
            box, self, "relative_freq", "Show relative frequencies",
            callback=self._on_relative_freq_changed)

        plotview = pg.PlotWidget(background=None)
        self.mainArea.layout().addWidget(plotview)

        w = QtGui.QLabel()
        w.setSizePolicy(QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Fixed)
        # NOTE(review): the second positional argument of a box layout's
        # addWidget is the stretch factor, not the alignment -- confirm
        # whether alignment=Qt.AlignCenter was intended.
        self.mainArea.layout().addWidget(w, Qt.AlignCenter)

        self.plot = pg.PlotItem()
        self.plot.getViewBox().setMenuEnabled(False)
        plotview.setCentralItem(self.plot)

        # Draw both axes in the palette's text colour.
        pen = QtGui.QPen(self.palette().color(QtGui.QPalette.Text))
        for axis in ("left", "bottom"):
            self.plot.getAxis(axis).setPen(pen)

        # The legend stays hidden until a grouped plot populates it.
        self._legend = LegendItem()
        self._legend.setParentItem(self.plot.getViewBox())
        self._legend.hide()
        self._legend.anchor((1, 0), (1, 0))

    def set_data(self, data):
        """Set the input data and rebuild the variable lists and the plot.

        :param data: input table, or ``None`` to clear the widget.
        """
        self.closeContext()
        self.clear()
        self.data = data
        if self.data is not None:
            domain = self.data.domain
            self.varmodel[:] = list(domain)
            self.groupvarmodel[:] = \
                ["(None)"] + [var for var in domain if var.is_discrete]
            # Group by the class variable by default; a stored context
            # (opened below) may override this.
            if domain.has_discrete_class:
                self.groupvar_idx = \
                    list(self.groupvarmodel).index(domain.class_var)
            self.openContext(domain)
            # Clamp both indices into the range of their models.
            self.variable_idx = min(max(self.variable_idx, 0),
                                    len(self.varmodel) - 1)
            self.groupvar_idx = min(max(self.groupvar_idx, 0),
                                    len(self.groupvarmodel) - 1)
            itemmodels.select_row(self.groupvarview, self.groupvar_idx)
            itemmodels.select_row(self.varview, self.variable_idx)
            self._setup()

    def clear(self):
        """Empty the plot, both variable lists and the legend."""
        self.plot.clear()
        self.varmodel[:] = []
        self.groupvarmodel[:] = []
        self.variable_idx = -1
        self.groupvar_idx = 0
        self._legend.clear()
        self._legend.hide()

    def _setup(self):
        """Resolve the selected variables and redraw the plot.

        Index 0 of the group list is the "(None)" entry, so only a
        positive ``groupvar_idx`` selects a group variable.
        """
        self.plot.clear()
        self._legend.clear()
        self._legend.hide()

        varidx = self.variable_idx
        self.var = self.cvar = None
        if varidx >= 0:
            self.var = self.varmodel[varidx]
        if self.groupvar_idx > 0:
            self.cvar = self.groupvarmodel[self.groupvar_idx]
        self.set_left_axis_name()
        self.enable_disable_rel_freq()
        if self.var is None:
            return
        if self.cvar:
            self.contingencies = \
                contingency.get_contingency(self.data, self.var, self.cvar)
            self.display_contingency()
        else:
            self.distributions = \
                distribution.get_distribution(self.data, self.var)
            self.display_distribution()

    def _density_estimator(self):
        """Return the curve estimator selected by ``cont_est_type``.

        The returned callable takes a distribution and returns an
        ``(edges, curve)`` pair.  ``None`` is returned when
        ``cont_est_type`` is none of ``Hist``, ``ASH`` or ``Kernel``.
        """
        if self.cont_est_type == OWDistributions.Hist:
            def hist(dist):
                # Row 0 holds the values and row 1 their weights.
                h, edges = numpy.histogram(dist[0, :], bins=10,
                                           weights=dist[1, :])
                return edges, h
            return hist
        elif self.cont_est_type == OWDistributions.ASH:
            return lambda dist: ash_curve(dist, m=5)
        elif self.cont_est_type == OWDistributions.Kernel:
            return rect_kernel_curve

    def display_distribution(self):
        """Draw ``self.distributions`` for ``self.var`` (no grouping).

        A continuous variable is drawn as one filled step curve produced
        by the selected density estimator; any other variable as one grey
        bar per value.
        """
        dist = self.distributions
        var = self.var
        assert len(dist) > 0
        self.plot.clear()

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        self.set_left_axis_name()
        if var and var.is_continuous:
            bottomaxis.setTicks(None)
            curve_est = self._density_estimator()
            edges, curve = curve_est(dist)
            item = pg.PlotCurveItem()
            item.setData(edges, curve, antialias=True, stepMode=True,
                         fillLevel=0, brush=QtGui.QBrush(Qt.gray),
                         pen=QtGui.QColor(Qt.white))
            self.plot.addItem(item)
        else:
            bottomaxis.setTicks([list(enumerate(var.values))])
            for i, w in enumerate(dist):
                # Bar of width 0.66 centred on the value's index.
                geom = QtCore.QRectF(i - 0.33, 0, 0.66, w)
                item = DistributionBarItem(geom, [1.0],
                                           [QtGui.QColor(128, 128, 128)])
                self.plot.addItem(item)

    def _on_relative_freq_changed(self):
        """Redraw the plot after the relative-frequency box is toggled."""
        self.set_left_axis_name()
        # The check box can be toggled before any data has been received;
        # there is nothing to redraw then, and the display methods would
        # fail on the missing distribution.
        if self.data is None or self.var is None:
            return
        if self.cvar and self.cvar.is_discrete:
            self.display_contingency()
        else:
            self.display_distribution()

    def display_contingency(self):
        """
        Set the contingency to display.

        Draws ``self.contingencies`` for ``self.var`` grouped by
        ``self.cvar`` and fills the legend with one entry per group value.
        """
        cont = self.contingencies
        var, cvar = self.var, self.cvar
        assert len(cont) > 0
        self.plot.clear()
        self._legend.clear()

        bottomaxis = self.plot.getAxis("bottom")
        bottomaxis.setLabel(var.name)

        palette = colorpalette.ColorPaletteGenerator(len(cvar.values))
        colors = [palette[i] for i in range(len(cvar.values))]

        if var and var.is_continuous:
            bottomaxis.setTicks(None)

            # Each group's share of the total weight.  A float array and
            # out-of-place division keep this working for integer weights,
            # and the guard avoids NaN curves when the total is zero.
            weights = numpy.array([numpy.sum(W) for _, W in cont],
                                  dtype=float)
            total = numpy.sum(weights)
            if total > 0:
                weights = weights / total

            curve_est = self._density_estimator()
            curves = [curve_est(dist) for dist in cont]
            curves = [(X, Y * w) for (X, Y), w in zip(curves, weights)]

            # The individual curves are summed, not the distributions: the
            # conditional distributions might be 'smoother' than the
            # cumulative one.
            cum_curves = [curves[0]]
            for X, Y in curves[1:]:
                cum_curves.append(sum_rect_curve(X, Y, *cum_curves[-1]))

            # The last (tallest) cumulative curve is added first.
            for (X, Y), color in reversed(list(zip(cum_curves, colors))):
                item = pg.PlotCurveItem()
                pen = QtGui.QPen(QtGui.QBrush(Qt.white), 0.5)
                pen.setCosmetic(True)
                item.setData(X, Y, antialias=True, stepMode=True,
                             fillLevel=0, brush=QtGui.QBrush(color.lighter()),
                             pen=pen)
                self.plot.addItem(item)

        elif var and var.is_discrete:
            bottomaxis.setTicks([list(enumerate(var.values))])

            cont = numpy.array(cont)
            for i, (_value, dist) in enumerate(zip(var.values, cont.T)):
                dsum = sum(dist)
                # An empty column has no segments to draw; dividing by its
                # zero sum would only produce NaN fractions.
                if dsum == 0:
                    continue
                geom = QtCore.QRectF(i - 0.333, 0, 0.666,
                                     100 if self.relative_freq else dsum)
                item = DistributionBarItem(geom, dist / dsum, colors)
                self.plot.addItem(item)

        for color, name in zip(colors, cvar.values):
            # NOTE(review): pyqtgraph scatter items take the marker as
            # `symbol`; confirm whether `shape="s"` has any effect here.
            self._legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10, shape="s"),
                name
            )
        self._legend.show()

    def set_left_axis_name(self):
        """Label the left axis for the current variable and estimator.

        "Density" is used for a continuous variable unless the histogram
        estimator is selected; otherwise "Frequency", or "Relative
        frequency" when a group variable is set and ``relative_freq`` is
        truthy.
        """
        set_label = self.plot.getAxis("left").setLabel
        if (self.var and self.var.is_continuous and
                self.cont_est_type != OWDistributions.Hist):
            set_label("Density")
        else:
            # The boolean expression is used as a 0/1 index into the list.
            set_label(["Frequency", "Relative frequency"]
                      [self.cvar is not None and self.relative_freq])

    def enable_disable_rel_freq(self):
        """Disable the relative-frequency box unless it applies.

        It applies only when both a variable and a group variable are
        selected and the variable is not continuous.
        """
        self.cb_rel_freq.setDisabled(
            self.var is None or self.cvar is None or self.var.is_continuous)

    def _on_variable_idx_changed(self):
        """Store the newly selected variable index and redraw."""
        self.variable_idx = selected_index(self.varview)
        self._setup()

    def _on_groupvar_idx_changed(self):
        """Store the newly selected group variable index and redraw."""
        self.groupvar_idx = selected_index(self.groupvarview)
        self._setup()

    def _on_cont_est_type_changed(self):
        """Relabel the axis and redraw after the estimator changes."""
        self.set_left_axis_name()
        if self.data is not None:
            self._setup()

    def onDeleteWidget(self):
        """Clear the plot, then run the parent class clean-up."""
        self.plot.clear()
        super().onDeleteWidget()