class OWNxExplorer(OWDataProjectionWidget):
    """Widget that draws a network and lets the user mark and select nodes.

    The widget receives a network (plus optional node data, a node subset and
    a distance matrix), lays the nodes out with the Fruchterman-Reingold
    optimizer running in a background thread, and outputs the selected and the
    remaining sub-networks together with the matching distance sub-matrix.
    """
    name = "Network Explorer"
    description = "Visually explore the network and its properties."
    icon = "icons/NetworkExplorer.svg"
    priority = 6420

    class Inputs:
        node_data = Input("Node Data", Table)
        node_subset = Input("Node Subset", Table)
        network = Input("Network", network.Graph, default=True)
        node_distances = Input("Node Distances", Orange.misc.DistMatrix)

    class Outputs(OWDataProjectionWidget.Outputs):
        subgraph = Output("Selected sub-network", network.Graph)
        unselected_subgraph = Output("Remaining sub-network", network.Graph)
        distances = Output("Distance matrix", Orange.misc.DistMatrix)

    UserAdviceMessages = [
        widget.Message('Double clicks select connected components',
                       widget.Message.Information),
    ]

    GRAPH_CLASS = GraphView
    graph = SettingProvider(GraphView)

    randomizePositions = Setting(True)
    mark_hops = Setting(1)
    mark_min_conn = Setting(5)
    mark_max_conn = Setting(5)
    mark_most_conn = Setting(1)

    alpha_value = 255  # Override the setting from parent

    class Warning(widget.OWWidget.Warning):
        distance_matrix_mismatch = widget.Msg(
            "Distance matrix size doesn't match the number of network nodes "
            " and will be ignored.")
        no_graph_found = widget.Msg(
            'Node data is given, graph data is missing')

    class Error(widget.OWWidget.Error):
        data_size_mismatch = widget.Msg(
            'Length of the data does not match the number of nodes.')
        network_too_large = widget.Msg('Network is too large to visualize.')
        single_node_graph = widget.Msg("I don't do single-node graphs today.")

    def __init__(self):
        # These are already needed in super().__init__()
        self.number_of_nodes = 0
        self.number_of_edges = 0
        self.nHighlighted = 0
        self.nSelected = 0
        self.nodes_per_edge = 0
        self.edges_per_node = 0
        self.mark_mode = 0
        self.mark_text = ""

        super().__init__()

        self.network = None
        self.node_data = None
        self.distance_matrix = None
        self.edges = None
        self.positions = None

        self._optimizer = None
        self._animation_thread = None
        self._stop_optimization = False

        self.marked_nodes = None
        # Delays re-marking while the user is still typing the search text
        self.searchStringTimer = QTimer(self)
        self.searchStringTimer.timeout.connect(self.update_marks)
        self.set_mark_mode()
        self.setMinimumWidth(600)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(800, 600)

    def _add_controls(self):
        """Build the control area and the marking box below the plot."""
        self.gui = OWPlotGUI(self)
        self._add_info_box()
        self.gui.point_properties_box(self.controlArea)
        self._add_effects_box()
        self.gui.plot_properties_box(self.controlArea)
        gui.rubber(self.controlArea)
        self.gui.box_zoom_select(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")
        self._add_mark_box()
        self.controls.attr_label.activated.connect(self.on_change_label_attr)

    def _add_info_box(self):
        """Add the box with node/edge counts and the layout buttons."""
        info = gui.vBox(self.controlArea, True)
        gui.label(
            info, self,
            "Nodes: %(number_of_nodes)i (%(nodes_per_edge).2f per edge); "
            "%(nSelected)i selected")
        gui.label(
            info, self,
            "Edges: %(number_of_edges)i (%(edges_per_node).2f per node)")
        lbox = gui.hBox(info)
        self.relayout_button = gui.button(
            lbox, self, 'Re-layout', callback=self.relayout,
            autoDefault=False)
        self.stop_button = gui.button(
            lbox, self, 'Stop', callback=self.stop_relayout,
            autoDefault=False, hidden=True)
        self.randomize_cb = gui.checkBox(
            lbox, self, "randomizePositions", "Randomize positions")

    def _add_effects_box(self):
        """Add controls for node size, edge width and edge labels."""
        gbox = self.gui.create_gridbox(self.controlArea, True)
        self.gui.add_widget(self.gui.PointSize, gbox)
        gbox.layout().itemAtPosition(1, 0).widget().setText("Node Size:")
        self.gui.add_control(
            gbox, gui.hSlider, "Edge width:",
            master=self, value='graph.edge_width',
            minValue=1, maxValue=10, step=1,
            callback=self.graph.update_edges)
        box = gui.vBox(None)
        gbox.layout().addWidget(box, 3, 0, 1, 2)
        gui.separator(box)
        self.checkbox_relative_edges = gui.checkBox(
            box, self, 'graph.relative_edge_widths',
            'Scale edge widths to weights',
            callback=self.graph.update_edges)
        self.checkbox_show_weights = gui.checkBox(
            box, self, 'graph.show_edge_weights',
            'Show edge weights',
            callback=self.graph.update_edge_labels)
        # Kept under its own attribute; it used to overwrite
        # `checkbox_show_weights`, so that checkbox could never be toggled.
        self.checkbox_label_selected_edges = gui.checkBox(
            box, self, 'graph.label_selected_edges',
            'Label only edges of selected nodes',
            callback=self.graph.update_edge_labels)

        # This is ugly: create a slider that controls alpha_value so that
        # parent can enable and disable it - although it's never added to any
        # layout and visible to the user
        gui.hSlider(None, self, "graph.alpha_value")

    def _add_mark_box(self):
        """Add the combo with marking criteria and the selection buttons.

        Populates `self.mark_criteria` with triplets
        (label, optional parameter widget, function returning node indices).
        """
        hbox = gui.hBox(None, box=True)
        self.mainArea.layout().addWidget(hbox)
        vbox = gui.hBox(hbox)

        def spin(value, label, minv, maxv):
            return gui.spin(
                vbox, self, value, label=label, minv=minv, maxv=maxv,
                step=1,
                alignment=Qt.AlignRight, callback=self.update_marks).box

        def text_line():
            def set_search_string_timer():
                # Restart the timer so marks update 300 ms after last key
                self.searchStringTimer.stop()
                self.searchStringTimer.start(300)

            return gui.lineEdit(
                gui.hBox(vbox), self, "mark_text", label="Text: ",
                orientation=Qt.Horizontal, minimumWidth=50,
                callback=set_search_string_timer, callbackOnType=True).box

        def mark_label_starts():
            txt = self.mark_text.lower()
            if not txt:
                return None
            labels = self.get_label_data()
            if labels is None:
                return None
            return [i for i, label in enumerate(labels)
                    if label.lower().startswith(txt)]

        def mark_label_contains():
            txt = self.mark_text.lower()
            if not txt:
                return None
            labels = self.get_label_data()
            if labels is None:
                return None
            return [i for i, label in enumerate(labels)
                    if txt in label.lower()]

        def mark_text():
            txt = self.mark_text.lower()
            if not txt or self.data is None:
                return None
            return [i for i, inst in enumerate(self.data)
                    if txt in "\x00".join(map(str, inst.list)).lower()]

        def mark_reachable():
            selected = self.graph.get_selection()
            if selected is None:
                return None
            return self.get_reachable(selected)

        def mark_close():
            selected = self.graph.get_selection()
            if selected is None:
                return None
            neighbours = set(selected)
            last_round = list(neighbours)
            # Expand the frontier by one hop per iteration
            for _ in range(self.mark_hops):
                next_round = set()
                for neigh in last_round:
                    next_round |= set(self.network[neigh])
                neighbours |= next_round
                last_round = next_round
            neighbours -= set(selected)
            return list(neighbours)

        def mark_from_input():
            if self.subset_data is None or self.data is None:
                return None
            ids = set(self.subset_data.ids)
            return [i for i, ex in enumerate(self.data) if ex.id in ids]

        def mark_most_connections():
            n = self.mark_most_conn
            if n >= self.number_of_nodes:
                return np.arange(self.number_of_nodes)
            degrees = np.array(self.network.degree())
            # pylint: disable=invalid-unary-operand-type
            min_degree = np.partition(degrees[:, 1].flatten(), -n)[-n]
            return degrees[degrees[:, 1] >= min_degree, 0]

        self.mark_criteria = [
            ("(Select criteria for marking)", None, lambda: []),
            ("Mark nodes whose label starts with", text_line(),
             mark_label_starts),
            ("Mark nodes whose label contains", text_line(),
             mark_label_contains),
            ("Mark nodes whose data that contains", text_line(), mark_text),
            ("Mark nodes reachable from selected", None, mark_reachable),
            ("Mark nodes in vicinity of selection",
             spin("mark_hops", "Number of hops:", 1, 20),
             mark_close),
            ("Mark nodes from subset signal", None, mark_from_input),
            ("Mark nodes with few connections",
             spin("mark_max_conn", "Max. connections:", 0, 1000),
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree <= self.mark_max_conn
             ]),
            ("Mark nodes with many connections",
             spin("mark_min_conn", "Min. connections:", 1, 1000),
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree >= self.mark_min_conn
             ]),
            ("Mark nodes with most connections",
             spin("mark_most_conn", "Number of marked:", 1, 1000),
             mark_most_connections),
            ("Mark nodes with more connections than any neighbour", None,
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree > max(
                     (deg for _, deg
                      in self.network.degree(self.network[node])),
                     default=0)
             ]),
            ("Mark nodes with more connections than average neighbour", None,
             lambda: [
                 node for node, degree in self.network.degree()
                 if degree > np.mean([
                     deg for _, deg
                     in self.network.degree(self.network[node])
                 ] or [0])
             ])
        ]

        cb = gui.comboBox(
            hbox, self, "mark_mode",
            items=[item for item, *_ in self.mark_criteria],
            maximumContentsLength=-1, callback=self.set_mark_mode)
        hbox.layout().insertWidget(0, cb)

        gui.rubber(hbox)
        self.btselect = gui.button(
            hbox, self, "Select", callback=self.select_marked)
        self.btadd = gui.button(
            hbox, self, "Add to Selection", callback=self.select_add_marked)
        self.btgroup = gui.button(
            hbox, self, "Add New Group", callback=self.select_as_group)

    def set_mark_mode(self, mode=None):
        """Switch the marking criterion and show only its parameter widget.

        Args:
            mode: index into `self.mark_criteria`; when None, the current
                `self.mark_mode` is kept.
        """
        if mode is not None:
            self.mark_mode = mode
        for i, (_, control, _) in enumerate(self.mark_criteria):
            if control:
                if i == self.mark_mode:
                    control.show()
                else:
                    control.hide()
        self.searchStringTimer.stop()
        self.update_marks()

    def update_marks(self):
        """Recompute marked nodes with the active criterion and redraw."""
        if self.network is None:
            return
        to_mark = self.mark_criteria[self.mark_mode][2]()
        if to_mark is None or not len(to_mark):
            self.marked_nodes = None
        else:
            self.marked_nodes = np.asarray(to_mark)
        self.graph.update_marks()
        if self.graph.label_only_selected:
            self.graph.update_labels()
        self.update_selection_buttons()

    def update_selection_buttons(self):
        """Show or hide the selection buttons according to marks/selection."""
        if self.marked_nodes is None:
            self.btselect.hide()
            self.btadd.hide()
            self.btgroup.hide()
            return
        self.btselect.show()

        selection = self.graph.get_selection()
        # Other methods of this class treat a None selection as possible,
        # so handle it here as "nothing selected" instead of failing in len()
        if selection is None or not len(selection) \
                or np.max(selection) == 0:
            self.btadd.hide()
            self.btgroup.hide()
        elif np.max(selection) == 1:
            self.btadd.setText("Add to Selection")
            self.btadd.show()
            self.btgroup.hide()
        else:
            self.btadd.setText("Add to Group")
            self.btadd.show()
            self.btgroup.show()

    def selection_changed(self):
        """Propagate a selection change to the buttons and the marks."""
        super().selection_changed()
        self.update_selection_buttons()
        self.update_marks()

    def select_marked(self):
        """Replace the selection with the marked nodes."""
        self.graph.selection_select(self.marked_nodes)

    def select_add_marked(self):
        """Append the marked nodes to the current selection."""
        self.graph.selection_append(self.marked_nodes)

    def select_as_group(self):
        """Put the marked nodes into a new selection group."""
        self.graph.selection_new_group(self.marked_nodes)

    def on_change_label_attr(self):
        """Re-mark nodes when the label changes and marks depend on labels."""
        if self.mark_mode in (1, 2):
            self.update_marks()

    @Inputs.node_data
    def set_node_data(self, data):
        """Store node data; it is applied in `handleNewSignals`."""
        self.node_data = data

    @Inputs.node_subset
    def set_node_subset(self, data):
        """Forward the node subset to the parent's subset handling."""
        super().set_subset_data(data)

    @Inputs.node_distances
    def set_items_distance_matrix(self, matrix):
        """Store the distance matrix and invalidate the current layout."""
        self.distance_matrix = matrix
        self.positions = None

    @Inputs.network
    def set_graph(self, graph):
        """Store the input network and update the node/edge statistics.

        Empty or missing networks reset the statistics; networks whose
        number of nodes plus edges exceeds 30000 are rejected with an error.
        """
        def set_graph_none(error=None):
            if error is not None:
                error()
            self.network = None
            self.number_of_nodes = self.edges_per_node = 0
            self.number_of_edges = self.nodes_per_edge = 0

        def compute_stats():
            self.number_of_nodes = graph.number_of_nodes()
            self.number_of_edges = graph.number_of_edges()
            self.edges_per_node = self.number_of_edges / self.number_of_nodes
            self.nodes_per_edge = \
                self.number_of_nodes / max(1, self.number_of_edges)

        # Do not leave a stale error from a previous, too large, network
        self.Error.network_too_large.clear()
        # `number_of_nodes` is a method: it has to be called, otherwise the
        # comparison with 0 is always False
        if not graph or graph.number_of_nodes() == 0:
            set_graph_none()
            return
        if graph.number_of_nodes() + graph.number_of_edges() > 30000:
            set_graph_none(self.Error.network_too_large)
            return
        self.Error.clear()

        self.mark_text = ""
        self.set_mark_mode(0)
        self.network = graph
        compute_stats()
        self.positions = None

    def handleNewSignals(self):
        """Combine the received inputs and refresh the graph.

        Any running layout optimization is stopped first. If positions were
        invalidated, edges are rebuilt and a new layout is started; otherwise
        only point properties are updated.
        """
        network = self.network

        def set_actual_data():
            self.closeContext()
            self.Error.data_size_mismatch.clear()
            self.Warning.no_graph_found.clear()
            self._invalid_data = False
            if network is None:
                if self.node_data is not None:
                    self.Warning.no_graph_found()
                return

            if self.node_data is not None:
                if len(self.node_data) != self.number_of_nodes:
                    self.Error.data_size_mismatch()
                    self._invalid_data = True
                    self.data = None
                else:
                    self.data = self.node_data
            if self.node_data is None:
                self.data = network.items()
            if self.data is not None:
                # Replicate the necessary parts of set_data
                # (builtin `bool`: the `np.bool` alias no longer exists
                # in current NumPy)
                self.valid_data = np.full(len(self.data), True, dtype=bool)
                self.init_attr_values()
                self.openContext(self.data)
                self.cb_class_density.setEnabled(self.can_draw_density())

        def set_actual_edges():
            def set_checkboxes(value):
                self.checkbox_show_weights.setEnabled(value)
                self.checkbox_relative_edges.setEnabled(value)
                self.checkbox_label_selected_edges.setEnabled(value)

            self.Warning.distance_matrix_mismatch.clear()

            if self.network is None:
                self.edges = None
                set_checkboxes(False)
                return

            set_checkboxes(True)
            edges = network.edges(data='weight')
            if edges:
                row, col, data = zip(*edges)
                if all(w is None for w in data):
                    data = np.ones((len(data), ), dtype=float)
                self.edges = sp.coo_matrix((data, (row, col)))
            else:
                self.edges = sp.coo_matrix((0, 3))
            if self.distance_matrix is not None:
                if len(self.distance_matrix) != self.number_of_nodes:
                    self.Warning.distance_matrix_mismatch()
                else:
                    # Read distances as floats; an integer dtype would
                    # truncate fractional distances to 0
                    self.edges.data = np.fromiter(
                        (self.distance_matrix[u, v]
                         for u, v in zip(self.edges.row, self.edges.col)),
                        dtype=float, count=len(self.edges.row))
            if np.allclose(self.edges.data, 0):
                # All-zero weights carry no information; use unit weights
                self.edges.data[:] = 1
                set_checkboxes(False)
            elif len(set(self.edges.data)) == 1:
                set_checkboxes(False)

        self.stop_optimization_and_wait()
        set_actual_data()
        if self.positions is None:
            set_actual_edges()
            self.set_random_positions()
            self.graph.reset_graph()
            self.relayout()
        else:
            self.graph.update_point_props()
        self.update_marks()
        self.update_selection_buttons()

    def set_random_positions(self):
        """Assign uniformly random 2D positions to all nodes."""
        self.positions = np.random.uniform(size=(self.number_of_nodes, 2))

    def get_reachable(self, initial):
        """Return a list of nodes reachable from the nodes in `initial`.

        The result includes the initial nodes themselves.
        """
        to_check = list(initial)
        reachable = set(to_check)
        # `to_check` grows while being iterated; this is the traversal queue
        for node in to_check:
            new_checks = set(self.network[node]) - reachable
            to_check += new_checks
            reachable |= new_checks
        return list(reachable)

    def send_data(self):
        """Send the data outputs of the parent and the network outputs."""
        super().send_data()

        Outputs = self.Outputs
        selected_indices = self.graph.get_selection()
        if selected_indices is None or len(selected_indices) == 0:
            Outputs.subgraph.send(None)
            Outputs.unselected_subgraph.send(self.network)
            Outputs.distances.send(None)
            return

        selection = self.graph.selection
        subgraph = self.network.subgraph(selected_indices)
        sub_data = \
            self._get_selected_data(self.data, selected_indices, selection)
        subgraph.set_items(sub_data)
        Outputs.subgraph.send(subgraph)
        Outputs.unselected_subgraph.send(
            self.network.subgraph(np.flatnonzero(selection == 0)))
        distances = self.distance_matrix
        if distances is None:
            Outputs.distances.send(None)
        else:
            Outputs.distances.send(
                distances.submatrix(sorted(selected_indices)))

    def get_coordinates_data(self):
        """Return node coordinates as (x, y), or (None, None) if unset."""
        if self.positions is not None:
            return self.positions.T
        else:
            return None, None

    def get_embedding(self):
        """Return the array of node positions (None if not computed)."""
        return self.positions

    def get_subset_mask(self):
        """Return the parent's subset mask, or None when there is no data."""
        if self.data is None:
            return None
        return super().get_subset_mask()

    def get_edges(self):
        """Return the sparse matrix of edges (None without a network)."""
        return self.edges

    def get_marked_nodes(self):
        """Return indices of currently marked nodes (None if none)."""
        return self.marked_nodes

    def set_buttons(self, running):
        """Show 'Stop' while the layout is running, 'Re-layout' otherwise."""
        self.stop_button.setHidden(not running)
        self.relayout_button.setHidden(running)

    def stop_relayout(self):
        """Ask the running optimizer to stop and restore the buttons."""
        self._stop_optimization = True
        self.set_buttons(running=False)

    # TODO: Stop relayout if new data is received
    def relayout(self):
        """Start the layout optimization in a background thread.

        Does nothing when there are no edges. While the optimization runs,
        the graph is drawn with simplifications that depend on the numbers
        of labels and edges; for large graphs the number of iterations is
        reduced and intermediate updates are not drawn.
        """
        if self.edges is None:
            return
        if self.randomizePositions:
            self.set_random_positions()
        self.progressbar = gui.ProgressBar(self, FR_ITERATIONS)
        self.set_buttons(running=True)
        self._stop_optimization = False

        Simplifications = self.graph.Simplifications
        self.graph.set_simplifications(
            Simplifications.NoDensity
            + Simplifications.NoLabels * (len(self.graph.labels) > 20)
            + Simplifications.NoEdgeLabels
            * (len(self.graph.edge_labels) > 20)
            + Simplifications.NoEdges * (self.number_of_edges > 1000))

        large_graph = self.number_of_nodes + self.number_of_edges > 20000
        iterations = 5 if large_graph else FR_ITERATIONS

        class LayoutOptimizer(QObject):
            update = Signal(np.ndarray, float)
            done = Signal(np.ndarray)
            stopped = Signal()

            def __init__(self, widget):
                super().__init__()
                self.widget = widget

            def send_update(self, positions, progress):
                # The return value tells the optimizer whether to continue
                if not large_graph:
                    self.update.emit(np.array(positions), progress)
                return not self.widget._stop_optimization

            def run(self):
                widget = self.widget
                edges = widget.edges
                positions = np.array(
                    fruchterman_reingold(
                        edges.data, edges.row, edges.col,
                        1 / np.sqrt(widget.number_of_nodes),  # k
                        widget.positions,
                        np.array([], dtype=np.int32),  # fixed
                        iterations,
                        0.1,  # sample ratio
                        self.send_update, 0.25))
                self.done.emit(positions)
                self.stopped.emit()

        def update(positions, progress):
            self.progressbar.advance(progress)
            self.positions = positions
            self.graph.update_coordinates()

        def done(positions):
            self.positions = positions
            self.set_buttons(running=False)
            self.graph.set_simplifications(
                self.graph.Simplifications.NoSimplifications)
            self.graph.update_coordinates()
            self.progressbar.finish()

        def thread_finished():
            self._optimizer = None
            self._animation_thread = None

        self._optimizer = LayoutOptimizer(self)
        self._animation_thread = QThread()
        self._optimizer.update.connect(update)
        self._optimizer.done.connect(done)
        self._optimizer.stopped.connect(self._animation_thread.quit)
        self._optimizer.moveToThread(self._animation_thread)
        self._animation_thread.started.connect(self._optimizer.run)
        self._animation_thread.finished.connect(thread_finished)
        self._animation_thread.start()

    def stop_optimization_and_wait(self):
        """Stop the layout thread, if any, and block until it finishes."""
        if self._animation_thread is not None:
            self._stop_optimization = True
            self._animation_thread.quit()
            self._animation_thread.wait()
            self._animation_thread = None

    def onDeleteWidget(self):
        """Stop the background thread before the widget is deleted."""
        self.stop_optimization_and_wait()
        super().onDeleteWidget()

    def send_report(self):
        """Add graph statistics, data, visual settings and plot to report."""
        if self.network is None:
            # Nothing to report without a network
            return
        self.report_items('Graph info', [
            ("Number of vertices", self.network.number_of_nodes()),
            ("Number of edges", self.network.number_of_edges()),
            ("Vertices per edge", round(self.nodes_per_edge, 3)),
            ("Edges per vertex", round(self.edges_per_node, 3)),
        ])
        self.report_data("Data", self.network.items())
        if any((self.attr_color, self.attr_shape,
                self.attr_size, self.attr_label)):
            self.report_items(
                "Visual settings",
                [("Color", self._get_caption_var_name(self.attr_color)),
                 ("Label", self._get_caption_var_name(self.attr_label)),
                 ("Shape", self._get_caption_var_name(self.attr_shape)),
                 ("Size", self._get_caption_var_name(self.attr_size))])
        self.report_plot()
class OWInterpolate(widget.OWWidget):
    """Widget that sets the interpolation method on a time series.

    Outputs a copy of the input time series with the chosen interpolation
    settings, and the interpolated time series.
    """
    name = 'Interpolate'
    description = 'Induce missing values (nan) in the time series by interpolation.'
    icon = 'icons/Interpolate.svg'
    priority = 15

    inputs = [("Time series", Table, 'set_data')]
    outputs = [
        (Output.TIMESERIES, Timeseries),
        (Output.INTERPOLATED, Timeseries),  # TODO
        # (Output.INTERPOLATOR, Model)  # TODO
    ]

    want_main_area = False
    resizing_enabled = False

    interpolation = settings.Setting('linear')
    multivariate = settings.Setting(False)
    autoapply = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'While you can freely choose the interpolation method '
            'for continuous variables, discrete variables can only '
            'be interpolated with the <i>nearest</i> method or '
            'their mode (i.e. the most frequent value).',
            'discrete-interp',
            widget.Message.Warning)
    ]

    def __init__(self):
        # Initialize the base widget explicitly, like other widgets do
        super().__init__()
        self.data = None
        box = gui.vBox(self.controlArea, 'Interpolation Parameters')
        gui.comboBox(box, self, 'interpolation',
                     callback=self.on_changed,
                     label='Interpolation of missing values:',
                     sendSelectedValue=True,
                     orientation=Qt.Horizontal,
                     items=('linear', 'cubic', 'nearest', 'mean'))
        gui.checkBox(box, self, 'multivariate',
                     label='Multi-variate interpolation',
                     callback=self.on_changed)
        gui.auto_commit(box, self, 'autoapply', 'Apply')

    def set_data(self, data):
        """Store the input as a Timeseries (or None) and update outputs."""
        self.data = None if data is None else Timeseries.from_data_table(data)
        self.on_changed()

    def on_changed(self):
        """React to a change of input or settings."""
        self.commit()

    def commit(self):
        """Send the configured time series and its interpolated version.

        Sends None on both outputs when there is no input data.
        """
        if self.data is None:
            # Handle missing input explicitly rather than relying on
            # `try_` to swallow the failure of `None.interp()`
            self.send(Output.TIMESERIES, None)
            self.send(Output.INTERPOLATED, None)
            return
        data = self.data.copy()
        data.set_interpolation(self.interpolation, self.multivariate)
        self.send(Output.TIMESERIES, data)
        self.send(Output.INTERPOLATED, try_(lambda: data.interp()) or None)
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget"""

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    quantities = [
        "Number of instances",
        "Proportion of predicted",
        "Proportion of actual"
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    selected_learner = Setting([0], schema_only=True)
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg("Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")
        empty_input = widget.Msg("Empty result on input. Nothing to display.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners",
            box='Learners', callback=self._learner_changed)

        self.outputbox = gui.vBox(self.buttonsArea)
        box = gui.hBox(self.outputbox)
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities", callback=self._invalidate)

        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        box = gui.vBox(self.mainArea, box=True)

        sbox = gui.hBox(box)
        gui.rubber(sbox)
        gui.comboBox(sbox, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        box.layout().addWidget(view)

        selbox = gui.hBox(box)
        gui.button(selbox, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(selbox, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(selbox, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    @staticmethod
    def sizeHint():
        """Initial size"""
        return QSize(750, 340)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new item if none exists."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Put `item` into the model at row `i`, column `j`."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Set up the static parts of the table for `nclasses` classes.

        Rows/columns 0 and 1 hold the "Predicted"/"Actual" captions and the
        class labels from `self.headers`; the matrix itself starts at (2, 2).
        """
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)

        self._set_item(0, 2, item)
        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The four corner cells are inert
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        for p, label in enumerate(self.headers):
            # Each label appears once in the header row and header column
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Set the input results."""
        # false positive, pylint: disable=no-member
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        self.Error.no_regression.clear()
        self.Error.empty_input.clear()
        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        elif results is not None and not results.actual.size:
            self.Error.empty_input()
            data = results = None

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            if np.any(np.isnan(results.actual)) or \
                    np.any(np.isnan(results.predicted)):
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        self.headers = class_values + \
            (unicodedata.lookup("N-ARY SUMMATION"), )

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        self.learners = getattr(results, "learner_names",
                                [f"Learner #{i + 1}" for i in range(nmodels)])

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)
        if not prev_sel_learner or prev_sel_learner[0] >= len(self.learners):
            if self.learners:
                self.selected_learner[:] = [0]
        else:
            self.selected_learner[:] = prev_sel_learner
        self._update()
        self._set_selection()
        self.commit.now()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event"""
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            # Header corner or grand total: select the whole range
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            # Column header or column sum: select the column
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            # Row header or row sum: select the row
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """Build the output tables for the currently selected cells.

        Returns:
            A pair (selected data or None, annotated data). Predictions and
            probabilities of the selected learner are appended as meta
            columns when the corresponding settings are enabled.
        """
        indices = self.tableview.selectedIndexes()
        indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        actual = self.results.actual
        learner_name = self.learners[self.selected_learner[0]]
        predicted = self.results.predicted[self.selected_learner[0]]
        selected = [
            i for i, t in enumerate(zip(actual, predicted))
            if t in indices
        ]

        extra = []
        class_var = self.data.domain.class_var
        metas = self.data.domain.metas
        attrs = self.data.domain.attributes
        names = [var.name for var in chain(metas, [class_var], attrs)]

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            proposed = "{}({})".format(class_var.name, learner_name)
            name = get_unique_names(names, proposed)
            var = Orange.data.DiscreteVariable(name, class_var.values)
            metas = metas + (var, )
            # Later additions must not clash with this new column either
            names.append(name)

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[self.selected_learner[0]]
            extra.append(np.array(probs, dtype=object))
            # Make the probability column names unique, like the
            # prediction column, so that they cannot duplicate a name
            # that is already present in the domain
            pnames = get_unique_names(
                names,
                ["p({})".format(value) for value in class_var.values])
            pvars = [Orange.data.ContinuousVariable(pname)
                     for pname in pnames]
            metas = metas + tuple(pvars)

        domain = Orange.data.Domain(self.data.domain.attributes,
                                    self.data.domain.class_vars,
                                    metas)
        data = self.data.transform(domain)
        if extra:
            with data.unlocked(data.metas):
                data.metas[:, len(self.data.domain.metas):] = \
                    np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    @gui.deferred
    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()
        else:
            data = None
            annotated_data = None

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the current cell selection and schedule a commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2) for ind in indices}
        self.commit.deferred()

    def _set_selection(self):
        """Apply the stored selection to the table view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Refresh the table and outputs after another learner is chosen."""
        self._update()
        self._set_selection()
        self.commit.deferred()

    def _update(self):
        """Fill the table with the confusion matrix of the chosen learner.

        Cells show counts or percentages, depending on
        `selected_quantity`; undefined values are shown as "NA".
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner:
            cmatrix = confusion_matrix(self.results, self.selected_learner[0])
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = np.diag_indices(n)

            colors = cmatrix.astype(np.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = np.array([colors.max()])
            else:
                # Empty rows/columns yield nan/inf, which is shown as "NA";
                # suppress the corresponding numpy warnings
                with np.errstate(divide="ignore", invalid="ignore"):
                    if self.selected_quantity == 1:
                        normalized = 100 * cmatrix / colsum
                        div = colors.max(axis=0)
                    else:
                        normalized = 100 * cmatrix / rowsum[:, np.newaxis]
                        div = colors.max(axis=1)[:, np.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1
            colors /= div
            # Ignore undefined diagonal values when scaling; otherwise a
            # single nan would disable the shading of the entire diagonal
            diag_values = normalized[diag]
            finite = diag_values[np.isfinite(diag_values)]
            maxval = finite.max() if finite.size else 0
            if maxval > 0:
                colors[diag] = diag_values / maxval

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val)
                        else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    # bkcolor is light-ish so use a black text
                    item.setData(QBrush(Qt.black), Qt.ForegroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner[0]],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Convert `selected_learner` stored as int back to a list."""
        if not version:
            # For some period of time the 'selected_learner' property was
            # changed from List[int] -> int
            # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
            # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
            if "selected_learner" in settings and \
                    isinstance(settings["selected_learner"], int):
                settings["selected_learner"] = [settings["selected_learner"]]
class OWPreprocess(OWWidget):
    """Widget that runs an input corpus through a configurable pipeline.

    The pipeline consists of one GUI module per entry in ``preprocessors``;
    each module writes its current value onto a shared
    ``preprocess.Preprocessor`` instance right before the corpus is processed.
    """

    name = 'Preprocess Text'
    description = 'Construct a text pre-processing pipeline.'
    icon = 'icons/TextPreprocess.svg'
    priority = 200

    class Inputs:
        corpus = Input("Corpus", Corpus)

    class Outputs:
        corpus = Output("Corpus", Corpus)

    autocommit = settings.Setting(True)

    # Pipeline stages, instantiated in this order in ``__init__``.
    preprocessors = [
        TransformationModule,
        TokenizerModule,
        NormalizationModule,
        FilteringModule,
        NgramsModule,
        POSTaggingModule,
    ]

    # One settings provider per pipeline stage.
    transformers = settings.SettingProvider(TransformationModule)
    tokenizer = settings.SettingProvider(TokenizerModule)
    normalizer = settings.SettingProvider(NormalizationModule)
    filters = settings.SettingProvider(FilteringModule)
    ngrams_range = settings.SettingProvider(NgramsModule)
    pos_tagger = settings.SettingProvider(POSTaggingModule)

    control_area_width = 180
    buttons_area_orientation = Qt.Vertical

    UserAdviceMessages = [
        widget.Message(
            "Some preprocessing methods require data (like word relationships, stop words, "
            "punctuation rules etc.) from the NLTK package. This data was downloaded "
            "to: {}".format(nltk_data_dir()),
            "nltk_data")
    ]

    class Error(OWWidget.Error):
        stanford_tagger = Msg("Problem while loading Stanford POS Tagger\n{}")
        stopwords_encoding = Msg(
            "Invalid stopwords file encoding. Please save the file as UTF-8 and try again."
        )
        lexicon_encoding = Msg(
            "Invalid lexicon file encoding. Please save the file as UTF-8 and try again."
        )
        error_reading_stopwords = Msg("Error reading file: {}")
        error_reading_lexicon = Msg("Error reading file: {}")

    class Warning(OWWidget.Warning):
        no_token_left = Msg(
            'No tokens on output! Please, change configuration.')
        udpipe_offline = Msg(
            'No internet connection! UDPipe now only works with local models.')
        udpipe_offline_no_models = Msg(
            'No internet connection and no local UDPipe models are available.')

    def __init__(self, parent=None):
        """Build the info box, the scrollable pipeline and the commit button."""
        super().__init__(parent)
        self.corpus = None
        # n-gram range of the corpus as received; restored before every
        # in-place preprocessing run.
        self.initial_ngram_range = None
        self.preprocessor = preprocess.Preprocessor()

        # -- INFO --
        summary_box = gui.widgetBox(self.controlArea, 'Info')
        summary_box.setFixedWidth(self.control_area_width)
        self.controlArea.layout().addStretch()
        self.info_label = gui.label(summary_box, self, '')
        self.update_info()

        # -- PIPELINE --
        pipeline_frame = QFrame()
        pipeline_frame.setContentsMargins(0, 0, 0, 0)
        pipeline_frame.setFrameStyle(QFrame.Box)
        pipeline_frame.setStyleSheet('.QFrame { border: 1px solid #B3B3B3; }')
        pipeline_layout = QVBoxLayout()
        pipeline_layout.setContentsMargins(0, 0, 0, 0)
        pipeline_layout.setSpacing(0)
        pipeline_frame.setLayout(pipeline_layout)

        self.stages = []
        for module_cls in self.preprocessors:
            module = module_cls(self)
            self.stages.append(module)
            # Expose the module on the widget under the module's own
            # attribute name.
            setattr(self, module_cls.attribute, module)
            pipeline_layout.addWidget(module)
            module.change_signal.connect(self.settings_invalidated)
        pipeline_layout.addStretch()

        self.scroll = QScrollArea()
        self.scroll.setWidget(pipeline_frame)
        self.scroll.setWidgetResizable(True)
        self.scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        self.scroll.resize(pipeline_layout.sizeHint())
        self.scroll.setMinimumHeight(500)
        self.set_minimal_width()
        main_layout = self.mainArea.layout()
        main_layout.sizeHint()
        self.mainArea.layout().addWidget(self.scroll)

        # Buttons area
        self.report_button.setFixedWidth(self.control_area_width)
        apply_button = gui.auto_commit(
            self.buttonsArea, self, 'autocommit', 'Commit', box=False)
        apply_button.setFixedWidth(self.control_area_width)
        self.buttonsArea.layout().addWidget(apply_button)

    @Inputs.corpus
    def set_data(self, data=None):
        """Store a copy of the incoming corpus (or ``None``) and commit."""
        if data is not None:
            self.corpus = data.copy()
            self.initial_ngram_range = data.ngram_range
        else:
            self.corpus = None
            self.initial_ngram_range = None
        self.commit()

    def update_info(self, corpus=None):
        """Show document/token/type counts of ``corpus`` in the info label."""
        if corpus is None:
            self.info_label.setText('No corpus.')
            return
        n_documents = len(corpus)
        n_tokens = sum(len(doc_tokens) for doc_tokens in corpus.tokens)
        n_types = len(corpus.dictionary)
        template = 'Document count: {}\n' \
                   'Total tokens: {}\n'\
                   'Total types: {}'
        self.info_label.setText(
            template.format(n_documents, n_tokens, n_types))

    def commit(self):
        """Run the pipeline if a corpus is present, else send ``None``."""
        self.Warning.no_token_left.clear()
        if self.corpus is None:
            self.update_info()
            self.Outputs.corpus.send(None)
            return
        self.apply()

    def apply(self):
        """Start preprocessing of the current corpus."""
        self.preprocess()

    @asynchronous
    def preprocess(self):
        """Copy module values onto the preprocessor and run it in place."""
        for stage in self.stages:
            setattr(self.preprocessor, stage.attribute, stage.value)
        # reset pos_tags and ngrams_range
        self.corpus.pos_tags = None
        self.corpus.ngram_range = self.initial_ngram_range
        return self.preprocessor(
            self.corpus, inplace=True, on_progress=self.on_progress)

    @preprocess.on_start
    def on_start(self):
        self.progressBarInit()

    @preprocess.callback
    def on_progress(self, i):
        self.progressBarSet(i)

    @preprocess.on_result
    def on_result(self, result):
        """Publish the processed corpus; warn and send ``None`` if it is empty."""
        self.update_info(result)
        has_no_tokens = result is not None and not len(result.dictionary)
        if has_no_tokens:
            self.Warning.no_token_left()
            result = None
        self.Outputs.corpus.send(result)
        self.progressBarFinished()

    def set_minimal_width(self):
        """Make the scroll area at least as wide as the widest enabled stage."""
        enabled_widths = [stage.sizeHint().width()
                          for stage in self.stages if stage.enabled]
        # 250 is the lower bound used when no enabled stage is wider.
        widest = max([250] + enabled_widths)
        self.scroll.setMinimumWidth(widest + 20)

    @pyqtSlot()
    def settings_invalidated(self):
        self.set_minimal_width()
        self.commit()

    def send_report(self):
        self.report_items('Preprocessor', self.preprocessor.report())
class OWMap(widget.OWWidget):
    """Widget that plots data points on a Leaflet world map.

    Optionally trains the input learner on (latitude, longitude) -> target
    and hands the resulting model to the map for display as an overlay.
    """

    name = 'Geo Map'
    description = 'Show data points on a world map.'
    icon = "icons/GeoMap.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)
        learner = Input("Learner", Learner)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    replaces = [
        "Orange.widgets.visualize.owmap.OWMap",
    ]

    settingsHandler = settings.DomainContextHandler()

    want_main_area = True

    # NOTE: the original declared ``autocommit`` a second time (same value)
    # further down the class body; the redundant duplicate was dropped.
    autocommit = settings.Setting(True)
    tile_provider = settings.Setting('Black and white')
    lat_attr = settings.ContextSetting('')
    lon_attr = settings.ContextSetting('')
    class_attr = settings.ContextSetting('(None)')
    color_attr = settings.ContextSetting('')
    label_attr = settings.ContextSetting('')
    shape_attr = settings.ContextSetting('')
    size_attr = settings.ContextSetting('')
    opacity = settings.Setting(100)
    zoom = settings.Setting(100)
    jittering = settings.Setting(0)
    cluster_points = settings.Setting(False)
    show_legend = settings.Setting(True)

    # Display name -> provider identifier passed to the map.
    TILE_PROVIDERS = OrderedDict((
        ('Black and white', 'OpenStreetMap.BlackAndWhite'),
        ('OpenStreetMap', 'OpenStreetMap.Mapnik'),
        ('Topographic', 'Thunderforest.OpenCycleMap'),
        ('Topographic 2', 'Thunderforest.Outdoors'),
        ('Satellite', 'Esri.WorldImagery'),
        ('Print', 'Stamen.TonerLite'),
        ('Dark', 'CartoDB.DarkMatter'),
        ('Watercolor', 'Stamen.Watercolor'),
    ))

    class Error(widget.OWWidget.Error):
        model_error = widget.Msg("Error predicting: {}")
        learner_error = widget.Msg("Error modelling: {}")

    class Warning(widget.OWWidget.Warning):
        all_nan_slice = widget.Msg(
            'Latitude and/or longitude has no defined values (is all-NaN)')

    UserAdviceMessages = [
        widget.Message(
            'Select markers by holding <b><kbd>Shift</kbd></b> key and dragging '
            'a rectangle around them. Clear the selection by clicking anywhere.',
            'shift-selection')
    ]

    graph_name = "map"

    def __init__(self):
        """Create the map view and all controls, then push settings to the map."""
        super().__init__()
        self.map = map = LeafletMap(self)  # type: LeafletMap
        self.mainArea.layout().addWidget(map)
        self.selection = None
        self.data = None
        self.learner = None
        # Indices of the current selection. Initialised here so that
        # ``commit`` works before the map has reported any selection
        # (previously this raised AttributeError).
        self._indices = []

        def selectionChanged(indices):
            self.selection = self.data[
                indices] if self.data is not None and indices else None
            self._indices = indices
            self.commit()

        map.selectionChanged.connect(selectionChanged)

        def _set_map_provider():
            map.set_map_provider(self.TILE_PROVIDERS[self.tile_provider])

        box = gui.vBox(self.controlArea, 'Map')
        gui.comboBox(box, self, 'tile_provider',
                     orientation=Qt.Horizontal,
                     label='Map:',
                     items=tuple(self.TILE_PROVIDERS.keys()),
                     sendSelectedValue=True,
                     callback=_set_map_provider)

        self._latlon_model = DomainModel(
            parent=self, valid_types=ContinuousVariable)
        self._class_model = DomainModel(
            parent=self, placeholder='(None)',
            valid_types=DomainModel.PRIMITIVE)
        self._color_model = DomainModel(
            parent=self, placeholder='(Same color)',
            valid_types=DomainModel.PRIMITIVE)
        self._shape_model = DomainModel(
            parent=self, placeholder='(Same shape)',
            valid_types=DiscreteVariable)
        self._size_model = DomainModel(
            parent=self, placeholder='(Same size)',
            valid_types=ContinuousVariable)
        self._label_model = DomainModel(
            parent=self, placeholder='(No labels)')

        def _set_lat_long():
            self.map.set_data(self.data, self.lat_attr, self.lon_attr)
            self.train_model()

        self._combo_lat = combo = gui.comboBox(
            box, self, 'lat_attr', orientation=Qt.Horizontal,
            label='Latitude:', sendSelectedValue=True,
            callback=_set_lat_long)
        combo.setModel(self._latlon_model)
        self._combo_lon = combo = gui.comboBox(
            box, self, 'lon_attr', orientation=Qt.Horizontal,
            label='Longitude:', sendSelectedValue=True,
            callback=_set_lat_long)
        combo.setModel(self._latlon_model)

        def _toggle_legend():
            self.map.toggle_legend(self.show_legend)

        gui.checkBox(box, self, 'show_legend', label='Show legend',
                     callback=_toggle_legend)

        box = gui.vBox(self.controlArea, 'Overlay')
        self._combo_class = combo = gui.comboBox(
            box, self, 'class_attr', orientation=Qt.Horizontal,
            label='Target:', sendSelectedValue=True,
            callback=self.train_model)
        self.controls.class_attr.setModel(self._class_model)
        # Applies the enabled state/tooltip of the target combo for the
        # current (initially missing) learner.
        self.set_learner(self.learner)

        box = gui.vBox(self.controlArea, 'Points')
        self._combo_color = combo = gui.comboBox(
            box, self, 'color_attr',
            orientation=Qt.Horizontal,
            label='Color:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_color(self.color_attr))
        combo.setModel(self._color_model)
        self._combo_label = combo = gui.comboBox(
            box, self, 'label_attr',
            orientation=Qt.Horizontal,
            label='Label:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_label(self.label_attr))
        combo.setModel(self._label_model)
        self._combo_shape = combo = gui.comboBox(
            box, self, 'shape_attr',
            orientation=Qt.Horizontal,
            label='Shape:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_shape(self.shape_attr))
        combo.setModel(self._shape_model)
        self._combo_size = combo = gui.comboBox(
            box, self, 'size_attr',
            orientation=Qt.Horizontal,
            label='Size:',
            sendSelectedValue=True,
            callback=lambda: self.map.set_marker_size(self.size_attr))
        combo.setModel(self._size_model)

        def _set_opacity():
            map.set_marker_opacity(self.opacity)

        def _set_zoom():
            map.set_marker_size_coefficient(self.zoom)

        def _set_jittering():
            map.set_jittering(self.jittering)

        def _set_clustering():
            map.set_clustering(self.cluster_points)

        self._opacity_slider = gui.hSlider(
            box, self, 'opacity', None, 1, 100, 5,
            label='Opacity:', labelFormat=' %d%%',
            callback=_set_opacity)
        self._zoom_slider = gui.valueSlider(
            box, self, 'zoom', None,
            values=(20, 50, 100, 200, 300, 400, 500, 700, 1000),
            label='Symbol size:', labelFormat=' %d%%',
            callback=_set_zoom)
        self._jittering = gui.valueSlider(
            box, self, 'jittering', label='Jittering:',
            values=(0, .5, 1, 2, 5),
            labelFormat=' %.1f%%', ticks=True,
            callback=_set_jittering)
        self._clustering_check = gui.checkBox(
            box, self, 'cluster_points', label='Cluster points',
            callback=_set_clustering)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send Selection')

        # Push the stored settings to the map once the event loop runs.
        QTimer.singleShot(0, _set_map_provider)
        QTimer.singleShot(0, _toggle_legend)
        QTimer.singleShot(0, _set_opacity)
        QTimer.singleShot(0, _set_zoom)
        QTimer.singleShot(0, _set_jittering)
        QTimer.singleShot(0, _set_clustering)

    def __del__(self):
        self.progressBarFinished(None)
        self.map = None

    def commit(self):
        """Send the selected rows and the selection-annotated table."""
        self.Outputs.selected_data.send(self.selection)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.data, self._indices))

    @Inputs.data
    def set_data(self, data):
        """Handle new input data: refill models, guess lat/lon, update the map."""
        self.data = data

        self.closeContext()

        if data is None or not len(data):
            self.clear()
            return

        domain = data.domain
        for model in (self._latlon_model,
                      self._class_model,
                      self._color_model,
                      self._shape_model,
                      self._size_model,
                      self._label_model):
            model.set_domain(domain)

        lat, lon = find_lat_lon(data)
        if lat or lon:
            # Each coordinate is handled on its own: the original tested
            # ``lat`` for the longitude combo and dereferenced both
            # variables even when only one of them had been detected.
            self._combo_lat.setCurrentIndex(
                -1 if lat is None else self._latlon_model.indexOf(lat))
            self._combo_lon.setCurrentIndex(
                -1 if lon is None else self._latlon_model.indexOf(lon))
            # '' is the declared default of both context settings.
            self.lat_attr = lat.name if lat is not None else ''
            self.lon_attr = lon.name if lon is not None else ''

        if data.domain.class_var:
            self.color_attr = data.domain.class_var.name
        elif len(self._color_model):
            self._combo_color.setCurrentIndex(0)
        if len(self._shape_model):
            self._combo_shape.setCurrentIndex(0)
        if len(self._size_model):
            self._combo_size.setCurrentIndex(0)
        if len(self._label_model):
            self._combo_label.setCurrentIndex(0)
        if len(self._class_model):
            self._combo_class.setCurrentIndex(0)

        self.openContext(data)

        self.map.set_data(self.data, self.lat_attr, self.lon_attr)
        # Only the last call asks the map to update.
        self.map.set_marker_color(self.color_attr, update=False)
        self.map.set_marker_label(self.label_attr, update=False)
        self.map.set_marker_shape(self.shape_attr, update=False)
        self.map.set_marker_size(self.size_attr, update=True)

    @Inputs.data_subset
    def set_subset(self, subset):
        """Pass the ids of the subset rows (or an empty array) to the map."""
        self.map.set_subset_ids(
            subset.ids if subset is not None else np.array([]))

    def handleNewSignals(self):
        super().handleNewSignals()
        self.train_model()

    @Inputs.learner
    def set_learner(self, learner):
        """Store the learner and enable the target combo only when one is set."""
        self.learner = learner
        self.controls.class_attr.setEnabled(learner is not None)
        self.controls.class_attr.setToolTip(
            'Needs a Learner input for modelling.' if learner is None else '')

    def train_model(self):
        """Fit the learner on (lat, lon) -> target and hand the model to the map.

        The map receives ``None`` when data, learner or target is missing or
        when fitting fails; a failure is reported via ``Error.learner_error``.
        """
        model = None
        self.Error.clear()
        if self.data and self.learner and self.class_attr != '(None)':
            domain = self.data.domain
            if self.lat_attr and self.lon_attr and self.class_attr in domain:
                # Restrict the data to the two coordinate features and the
                # chosen target before fitting.
                domain = Domain(
                    [domain[self.lat_attr], domain[self.lon_attr]],
                    [domain[self.class_attr]])
                train = Table.from_table(domain, self.data)
                try:
                    model = self.learner(train)
                except Exception as e:  # pylint: disable=broad-except
                    # Learners are user-supplied; any failure is reported
                    # in the widget instead of propagating.
                    self.Error.learner_error(e)
        self.map.set_model(model)

    def disable_some_controls(self, disabled):
        """Enable/disable label, shape and clustering controls with a tooltip."""
        tooltip = ("Available when the zoom is close enough to have "
                   "<{} points in the viewport.".format(
                       self.map.N_POINTS_PER_ITER)
                   if disabled else '')
        for widget in (self._combo_label,
                       self._combo_shape,
                       self._clustering_check):
            widget.setDisabled(disabled)
            widget.setToolTip(tooltip)

    def clear(self):
        """Remove data from the map and reset all models and attribute settings."""
        self.map.set_data(None, '', '')
        for model in (self._latlon_model,
                      self._class_model,
                      self._color_model,
                      self._shape_model,
                      self._size_model,
                      self._label_model):
            model.set_domain(None)
        self.lat_attr = self.lon_attr = self.class_attr = self.color_attr = \
            self.label_attr = self.shape_attr = self.size_attr = None
class OWMergeData(widget.OWWidget):
    """Widget that joins two tables on user-selected pairs of columns.

    Supports three modes (see ``OptionNames``): left join, inner join and
    outer join. Rows are matched on the values of the chosen column pairs,
    on the row index, or on the instance id.
    """

    name = "Merge Data"
    description = "Merge datasets based on the values of selected features."
    icon = "icons/MergeData.svg"
    priority = 1110
    keywords = ["join"]

    class Inputs:
        data = Input("Data", Orange.data.Table, default=True,
                     replaces=["Data A"])
        extra_data = Input("Extra Data", Orange.data.Table,
                           replaces=["Data B"])

    class Outputs:
        data = Output(
            "Data", Orange.data.Table,
            replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"])

    # Values of the ``merging`` setting; they index ``OptionNames``,
    # ``OptionDescriptions`` and ``_merge_methods``.
    LeftJoin, InnerJoin, OuterJoin = range(3)
    OptionNames = ("Append columns from Extra data",
                   "Find matching pairs of rows",
                   "Concatenate tables")
    OptionDescriptions = (
        "The first table may contain, for instance, city names,\n"
        "and the second would be a list of cities and their coordinates.\n"
        "Columns with coordinates would then be appended to the output.",

        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains matched instances. Rows without matches are removed.",

        "Input tables contain different features describing the same data "
        "instances.\n"
        "Output contains all instances. Data from merged instances is "
        "merged into single rows.")

    UserAdviceMessages = [
        widget.Message("Confused about merging options?\nSee the tooltips!",
                       "merging_types")
    ]

    settingsHandler = MergeDataContextHandler()
    attr_pairs = ContextSetting(None, schema_only=True)
    merging = Setting(LeftJoin)
    auto_apply = Setting(True)
    settings_version = 2

    want_main_area = False
    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Error(widget.OWWidget.Error):
        matching_numeric_with_nonnum = Msg(
            "Numeric and non-numeric columns ({} and {}) cannot be matched.")
        matching_index_with_sth = Msg("Row index cannot be matched with {}.")
        matching_id_with_sth = Msg("Instance cannot be matched with {}.")
        nonunique_left = Msg(
            "Some combinations of values on the left appear in multiple rows.\n"
            "For this type of merging, every possible combination of values "
            "on the left should appear at most once.")
        nonunique_right = Msg(
            "Some combinations of values on the right appear in multiple rows."
            "\n"
            "Every possible combination of values on the right should appear "
            "at most once.")

    def __init__(self):
        """Build the merging-mode radio buttons and the row-matching box."""
        super().__init__()
        self.data = None
        self.extra_data = None

        content = [
            INDEX, INSTANCEID,
            DomainModel.ATTRIBUTES, DomainModel.CLASSES, DomainModel.METAS
        ]
        self.model = DomainModelWithTooltips(content)
        self.extra_model = DomainModelWithTooltips(content)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        gui.radioButtons(
            self.controlArea, self, "merging", box="Merging",
            btnLabels=self.OptionNames, tooltips=self.OptionDescriptions,
            callback=self.change_merging)

        self.attr_boxes = ConditionBox(
            self, self.model, self.extra_model, "", "matches")
        self.attr_boxes.add_row()
        box = gui.vBox(self.controlArea, box="Row matching")
        box.layout().addWidget(self.attr_boxes)

        gui.auto_apply(self.buttonsArea, self)
        # connect after wrapping self.commit with gui.auto_commit!
        self.attr_boxes.vars_changed.connect(self.commit)
        self.attr_boxes.vars_changed.connect(self.store_combo_state)
        self.settingsAboutToBePacked.connect(self.store_combo_state)

    def change_merging(self):
        self.commit()

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.data = data
        self.model.set_domain(data.domain if data else None)

    @Inputs.extra_data
    @check_sql_input
    def set_extra_data(self, data):
        self.extra_data = data
        self.extra_model.set_domain(data.domain if data else None)

    def store_combo_state(self):
        self.attr_pairs = self.attr_boxes.current_state()

    def handleNewSignals(self):
        """Re-open the context, refresh the input summary and commit."""
        self.closeContext()
        # Default guess; a matching stored context may override it.
        self.attr_pairs = [self._find_best_match()]
        self.openContext(self.data and self.data.domain,
                         self.extra_data and self.extra_data.domain)
        self.attr_boxes.set_state(self.attr_pairs)

        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data or self.extra_data:
            n_data = len(self.data) if self.data else 0
            n_extra_data = len(self.extra_data) if self.extra_data else 0
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_extra_data)}"
            kwargs = {"format": Qt.RichText}
            details = format_multiple_summaries([
                ("Data", self.data),
                ("Extra data", self.extra_data)
            ])
        self.info.set_input_summary(summary, details, **kwargs)

        self.unconditional_commit()

    def _find_best_match(self):
        """Return the pair of string variables sharing the most values.

        Falls back to ``(INDEX, INDEX)`` when no pair of string variables
        from the two models has any value in common.
        """
        def get_unique_str_metas_names(model_):
            return [m for m in model_ if isinstance(m, StringVariable)]

        attr, extra_attr, n_max_intersect = INDEX, INDEX, 0
        str_metas = get_unique_str_metas_names(self.model)
        extra_str_metas = get_unique_str_metas_names(self.extra_model)
        for m_a, m_b in product(str_metas, extra_str_metas):
            col = self.data[:, m_a].metas
            extra_col = self.extra_data[:, m_b].metas
            if col.size and extra_col.size \
                    and isinstance(col[0][0], str) \
                    and isinstance(extra_col[0][0], str):
                n_inter = len(np.intersect1d(col, extra_col))
                if n_inter > n_max_intersect:
                    n_max_intersect, attr, extra_attr = n_inter, m_a, m_b
        return attr, extra_attr

    def commit(self):
        """Merge the inputs (if both are present) and send the result."""
        self.clear_messages()
        merged = self.merge() if self.data and self.extra_data else None
        self.Outputs.data.send(merged)
        details = format_summary_details(merged) if merged else ""
        summary = len(merged) if merged else self.info.NoOutput
        self.info.set_output_summary(summary, details)

    def send_report(self):
        # pylint: disable=invalid-sequence-index
        self.report_items((
            ("Merging", self.OptionNames[self.merging]),
            ("Match",
             ", ".join(
                 f"{self._get_col_name(left)} with {self._get_col_name(right)}"
                 for left, right in self.attr_boxes.current_state()))))

    def merge(self):
        """Return the merged table, or ``None`` if validation fails."""
        # pylint: disable=invalid-sequence-index
        pairs = self.attr_boxes.current_state()
        if not self._check_pair_types(pairs):
            return None
        left_vars, right_vars = zip(*pairs)
        # ``_values`` updates the masks in place, clearing rows whose key
        # is undefined.
        left_mask = np.full(len(self.data), True)
        left = np.vstack(tuple(self._values(self.data, var, left_mask)
                               for var in left_vars)).T
        right_mask = np.full(len(self.extra_data), True)
        right = np.vstack(tuple(self._values(self.extra_data, var, right_mask)
                                for var in right_vars)).T
        if not self._check_uniqueness(left, left_mask, right, right_mask):
            return None
        method = self._merge_methods[self.merging]
        lefti, righti, rightu = method(self, left, left_mask, right, right_mask)
        reduced_extra_data = \
            self._compute_reduced_extra_data(right_vars, lefti, righti, rightu)
        return self._join_table_by_indices(
            reduced_extra_data, lefti, righti, rightu)

    def _check_pair_types(self, pairs):
        """Show an error and return ``False`` if a pair cannot be matched."""
        for left, right in pairs:
            if isinstance(left, ContinuousVariable) \
                    != isinstance(right, ContinuousVariable):
                self.Error.matching_numeric_with_nonnum(left, right)
                return False
            if INDEX in (left, right) and left != right:
                self.Error.matching_index_with_sth(
                    self._get_col_name(({left, right} - {INDEX}).pop()))
                return False
            if INSTANCEID in (left, right) and left != right:
                self.Error.matching_id_with_sth(
                    self._get_col_name(({left, right} - {INSTANCEID}).pop()))
                return False
        return True

    @staticmethod
    def _get_col_name(obj):
        """Return a quoted variable name, or the lower-cased special label."""
        return f"'{obj.name}'" if isinstance(obj, Variable) else obj.lower()

    def _check_uniqueness(self, left, left_mask, right, right_mask):
        """Check that keys with defined values are unique where required.

        Keys on the right must always be unique; keys on the left only for
        joins other than the left join.
        """
        ok = True
        masked_right = right[right_mask]
        if len(set(map(tuple, masked_right))) != len(masked_right):
            self.Error.nonunique_right()
            ok = False
        if self.merging != self.LeftJoin:
            masked_left = left[left_mask]
            if len(set(map(tuple, masked_left))) != len(masked_left):
                self.Error.nonunique_left()
                ok = False
        return ok

    def _compute_reduced_extra_data(self,
                                    right_match_vars, lefti, righti, rightu):
        """Prepare a table with extra columns that will appear in the merged
        table"""
        domain = self.data.domain
        extra_domain = self.extra_data.domain

        def var_needed(var):
            # Unmatched right rows are appended, so every column is kept.
            if rightu is not None and rightu.size:
                return True
            if var in right_match_vars and self.merging != self.OuterJoin:
                return False
            if var not in domain:
                return True
            # The variable exists in both tables: keep it only if the
            # values of matched rows differ.
            both_defined = (lefti != -1) * (righti != -1)
            left_col = \
                self.data.get_column_view(var)[0][lefti[both_defined]]
            right_col = \
                self.extra_data.get_column_view(var)[0][righti[both_defined]]
            if var.is_primitive():
                left_col = left_col.astype(float)
                right_col = right_col.astype(float)
                mask_left = np.isfinite(left_col)
                mask_right = np.isfinite(right_col)
                return not (
                    np.all(mask_left == mask_right)
                    and np.all(left_col[mask_left] == right_col[mask_right]))
            else:
                return not np.all(left_col == right_col)

        extra_vars = [
            var for var in chain(extra_domain.variables, extra_domain.metas)
            if var_needed(var)
        ]
        return self.extra_data[:, extra_vars]

    @staticmethod
    def _values(data, var, mask):
        """Return an array of keys for rows of the table.

        ``mask`` is updated in place: entries are cleared for rows whose
        value of ``var`` is missing (NaN, or falsy for non-primitive
        variables).
        """
        if var == INDEX:
            return np.arange(len(data))
        if var == INSTANCEID:
            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is
            # what that alias referred to.
            return np.fromiter(
                (inst.id for inst in data), count=len(data), dtype=int)
        col = data.get_column_view(var)[0]
        if var.is_primitive():
            col = col.astype(float, copy=False)
            nans = np.isnan(col)
            mask *= ~nans
            if var.is_discrete:
                # Map value indices to value names; missing values index
                # the extra trailing NaN entry.
                col = col.astype(int)
                col[nans] = len(var.values)
                col = np.array(var.values + (np.nan, ))[col]
        else:
            col = col.copy()
            defined = col.astype(bool)
            mask *= defined
            col[~mask] = np.nan
        return col

    def _left_join_indices(self, left, left_mask, right, right_mask):
        """Compute a two-row array of indices:
        - the first row contains indices for the primary table,
        - the second row contains the matching rows in the extra table or -1"""
        data = self.data
        # Don't match nans. This is needed since numpy may change nan to string
        # nan, so nan's will match each other
        indices = np.arange(len(right))
        indices[~right_mask] = -1
        if right.shape[1] == 1:
            # The more common case can be handled faster
            right_map = dict(zip(right.flatten(), indices))
            righti = (right_map.get(val, -1) for val in left.flatten())
        else:
            right_map = dict(zip(map(tuple, right), indices))
            righti = (right_map.get(tuple(val), -1) for val in left)
        righti = np.fromiter(righti, dtype=np.int64, count=len(data))
        lefti = np.arange(len(data), dtype=np.int64)
        righti[lefti[~left_mask]] = -1
        return lefti, righti, None

    def _inner_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the array of indices,
        then remove those with no match in the second table"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        mask = righti != -1
        return lefti[mask], righti[mask], None

    def _outer_join_indices(self, left, left_mask, right, right_mask):
        """Use _left_join_indices to compute the array of indices,
        then add rows in the second table without a match in the first"""
        lefti, righti, _ = \
            self._left_join_indices(left, left_mask, right, right_mask)
        unused = np.full(len(right), True)
        unused[righti] = False
        if len(right) - 1 not in righti:
            # righti can include -1, which sets the last element as used
            unused[-1] = True
        return lefti, righti, np.nonzero(unused)[0]

    # Indexed by the ``merging`` setting; called as ``method(self, ...)``.
    _merge_methods = [
        _left_join_indices, _inner_join_indices, _outer_join_indices
    ]

    def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu):
        """Join (horizontally) self.data and reduced_extra, taking the pairs
        of rows given in indices"""
        if not lefti.size:
            return None
        lt_dom = self.data.domain
        xt_dom = reduced_extra.domain
        domain = self._domain_rename_duplicates(
            lt_dom.attributes + xt_dom.attributes,
            lt_dom.class_vars + xt_dom.class_vars,
            lt_dom.metas + xt_dom.metas)
        X = self._join_array_by_indices(
            self.data.X, reduced_extra.X, lefti, righti)
        Y = self._join_array_by_indices(
            np.c_[self.data.Y], np.c_[reduced_extra.Y], lefti, righti)
        string_cols = [
            i for i, var in enumerate(domain.metas) if var.is_string
        ]
        metas = self._join_array_by_indices(
            self.data.metas, reduced_extra.metas, lefti, righti, string_cols)
        if rightu is not None:
            # This domain is used for transforming the extra rows for outer join
            # It must use the original - not renamed - variables from right, so
            # values are copied,
            # but new domain for the left, so renamed values are *not* copied
            right_domain = Orange.data.Domain(
                domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes,
                domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars,
                domain.metas[:len(lt_dom.metas)] + xt_dom.metas)
            extras = self.extra_data[rightu].transform(right_domain)
            X = np.vstack((X, extras.X))
            extras_Y = extras.Y
            if extras_Y.ndim == 1:
                extras_Y = extras_Y.reshape(-1, 1)
            Y = np.vstack((Y, extras_Y))
            metas = np.vstack((metas, extras.metas))
        table = Orange.data.Table.from_numpy(domain, X, Y, metas)
        table.name = getattr(self.data, 'name', '')
        table.attributes = getattr(self.data, 'attributes', {})
        if rightu is not None:
            table.ids = np.hstack(
                (self.data.ids, self.extra_data.ids[rightu]))
        else:
            table.ids = self.data.ids[lefti]

        return table

    def _domain_rename_duplicates(self, attributes, class_vars, metas):
        """Check for duplicate variable names in domain. If any, rename
        the variables, by replacing them with new ones (names are
        appended a number). """
        attrs, cvars, mets = [], [], []
        n_attrs, n_cvars, n_metas = len(attributes), len(class_vars), len(
            metas)
        # Target list for each variable, aligned with ``all_vars``.
        lists = [attrs] * n_attrs + [cvars] * n_cvars + [mets] * n_metas

        all_vars = attributes + class_vars + metas
        proposed_names = [m.name for m in all_vars]
        unique_names = get_unique_names_duplicates(proposed_names)
        duplicates = set()
        for p_name, u_name, var, c in zip(proposed_names, unique_names,
                                          all_vars, lists):
            if p_name != u_name:
                duplicates.add(p_name)
                var = var.copy(name=u_name)
            c.append(var)
        if duplicates:
            self.Warning.renamed_vars(", ".join(duplicates))
        return Orange.data.Domain(attrs, cvars, mets)

    @staticmethod
    def _join_array_by_indices(left, right, lefti, righti, string_cols=None):
        """Join (horizontally) two arrays, taking pairs of rows given in
        indices
        """
        def prepare(arr, inds, str_cols):
            try:
                newarr = arr[inds]
            except IndexError:
                newarr = np.full_like(arr, np.nan)
            else:
                # Rows without a partner (index -1) get an "empty" row:
                # NaN everywhere, '' in string columns.
                empty = np.full(arr.shape[1], np.nan)
                if str_cols:
                    assert arr.dtype == object
                    empty = empty.astype(object)
                    empty[str_cols] = ''
                newarr[inds == -1] = empty
            return newarr

        left_width = left.shape[1]
        str_left = [i for i in string_cols or () if i < left_width]
        str_right = [
            i - left_width for i in string_cols or () if i >= left_width
        ]
        res = hstack((prepare(left, lefti, str_left),
                      prepare(right, righti, str_right)))
        return res

    @staticmethod
    def migrate_settings(settings, version=None):
        """Convert settings saved by older versions to the current layout.

        Unversioned settings stored one attribute pair per merging mode;
        versions before 2 stored ``attr_pairs`` as a plain setting rather
        than in a context.
        """
        def mig_value(x):
            if x == "Position (index)":
                return INDEX
            if x == "Source position (index)":
                return INSTANCEID
            return x

        if not version:
            operations = ("augment", "merge", "combine")
            oper = operations[settings["merging"]]
            settings["attr_pairs"] = (
                True, True,
                [(mig_value(settings[f"attr_{oper}_data"]),
                  mig_value(settings[f"attr_{oper}_extra"]))])
            for oper in operations:
                del settings[f"attr_{oper}_data"]
                del settings[f"attr_{oper}_extra"]

        if not version or version < 2 and "attr_pairs" in settings:
            data_exists, extra_exists, attr_pairs = settings.pop("attr_pairs")
            if not (data_exists and extra_exists):
                settings["context_settings"] = []
                return

            mapper = {0: (INDEX, 100), 1: (INSTANCEID, 100)}
            context = ContextHandler().new_context()
            context.values["attr_pairs"] = [
                tuple(mapper.get(var, (var, 100)) for var in pair)
                for pair in attr_pairs
            ]
            context.variables1 = {}
            context.variables2 = {}
            settings["context_settings"] = [context]
class OWTestLearners(OWWidget):
    """Test the input learners on the input data and show their scores.

    The widget evaluates every connected learner with the selected
    resampling scheme in a background task, shows one row of scores per
    learner in a table, and outputs the merged evaluation results together
    with the predictions.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation (index into `NFolds`)
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling (index into `NRepeats`)
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size (index into `SampleSizes`)
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    #: Preferred column order of the built-in scores, per target type
    BUILTIN_ORDER = {
        DiscreteVariable: ("AUC", "CA", "F1", "Precision", "Recall"),
        ContinuousVariable: ("MSE", "RMSE", "MAE", "R2")
    }

    shown_scores = \
        settings.Setting(set(chain(*BUILTIN_ORDER.values())))

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train dataset is empty.")
        test_data_empty = Msg("Test dataset is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        no_class_values = Msg("Target variable has no values.")
        only_one_class_var_value = Msg("Target variable has only one value.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        """Initialize the widget state and build the control/main areas."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, InputLearner]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(wordWrap=True, )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(780, 1)

    def _update_controls(self):
        """Refresh the fold-feature combo for the current training data.

        The "Cross validation by feature" option is disabled (and the
        resampling falls back to K-fold) when the feature model is empty.
        """
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        else:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.no_class_values.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data:
            # Each condition is paired with the error it raises; only the
            # first one that holds is reported.
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                np.isnan(data.Y).all(),
                data.domain.has_discrete_class and
                len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required,
                self.Error.too_many_classes,
                self.Error.no_class_values,
                self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None

        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """Return the text inserted into the `missing_data` warning.

        Only called when at least one of the two flags is set, hence there
        is no entry for ``(False, False)``.
        """
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        """Collect the scalar scorers applicable to the current target.

        Built-in scorers come first, in the order given by `BUILTIN_ORDER`;
        all other scorers follow.
        """
        if self.data is None or self.data.domain.class_var is None:
            self.scorers = []
            return
        class_var = self.data.domain.class_var
        # Match the target by isinstance rather than by its exact type, so
        # that a subclass of the listed variable types does not raise
        # KeyError; an unknown target type simply has no preferred order.
        builtin_names = next(
            (names for var_type, names in self.BUILTIN_ORDER.items()
             if isinstance(class_var, var_type)),
            ())
        order = {name: i for i, name in enumerate(builtin_names)}
        # 'abstract' is retrieved from __dict__ to avoid inheriting
        usable = (cls for cls in scoring.Score.registry.values()
                  if cls.is_scalar and not cls.__dict__.get("abstract")
                  and isinstance(class_var, cls.class_types))
        self.scorers = sorted(usable, key=lambda cls: order.get(cls.name, 99))

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self._update_header()
        self._update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        """Switch to K-fold resampling after one of its options changed."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        """Switch to feature-fold resampling after the feature changed."""
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Switch to random sampling after one of its options changed."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Invalidate all results and start a new evaluation."""
        self._invalidate()
        self.__update()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        model = self.result_model
        model.setColumnCount(1 + len(self.scorers))
        for col, score in enumerate(self.scorers):
            item = QStandardItem(score.name)
            item.setToolTip(score.long_name)
            model.setHorizontalHeaderItem(col + 1, item)
        self._update_shown_columns()

    def _update_shown_columns(self):
        """Hide the score columns whose names are not in `shown_scores`."""
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            header.setSectionHidden(section, col_name not in self.shown_scores)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [Try(scorer_caller(scorer, ovr_results, target=1))
                             for scorer in self.scorers]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        """Repopulate the target class combo for the current training data."""
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            # list(...) so the concatenation works whether `values` is a
            # list or a tuple
            items = [self.TARGET_AVERAGE] + list(class_var.values)
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Recompute the shown scores for the newly selected target class."""
        self._update_stats_model()

    def _invalidate(self, which=None):
        """Drop stored results of learners and schedule their re-evaluation.

        Parameters
        ----------
        which : Optional[Iterable[Any]]
            Input keys of the learners to invalidate; all if None.
        """
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                # blank the score cells of the learner's row
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def show_column_chooser(self, pos):
        """Show a context menu for choosing the visible score columns.

        Parameters
        ----------
        pos : QPoint
            Position of the request in header coordinates.
        """
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        def update(col_name, checked):
            if checked:
                self.shown_scores.add(col_name)
            else:
                # discard: unchecking must not raise if the name is absent
                self.shown_scores.discard(col_name)
            self._update_shown_columns()

        menu = QMenu()
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            action = menu.addAction(col_name)
            action.setCheckable(True)
            action.setChecked(col_name in self.shown_scores)
            action.triggered.connect(partial(update, col_name))
        menu.exec(header.mapToGlobal(pos))

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.FeatureFold:
            # Previously this scheme was left out of the report altogether.
            items = [("Sampling type", "Cross validation by feature")]
            if self.fold_feature is not None:
                items.append(("Feature",
                              getattr(self.fold_feature, "name",
                                      str(self.fold_feature))))
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored settings to the current `settings_version`.

        Parameters
        ----------
        settings_ : dict
            Stored settings; modified in place.
        version : Optional[int]
            Version of the stored settings; None is treated as 0.
        """
        # Settings stored without a version must not fail on `None < 2`.
        version = version or 0
        if version < 2:
            # Resampling indices above 0 are shifted by one.
            if settings_.get("resampling", 0) > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')]

    @Slot(float)
    def setProgressValue(self, value):
        """Set the progress bar value (invoked from the worker callback)."""
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        """Check the preconditions and submit the pending evaluation."""
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
        )
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.KFold:
            folds = self.NFolds[self.n_folds]
            # NOTE(review): `cv_stratified` is shown in the GUI and in the
            # report but is not passed on here - confirm whether
            # CrossValidation should receive it.
            test_f = partial(
                Orange.evaluation.CrossValidation,
                self.data, learners_c, k=folds,
                random_state=rstate, **common_args)
        elif self.resampling == OWTestLearners.FeatureFold:
            test_f = partial(
                Orange.evaluation.CrossValidationFeature,
                self.data, learners_c, self.fold_feature,
                **common_args
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            test_f = partial(
                Orange.evaluation.LeaveOneOut,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.ShuffleSplit:
            train_size = self.SampleSizes[self.sample_size] / 100
            test_f = partial(
                Orange.evaluation.ShuffleSplit,
                self.data, learners_c,
                n_resamples=self.NRepeats[self.n_repeats],
                train_size=train_size, test_size=None,
                stratified=self.shuffle_stratified,
                random_state=rstate, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            test_f = partial(
                Orange.evaluation.TestOnTrainingData,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData,
                self.data, self.test_data, learners_c, **common_args
            )
        else:
            assert False, "self.resampling %s" % self.resampling

        def replace_learners(evalfunc, *args, **kwargs):
            # restore the original learner objects in the returned results
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[float]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            if task.cancelled:
                raise UserInterrupt()
            QMetaObject.invokeMethod(
                self, "setProgressValue", Qt.QueuedConnection,
                Q_ARG(float, 100 * finished)
            )

        def ondone(_):
            QMetaObject.invokeMethod(
                self, "__task_complete", Qt.QueuedConnection,
                Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()    # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            # Any failure of the evaluation is logged and shown in the
            # widget instead of being propagated.
            log.exception("testing error (in __task_complete):",
                          exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er), er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(scorer_caller(scorer, result))
                             for scorer in self.scorers]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self._update_header()
        self._update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()

    def onDeleteWidget(self):
        """Cancel any running evaluation before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
class OWItemsets(widget.OWWidget):
    """Find frequent itemsets in the input data and show them as a tree.

    Selecting itemsets in the tree outputs the data instances that contain
    all the items of at least one selected itemset.
    """
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table)

    class Error(widget.OWWidget.Error):
        need_discrete_data = widget.Msg(
            "Need some discrete data to work with.")
        no_disc_features = widget.Msg(
            "Discrete features required but data has none.")

    class Warning(widget.OWWidget.Warning):
        cont_attrs = widget.Msg(
            "Data has continuous attributes which will be skipped.")
        err_reg_expression = widget.Msg("Error in regular expression: {}")

    # Minimal support, in percent (divided by 100 before the search)
    minSupport = settings.Setting(30)
    # The search stops after this many itemsets
    maxItemsets = settings.Setting(10000)
    # Apply the keyword/size filters already during the search
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    # NOTE(review): both messages use the same persistent id
    # ('itemsets-order'); this looks like a copy-paste slip - confirm
    # whether the second one should have an id of its own.
    UserAdviceMessages = [
        widget.Message('Itemset are listed in item-sorted order, i.e. '
                       'an itemset containing A and B is only listed once, as '
                       'A > B (and not also B > A).',
                       'itemsets-order', widget.Message.Warning),
        widget.Message('To select all the itemsets that are descendants of '
                       '(include) some item X (i.e. the whole subtree), you '
                       'can fold the subtree at that item and then select it.',
                       'itemsets-order', widget.Message.Information)
    ]

    def __init__(self):
        """Initialize the widget state and build the GUI."""
        super().__init__()
        self.data = None
        # Initialized here so that commit() and selectionChanged() are safe
        # to call before any data or selection exists.
        self.output = None
        self.X = None
        self.onehot_mapping = {}
        self._is_running = False
        self.isRegexMatch = lambda x: True
        self.tree = QTreeWidget(self.mainArea, columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.valueSlider(box, self, 'minSupport',
                        values=[.0001, .0005, .001, .005, .01, .05, .1, .5]
                        + list(range(1, 101)),
                        label='Minimal support:', labelFormat="%g%%",
                        callback=lambda: self.find_itemsets())
        gui.hSlider(box, self, 'maxItemsets', minValue=10000, maxValue=100000,
                    step=10000, label='Max. number of itemsets:',
                    labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        self.button = gui.auto_commit(
            box, self, 'autoFind', 'Find Itemsets', commit=self.find_itemsets,
            callback=lambda: self.autoFind and self.find_itemsets())

        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box, self, 'filterKeywords', 'Contains:',
                     callback=self.filter_change, orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                             'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox, self, 'filterMinItems', 1, 998, label='Min. items:',
                 callback=self.filter_change)
        gui.spin(hbox, self, 'filterMaxItems', 2, 999, label='Max. items:',
                 callback=self.filter_change)
        gui.checkBox(box, self, 'filterSearch',
                     label='Apply these filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                             'to these filter conditions already in the search '
                             'phase. \nIf unchecked, the only filters applied '
                             'during search are the ones above, '
                             'and the itemsets are \nfiltered afterwards only for '
                             'display, i.e. only the matching itemsets are shown.')
        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        self.filter_change()

    # Item data role under which a tree node stores its item id
    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        """Collect the instances matching the selected itemsets and commit.

        Selecting a collapsed node also selects its whole subtree.
        """
        if self.data is None:
            # The tree may still show itemsets of data that was removed;
            # there is nothing to match them against.
            self.nSelectedExamples = 0
            self.output = None
            self.commit()
            return

        X = self.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            # the node itself followed by all of its descendants
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            # item ids on the path from the node up to the top level
            while node is not None:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            # (first child, last child) index pairs of all descendants
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)),
                       self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for selected in self.tree.selectedItems():
            if selected.isExpanded():
                nodes = (selected, )
            else:
                nodes = whole_subtree(selected)
                for srange in selection_ranges(selected):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                if issparse(X):
                    # rows that are nonzero in every column of the itemset
                    rows = (len(cols) == np.bincount(
                        (X[:, cols] != 0).indices,
                        minlength=X.shape[0])).nonzero()[0]
                else:
                    rows = where((X[:, cols] == vals).all(axis=1))[0]
                instances.update(rows)

        # Extend the selection without re-entering this handler.
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        try:
            self.tree.selectionModel().select(
                item_selection,
                QItemSelectionModel.Select | QItemSelectionModel.Rows)
        finally:
            self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        """Send the currently matching data to the output."""
        self.Outputs.matching_data.send(self.output)

    def filter_change(self):
        """Recompile the keyword filter and hide non-matching tree nodes."""
        self.Warning.err_reg_expression.clear()
        try:
            # Split on runs of commas/whitespace. The pattern must not
            # contain a capturing group: re.split would then also return
            # the separators, and a literal ',' would end up as one of
            # the alternatives of the compiled expression.
            keywords = [
                kw for kw in re.split(r'[,\s]+', self.filterKeywords.strip())
                if kw]
            isRegexMatch = self.isRegexMatch = re.compile(
                '|'.join(keywords), re.IGNORECASE).search
        except (re.error, OverflowError) as e:
            self.Warning.err_reg_expression(e.args[0])
            isRegexMatch = self.isRegexMatch = lambda x: True

        def hide(node, depth, has_kw):
            # A leaf is hidden unless it or one of its ancestors matches
            # the filter and its depth is within the size limits; an inner
            # node is hidden when all of its children are hidden.
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            n_children = node.childCount()
            if n_children:
                # sum() (not all()) so that every child is visited
                n_hidden = sum(hide(node.child(i), depth + 1, has_kw)
                               for i in range(n_children))
                hidden = n_hidden == n_children
            else:
                hidden = (not has_kw or
                          not self.filterMinItems <= depth
                          <= self.filterMaxItems)
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            # the texts of the item and its ancestors, top-most first
            tooltip = []
            node = self
            while node is not None:
                tooltip.append(node.text(0))
                node = node.parent()
            return '\n'.join(reversed(tooltip))

    def find_itemsets(self):
        """Search for frequent itemsets and populate the tree with them.

        Calling the method while a search is running cancels that search
        instead of starting a new one.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            # a second call acts as "Cancel"; the running loop checks this
            self._is_running = False
            return
        self._is_running = True

        self.button.button.setText('Cancel')

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            """Children of a tree node by name, plus the node as `item`."""
            def __init__(self, item):
                self.item = item

        try:
            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.Error.need_discrete_data.clear()
            if X is None:
                # Nothing to search in; previously the code carried on
                # with X being None.
                self.Error.need_discrete_data()
                self.onehot_mapping = {}
                self.nItemsets = 0
                self.nSelectedItemsets = 0
                self.nSelectedExamples = 0
                return

            self.onehot_mapping = mapping
            ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
            names = {item: ITEM_FMT.format(var.name, val)
                     for item, var, val in OneHot.decode(mapping.keys(),
                                                         data, mapping)}
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems = self.filterMinItems
            filterMaxItems = self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(
                        X, self.minSupport / 100):

                    if filterSearch and \
                            not filterMinItems <= len(itemset) <= filterMaxItems:
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(
                                    parent.item,
                                    [name, str(support),
                                     '{:.4g}'.format(
                                         100 * support / len(data))])
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the
                                # support slider was moved, this line
                                # excepted with:
                                #     RuntimeError: wrapped C/C++ object of
                                #     type TreeWidgetItem has been deleted
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Remove the nodes created for this itemset; there
                        # are none if all of them existed already.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()
                    if not self._is_running or nItemsets >= self.maxItemsets:
                        break

                    qApp.processEvents()

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            # Restore the tree and the button on every exit path, so that
            # an early return does not leave updates disabled, signals
            # blocked and the widget marked as running.
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
            self.button.button.setText('Find Itemsets')

    @Inputs.data
    def set_data(self, data):
        """Set the input data.

        Parameters
        ----------
        data : Optional[Table]
        """
        self.data = data
        is_error = False
        # Cleared for None too, so messages do not outlive removed data.
        self.Warning.cont_attrs.clear()
        self.Error.no_disc_features.clear()
        if data is not None:
            self.button.setDisabled(False)

            self.X = data.X
            if issparse(data.X):
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.Error.no_disc_features()
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.Warning.cont_attrs()
        else:
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
class OWImageViewer(widget.OWWidget):
    """Show thumbnails of the images referenced by a string meta column.

    The selected thumbnails determine which rows of the input table are
    sent to the output.
    """

    name = "Image Viewer"
    description = "View images referred to in the data."
    icon = "icons/ImageViewer.svg"
    priority = 130
    replaces = ["Orange.widgets.data.owimageviewer.OWImageViewer", ]

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.DomainContextHandler()

    # Index into `stringAttrs` of the column holding the image location
    imageAttr = settings.ContextSetting(0)
    # Index into `allAttrs` of the column used as the thumbnail title
    titleAttr = settings.ContextSetting(0)

    imageSize = settings.Setting(100)
    autoCommit = settings.Setting(True)

    buttons_area_orientation = Qt.Vertical
    graph_name = "scene"

    UserAdviceMessages = [
        widget.Message(
            "Pressing the 'Space' key while the thumbnail view has focus and "
            "a selected item will open a window with a full image",
            persistent_id="preview-introduction")
    ]

    def __init__(self):
        """Build the control area, the thumbnail view and the image loader."""
        super().__init__()
        self.data = None
        self.allAttrs = []
        self.stringAttrs = []
        self.selectedIndices = []

        #: List of _ImageItems
        self.items = []

        # Counters of failed / successful image loads for the status label
        self._errcount = 0
        self._successcount = 0

        info_box = gui.vBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(info_box, "Waiting for input.\n")

        self.imageAttrCB = gui.comboBox(
            self.controlArea, self, "imageAttr",
            box="Image Filename Attribute",
            tooltip="Attribute with image filenames",
            # Changing the image column rebuilds the whole scene
            callback=[self.clearScene, self.setupScene],
            contentsLength=12,
            addSpace=True,
        )

        self.titleAttrCB = gui.comboBox(
            self.controlArea, self, "titleAttr",
            box="Title Attribute",
            tooltip="Attribute with image title",
            callback=self.updateTitles,
            contentsLength=12,
            addSpace=True
        )

        gui.hSlider(
            self.controlArea, self, "imageSize",
            box="Image Size", minValue=32, maxValue=1024, step=16,
            callback=self.updateSize,
            createLabel=False
        )
        gui.rubber(self.controlArea)

        gui.auto_commit(self.buttonsArea, self, "autoCommit", "Send",
                        box=False)

        self.thumbnailView = ThumbnailView(
            alignment=Qt.AlignTop | Qt.AlignLeft,  # scene alignment,
            focusPolicy=Qt.StrongFocus,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn
        )
        self.mainArea.layout().addWidget(self.thumbnailView)

        self.scene = self.thumbnailView.scene()
        self.scene.selectionChanged.connect(self.onSelectionChanged)
        self.loader = ImageLoader(self)

    def sizeHint(self):
        """Return the preferred initial size of the widget window."""
        return QSize(800, 600)

    @Inputs.data
    def setData(self, data):
        """Handle a new input table (or ``None``) and rebuild the scene."""
        self.closeContext()
        self.clear()
        self.data = data

        if data is None:
            self.info.setText("Waiting for input.\n")
            return

        domain = data.domain
        self.allAttrs = domain.class_vars + domain.metas + domain.attributes

        # String metas carrying a "type" attribute are listed first; the sort
        # is stable, so the domain order is kept within both groups.
        self.stringAttrs = sorted(
            (var for var in domain.metas if var.is_string),
            key=lambda var: "type" not in var.attributes
        )

        # Preselect the first column explicitly tagged as an image column.
        tagged = next(
            (pos for pos, var in enumerate(self.stringAttrs)
             if var.attributes.get("type") == "image"),
            None
        )
        if tagged is not None:
            self.imageAttr = tagged

        self.imageAttrCB.setModel(VariableListModel(self.stringAttrs))
        self.titleAttrCB.setModel(VariableListModel(self.allAttrs))

        self.openContext(data)

        # Clamp the (possibly restored) indices into the valid range.
        last_image = len(self.stringAttrs) - 1
        last_title = len(self.allAttrs) - 1
        self.imageAttr = max(min(self.imageAttr, last_image), 0)
        self.titleAttr = max(min(self.titleAttr, last_title), 0)

        if self.stringAttrs:
            self.setupScene()

    def clear(self):
        """Drop the data, reset the error state, the combos and the scene."""
        self.data = None
        self.error()
        self.imageAttrCB.clear()
        self.titleAttrCB.clear()
        self.clearScene()

    def setupScene(self):
        """Create one thumbnail per row with a known image value.

        Local files are read right away; other valid URLs are requested
        through ``self.loader`` and the thumbnail is filled in once the
        returned future completes.
        """
        self.error()
        if not self.data:
            return

        image_var = self.stringAttrs[self.imageAttr]
        title_var = self.allAttrs[self.titleAttr]
        assert self.thumbnailView.count() == 0
        thumb_size = QSizeF(self.imageSize, self.imageSize)

        for row, instance in enumerate(self.data):
            value = instance[image_var]
            if not numpy.isfinite(value):  # skip missing
                continue

            url = self.urlFromValue(value)
            caption = str(instance[title_var])

            thumb = GraphicsThumbnailWidget(QPixmap(), title=caption)
            thumb.setThumbnailSize(thumb_size)
            thumb.setToolTip(url.toString())
            thumb.instance = instance
            self.thumbnailView.addThumbnail(thumb)

            if not url.isValid():
                future = None
            elif url.isLocalFile():
                reader = QImageReader(url.toLocalFile())
                image = reader.read()
                if image.isNull():
                    # Report the read error in the tooltip
                    thumb.setToolTip(
                        thumb.toolTip() + "\n" + reader.errorString())
                    self._errcount += 1
                else:
                    thumb.setPixmap(QPixmap.fromImage(image))
                    self._successcount += 1

                # Wrap the result in an already completed future so that
                # local and remote items are handled alike.
                future = Future()
                future.set_result(image)
                future._reply = None
            else:
                future = self.loader.get(url)

                def on_loaded(done, target=thumb):
                    """Put the fetched image (or an empty one) on `target`."""
                    if done.cancelled():
                        return

                    assert done.done()

                    failure = done.exception()
                    if failure:
                        # Should be some generic error image.
                        pixmap = QPixmap()
                        target.setToolTip(
                            target.toolTip() + "\n" + str(failure))
                    else:
                        pixmap = QPixmap.fromImage(done.result())

                    target.setPixmap(pixmap)
                    self._noteCompleted(done)

                future.add_done_callback(on_loaded)

            self.items.append(_ImageItem(row, thumb, url, future))

        still_loading = any(
            entry.future is not None and not entry.future.done()
            for entry in self.items
        )
        if still_loading:
            self.info.setText("Retrieving...\n")
        else:
            self._updateStatus()

    def urlFromValue(self, value):
        """Return the QUrl for `value`, resolved against the "origin".

        The "origin" is read from the attributes of the value's variable.
        An existing local path takes precedence over resolving against it.
        """
        origin_spec = value.variable.attributes.get("origin", "")
        if not origin_spec:
            origin = QUrl("")
        elif QDir(origin_spec).exists():
            origin = QUrl.fromLocalFile(origin_spec)
        else:
            origin = QUrl(origin_spec)
            if not origin.scheme():
                origin.setScheme("file")

        # Ensure a trailing slash so that the last path component is kept
        # when resolving a relative name.
        base_path = origin.path()
        if base_path.strip() and not base_path.endswith("/"):
            origin.setPath(base_path + "/")

        location = str(value)
        if os.path.exists(location):
            url = QUrl.fromLocalFile(location)
        else:
            url = origin.resolved(QUrl(location))

        if not url.scheme():
            url.setScheme("file")
        return url

    def _cancelAllFutures(self):
        """Cancel every item's future and dispose of its reply, if any."""
        for entry in self.items:
            future = entry.future
            if future is None:
                continue
            future.cancel()
            reply = future._reply
            if reply is not None:
                reply.close()
                reply.deleteLater()
                future._reply = None

    def clearScene(self):
        """Cancel pending loads, remove all thumbnails, reset the counters."""
        self._cancelAllFutures()
        self.items = []
        self.thumbnailView.clear()
        self._errcount = 0
        self._successcount = 0

    def thumbnailItems(self):
        """Return the thumbnail widgets of all items."""
        return [entry.widget for entry in self.items]

    def updateSize(self):
        """Apply the current `imageSize` setting to every thumbnail."""
        thumb_size = QSizeF(self.imageSize, self.imageSize)
        for thumb in self.thumbnailItems():
            thumb.setThumbnailSize(thumb_size)

    def updateTitles(self):
        """Re-title every thumbnail from the selected title column."""
        title_var = self.allAttrs[self.titleAttr]
        for entry in self.items:
            thumb = entry.widget
            thumb.setTitle(str(thumb.instance[title_var]))

    def onSelectionChanged(self):
        """Record the row indices of selected thumbnails and commit."""
        self.selectedIndices = [
            entry.index for entry in self.items if entry.widget.isSelected()
        ]
        self.commit()

    def commit(self):
        """Send the selected rows, or ``None`` when nothing is selected."""
        selected = None
        if self.data and self.selectedIndices:
            selected = self.data[self.selectedIndices]
        self.Outputs.data.send(selected)

    def _noteCompleted(self, future):
        """Update the counters for a completed future and the status text."""
        # Note the completed future's state
        if future.cancelled():
            return

        failure = future.exception()
        if failure:
            self._errcount += 1
            _log.debug("Error: %r", failure)
        else:
            self._successcount += 1

        self._updateStatus()

    def _updateStatus(self):
        """Refresh the info label and raise an error if nothing loaded."""
        count = sum(1 for entry in self.items if entry.future is not None)
        self.info.setText(
            "Retrieving:\n" +
            "{} of {} images".format(self._successcount, count))

        if self._errcount + self._successcount != count:
            # Some loads are still pending
            return

        if self._errcount:
            summary = "{} images, {} errors".format(count, self._errcount)
        else:
            summary = "{} images".format(count)
        self.info.setText("Done:\n" + summary)

        attr = self.stringAttrs[self.imageAttr]
        if self._errcount == count and "type" not in attr.attributes:
            self.error("No images found! Make sure the '%s' attribute "
                       "is tagged with 'type=image'" % attr.name)

    def onDeleteWidget(self):
        """Cancel outstanding loads and release the data on removal."""
        self._cancelAllFutures()
        self.clear()
class OWFile(widget.OWWidget, RecentPathsWComboMixin):
    """Read a data table from a local file or a URL and send it on.

    The columns of the loaded table can be edited in the embedded domain
    editor before the table is sent to the output.
    """

    name = "文件(File)"
    id = "orange.widgets.data.file"
    description = "从输入文件或网络读取数据并将数据表发送到输出。"
    icon = "icons/File.svg"
    priority = 10
    category = "Data"
    keywords = ["file", "load", "read", "open"]

    class Outputs:
        data = Output("数据(Data)", Table,
                      doc="Attribute-valued dataset read from the input file.",
                      replaces=['Data'])

    want_main_area = False

    SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())]
    # Local files larger than this (in bytes) are not loaded automatically
    SIZE_LIMIT = 1e7
    # Possible values of the `source` setting
    LOCAL_FILE, URL = range(2)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    # pylint seems to want declarations separated from definitions
    recent_paths: List[RecentPath]
    recent_urls: List[str]
    variables: list

    # Overload RecentPathsWidgetMixin.recent_paths to set defaults
    recent_paths = Setting([
        RecentPath("", "sample-datasets", "iris.tab"),
        RecentPath("", "sample-datasets", "titanic.tab"),
        RecentPath("", "sample-datasets", "housing.tab"),
        RecentPath("", "sample-datasets", "heart_disease.tab"),
        RecentPath("", "sample-datasets", "brown-selected.tab"),
        RecentPath("", "sample-datasets", "zoo.tab"),
    ])
    recent_urls = Setting([])
    source = Setting(LOCAL_FILE)
    xls_sheet = ContextSetting("")
    sheet_names = Setting({})
    url = Setting("")

    variables = ContextSetting([])

    domain_editor = SettingProvider(DomainEditor)

    class Warning(widget.OWWidget.Warning):
        file_too_big = widget.Msg(
            "The file is too large to load automatically."
            " Press Reload to load.")
        load_warning = widget.Msg("Read warning:\n{}")
        performance_warning = widget.Msg(
            "Categorical variables with >100 values may decrease performance.")

    class Error(widget.OWWidget.Error):
        file_not_found = widget.Msg("File not found.")
        missing_reader = widget.Msg("Missing reader.")
        sheet_error = widget.Msg("Error listing available sheets.")
        unknown = widget.Msg("Read error:\n{}")

    class NoFileSelected:
        """Sentinel returned by `_get_reader` when there is nothing to read."""

    UserAdviceMessages = [
        widget.Message(
            "Use CSV File Import widget for advanced options "
            "for comma-separated files",
            "use-csv-file-import"),
        widget.Message(
            "This widget loads only tabular data. Use other widgets to load "
            "other data types like models, distance matrices and networks.",
            "other-data-types")
    ]

    def __init__(self):
        """Build the GUI and schedule the initial load of the data."""
        super().__init__()
        RecentPathsWComboMixin.__init__(self)
        self.domain = None
        self.data = None
        self.loaded_file = ""
        self.reader = None

        layout = QGridLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)
        vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True,
                                callback=self.load_data, addToLayout=False)

        # Row 0: local file selection
        rb_button = gui.appendRadioButton(vbox, "文件:", addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.file_combo.activated[int].connect(self.select_file)
        box.layout().addWidget(self.file_combo)
        layout.addWidget(box, 0, 1)

        file_button = gui.button(
            None, self, '...', callback=self.browse_file, autoDefault=False)
        file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon))
        file_button.setSizePolicy(Policy.Maximum, Policy.Fixed)
        layout.addWidget(file_button, 0, 2)

        reload_button = gui.button(
            None, self, "重新加载", callback=self.load_data,
            autoDefault=False)
        reload_button.setIcon(self.style().standardIcon(
            QStyle.SP_BrowserReload))
        reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed)
        layout.addWidget(reload_button, 0, 3)

        # Row 2: sheet selection; hidden until a reader offers 2+ sheets
        self.sheet_box = gui.hBox(None, addToLayout=False, margin=0)
        self.sheet_combo = gui.comboBox(
            None, self, "xls_sheet", callback=self.select_sheet,
            sendSelectedValue=True,
        )
        self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_label = QLabel()
        self.sheet_label.setText('Sheet')
        self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed)
        self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft)
        self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter)
        layout.addWidget(self.sheet_box, 2, 1)
        self.sheet_box.hide()

        # Row 3: URL selection
        rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = NamedURLModel(self.sheet_names)
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 3, 3)
        url_combo.activated.connect(self._url_set)
        # Use a case-sensitive completer so that the combo box matches the
        # history case-sensitively
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        box = gui.vBox(self.controlArea, "信息")
        self.infolabel = gui.widgetLabel(box, '未加载数据。')
        self.warnings = gui.widgetLabel(box, '')

        box = gui.widgetBox(self.controlArea, "列(双击编辑)")
        self.domain_editor = DomainEditor(self)
        self.editor_model = self.domain_editor.model()
        box.layout().addWidget(self.domain_editor)

        box = gui.hBox(self.controlArea)
        gui.button(
            box, self, "浏览文档数据集",
            callback=lambda: self.browse_file(True), autoDefault=False)
        gui.rubber(box)

        gui.button(
            box, self, "重置", callback=self.reset_domain_edit)
        self.apply_button = gui.button(
            box, self, "应用", callback=self.apply_domain_edit)
        self.apply_button.setEnabled(False)
        self.apply_button.setFixedWidth(170)
        # Any edit in the domain editor enables the apply button
        self.editor_model.dataChanged.connect(
            lambda: self.apply_button.setEnabled(True))

        self.set_file_list()
        # Must not call open_file from within __init__. open_file
        # explicitly re-enters the event loop (by a progress bar)

        self.setAcceptDrops(True)

        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and os.path.exists(last_path) and \
                    os.path.getsize(last_path) > self.SIZE_LIMIT:
                # Large files are loaded only on explicit Reload
                self.Warning.file_too_big()
                return

        QTimer.singleShot(0, self.load_data)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget window."""
        return QSize(600, 550)

    def select_file(self, n):
        """Make the `n`-th recent path current and load it."""
        assert n < len(self.recent_paths)
        super().select_file(n)
        if self.recent_paths:
            self.source = self.LOCAL_FILE
            self.load_data()
            self.set_file_list()

    def select_sheet(self):
        """Store the chosen sheet on the current recent path and reload."""
        self.recent_paths[0].sheet = self.sheet_combo.currentText()
        self.load_data()

    def _url_set(self):
        """Normalize the URL chosen in the combo and load data from it."""
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            # Default to http when the user omitted the scheme
            url = 'http://' + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.load_data()

    def browse_file(self, in_demos=False):
        """Let the user pick a file in a dialog and load it.

        Parameters
        ----------
        in_demos : bool
            If true, the dialog starts in the sample datasets directory;
            otherwise at the last used path or the home directory.
        """
        if in_demos:
            start_file = get_sample_datasets_dir()
            if not os.path.exists(start_file):
                QMessageBox.information(
                    None, "文件",
                    "无法找到文件")
                return
        else:
            start_file = self.last_path() or os.path.expanduser("~/")

        # Offer only formats that can be read and declare extensions
        readers = [f for f in FileFormat.formats
                   if getattr(f, 'read', None)
                   and getattr(f, "EXTENSIONS", None)]
        filename, reader, _ = open_filename_dialog(start_file, None, readers)
        if not filename:
            return
        self.add_path(filename)
        if reader is not None:
            self.recent_paths[0].file_format = reader.qualified_name()

        self.source = self.LOCAL_FILE
        self.load_data()

    # Open a file, create data from it and send it over the data channel
    def load_data(self):
        """Reset the widget state, load the data and report any failure."""
        # We need to catch any exception type since anything can happen in
        # file readers
        self.closeContext()
        self.domain_editor.set_domain(None)
        self.apply_button.setEnabled(False)
        self.clear_messages()
        self.set_file_list()

        error = self._try_load()
        if error:
            # `error` is a callable that shows the corresponding message
            error()
            self.data = None
            self.sheet_box.hide()
            self.Outputs.data.send(None)
            self.infolabel.setText("无数据。")

    def _try_load(self):
        """Try to read the data from the selected source.

        Returns
        -------
        callable or None
            A callable that shows the error message when loading failed,
            ``None`` otherwise.
        """
        # pylint: disable=broad-except
        # The existence check concerns local files only; a stale entry in
        # the list of recent files must not prevent loading from a URL.
        if self.source == self.LOCAL_FILE:
            last_path = self.last_path()
            if last_path and not os.path.exists(last_path):
                return self.Error.file_not_found

        try:
            self.reader = self._get_reader()
            assert self.reader is not None
        except Exception:
            return self.Error.missing_reader

        if self.reader is self.NoFileSelected:
            self.Outputs.data.send(None)
            return None

        try:
            self._update_sheet_combo()
        except Exception:
            return self.Error.sheet_error

        with catch_warnings(record=True) as warnings:
            try:
                data = self.reader.read()
            except Exception as ex:
                log.exception(ex)
                return lambda x=ex: self.Error.unknown(str(x))
            if warnings:
                # Only the last recorded warning is shown
                self.Warning.load_warning(warnings[-1].message.args[0])

        self.infolabel.setText(self._describe(data))

        self.loaded_file = self.last_path()
        add_origin(data, self.loaded_file)
        self.data = data
        self.openContext(data.domain)
        self.apply_domain_edit()  # sends data
        return None

    def _get_reader(self) -> FileFormat:
        """Return a reader for the current source or `NoFileSelected`."""
        if self.source == self.LOCAL_FILE:
            path = self.last_path()
            if path is None:
                return self.NoFileSelected
            if self.recent_paths and self.recent_paths[0].file_format:
                # The user chose the format explicitly in the file dialog
                qname = self.recent_paths[0].file_format
                reader_class = class_from_qualified_name(qname)
                reader = reader_class(path)
            else:
                reader = FileFormat.get_reader(path)
            if self.recent_paths and self.recent_paths[0].sheet:
                reader.select_sheet(self.recent_paths[0].sheet)
            return reader
        else:
            url = self.url_combo.currentText().strip()
            if url:
                return UrlReader(url)
            else:
                return self.NoFileSelected

    def _update_sheet_combo(self):
        """Show the sheet combo only if the reader offers 2+ sheets."""
        if len(self.reader.sheets) < 2:
            self.sheet_box.hide()
            self.reader.select_sheet(None)
            return

        self.sheet_combo.clear()
        self.sheet_combo.addItems(self.reader.sheets)
        self._select_active_sheet()
        self.sheet_box.show()

    def _select_active_sheet(self):
        """Select the reader's current sheet in the sheet combo."""
        if self.reader.sheet:
            try:
                idx = self.reader.sheets.index(self.reader.sheet)
                self.sheet_combo.setCurrentIndex(idx)
            except ValueError:
                # Requested sheet does not exist in this file
                self.reader.select_sheet(None)
        else:
            self.sheet_combo.setCurrentIndex(0)

    @staticmethod
    def _describe(table):
        """Return an HTML summary of `table` for the info label."""
        def missing_prop(prop):
            # `prop` is False when there are no missing values, otherwise
            # the proportion of missing values
            if prop:
                return f"({prop * 100:.1f}% 个缺失值)"
            else:
                return "(无缺失值)"

        domain = table.domain
        text = ""

        attrs = getattr(table, "attributes", {})
        descs = [attrs[desc]
                 for desc in ("Name", "Description") if desc in attrs]
        if len(descs) == 2:
            # With both present, the name is emphasized as a heading
            descs[0] = f"<b>{descs[0]}</b>"
        if descs:
            text += f"<p>{'<br/>'.join(descs)}</p>"

        text += f"<p>{len(table)} 条数据"

        missing_in_attr = missing_prop(table.has_missing_attribute()
                                       and table.get_nan_frequency_attribute())
        missing_in_class = missing_prop(table.has_missing_class()
                                        and table.get_nan_frequency_class())
        text += f"<br/>特征数目: {len(domain.attributes)} {missing_in_attr}"
        if domain.has_continuous_class:
            text += f"<br/>回归; 数值类 {missing_in_class}"
        elif domain.has_discrete_class:
            text += "<br/>分类: 分类种类共 " \
                f"{len(domain.class_var.values)} 个 {missing_in_class}"
        elif table.domain.class_vars:
            text += "<br/>Multi-target; " \
                f"{len(table.domain.class_vars)} target variables " \
                f"{missing_in_class}"
        else:
            text += "<br/>Data has no target variable."
        text += f"<br/>元属性: {len(domain.metas)}"
        text += "</p>"

        # An empty table has no first or last entry to show
        if 'Timestamp' in table.domain and len(table):
            # Google Forms uses this header to timestamp responses
            text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \
                f"Last entry: {table[-1, 'Timestamp']}</p>"
        return text

    def storeSpecificSettings(self):
        """Copy the edited variables into the current context."""
        self.current_context.modified_variables = self.variables[:]

    def retrieveSpecificSettings(self):
        """Restore the edited variables from the current context, if any."""
        if hasattr(self.current_context, "modified_variables"):
            self.variables[:] = self.current_context.modified_variables

    def reset_domain_edit(self):
        """Discard the edits in the domain editor and resend the data."""
        self.domain_editor.reset_domain()
        self.apply_domain_edit()

    def _inspect_discrete_variables(self, domain):
        """Warn if any discrete variable in `domain` has >100 values."""
        # A single warning suffices, however many variables are affected
        if any(var.is_discrete and len(var.values) > 100
               for var in chain(domain.variables, domain.metas)):
            self.Warning.performance_warning()

    def apply_domain_edit(self):
        """Apply the domain editor's changes and send the resulting table."""
        self.Warning.performance_warning.clear()
        if self.data is None:
            table = None
        else:
            domain, cols = self.domain_editor.get_domain(
                self.data.domain, self.data)
            if not (domain.variables or domain.metas):
                # Every column was removed in the editor
                table = None
            elif domain is self.data.domain:
                table = self.data
            else:
                X, y, m = cols
                table = Table.from_numpy(domain, X, y, m, self.data.W)
                table.name = self.data.name
                table.ids = np.array(self.data.ids)
                table.attributes = getattr(self.data, 'attributes', {})
                self._inspect_discrete_variables(domain)

        self.Outputs.data.send(table)
        self.apply_button.setEnabled(False)

    def get_widget_name_extension(self):
        """Return the loaded file's base name without its extension."""
        _, name = os.path.split(self.loaded_file)
        return os.path.splitext(name)[0]

    def send_report(self):
        """Add the data source and a summary of the data to the report."""
        def get_ext_name(filename):
            # Map the file extension to the name of its format
            try:
                return FileFormat.names[os.path.splitext(filename)[1]]
            except KeyError:
                return "unknown"

        if self.data is None:
            self.report_paragraph("File", "No file.")
            return

        if self.source == self.LOCAL_FILE:
            home = os.path.expanduser("~")
            if self.loaded_file.startswith(home):
                # os.path.join does not like ~
                name = "~" + os.path.sep + \
                       self.loaded_file[len(home):].lstrip("/").lstrip("\\")
            else:
                name = self.loaded_file
            # Resolve the format before the sheet name is appended; the
            # " (sheet)" suffix would otherwise become part of the
            # extension and the format would be reported as "unknown".
            file_format = get_ext_name(name)
            if self.sheet_combo.isVisible():
                name += f" ({self.sheet_combo.currentText()})"
            self.report_items("File", [("File name", name),
                                       ("Format", file_format)])
        else:
            self.report_items("Data", [("Resource", self.url),
                                       ("Format", get_ext_name(self.url))])

        self.report_data("Data", self.data)

    @staticmethod
    def dragEnterEvent(event):
        """Accept drops of valid file urls"""
        urls = event.mimeData().urls()
        if urls:
            try:
                # Only the first url is checked; it is the one that
                # dropEvent loads
                FileFormat.get_reader(urls[0].toLocalFile())
                event.acceptProposedAction()
            except IOError:
                pass

    def dropEvent(self, event):
        """Handle file drops"""
        urls = event.mimeData().urls()
        if urls:
            self.add_path(urls[0].toLocalFile())  # add first file
            self.source = self.LOCAL_FILE
            self.load_data()

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the scheme)
        It make sure that all environment connected values are modified
        (e.g. relative file paths are changed)
        """
        self.update_file_list(key, value, oldvalue)
class OWTilefile(widget.OWWidget, RecentPathsWComboMixin): name = "Tile File" id = "orangecontrib.spectroscopy.widgets.tilefile" icon = "icons/tilefile.svg" description = "Read data tile-by-tile from input files, " \ "preprocess, and send a data table to the output." priority = 10000 replaces = ["orangecontrib.protospec.widgets.owtilefile.OWTilefile"] class Inputs: preprocessor = Input("Preprocessor", Preprocess) class Outputs: data = Output("Data", Table, doc="Preprocessed dataset read from the input files.") want_main_area = False SEARCH_PATHS = [("sample-datasets", get_sample_datasets_dir())] # Always trigger size limit warning (never auto-load) SIZE_LIMIT = 0 LOCAL_FILE, URL = range(2) settingsHandler = PerfectDomainContextHandler( match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL) # pylint seems to want declarations separated from definitions recent_paths: List[RecentPath] recent_urls: List[str] variables: list # Overload RecentPathsWidgetMixin.recent_paths to set defaults recent_paths = Setting([ RecentPath("", "sample-datasets", "agilent/5_mosaic_agg1024.dmt"), ]) recent_urls = Setting([]) source = Setting(LOCAL_FILE) xls_sheet = ContextSetting("") sheet_names = Setting({}) url = Setting("") variables = ContextSetting([]) domain_editor = SettingProvider(DomainEditor) class Warning(widget.OWWidget.Warning): no_preprocessor = Msg("No preprocessor on input." " Press Reload to load anyway.") file_too_big = widget.Msg( "The file is too large to load automatically." 
" Press Reload to load.") load_warning = widget.Msg("Read warning:\n{}") performance_warning = widget.Msg( "Categorical variables with >100 values may decrease performance.") class Error(widget.OWWidget.Error): missing_reader = Msg("No tile-by-tile reader for this file.") file_not_found = widget.Msg("File not found.") sheet_error = widget.Msg("Error listing available sheets.") unknown = widget.Msg("Read error:\n{}") class NoFileSelected: pass UserAdviceMessages = [ widget.Message( "Connect a Preprocessor " "which results in data-reduction ", "to best make use of this widget."), ] def __init__(self): self.preprocessor = None super().__init__() ### owfile init code-copy ### RecentPathsWComboMixin.__init__(self) self.domain = None self.data = None self.loaded_file = "" self.reader = None layout = QGridLayout() gui.widgetBox(self.controlArea, margin=0, orientation=layout) vbox = gui.radioButtons(None, self, "source", box=True, addSpace=True, callback=self.load_data, addToLayout=False) rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) box = gui.hBox(None, addToLayout=False, margin=0) box.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.file_combo.activated[int].connect(self.select_file) box.layout().addWidget(self.file_combo) layout.addWidget(box, 0, 1) file_button = gui.button(None, self, '...', callback=self.browse_file, autoDefault=False) file_button.setIcon(self.style().standardIcon(QStyle.SP_DirOpenIcon)) file_button.setSizePolicy(Policy.Maximum, Policy.Fixed) layout.addWidget(file_button, 0, 2) reload_button = gui.button(None, self, "Reload", callback=self.load_data, autoDefault=False) reload_button.setIcon(self.style().standardIcon( QStyle.SP_BrowserReload)) reload_button.setSizePolicy(Policy.Fixed, Policy.Fixed) layout.addWidget(reload_button, 0, 3) self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) 
self.sheet_combo = gui.comboBox( None, self, "xls_sheet", callback=self.select_sheet, sendSelectedValue=True, ) self.sheet_combo.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_label = QLabel() self.sheet_label.setText('Sheet') self.sheet_label.setSizePolicy(Policy.MinimumExpanding, Policy.Fixed) self.sheet_box.layout().addWidget(self.sheet_label, Qt.AlignLeft) self.sheet_box.layout().addWidget(self.sheet_combo, Qt.AlignVCenter) layout.addWidget(self.sheet_box, 2, 1) self.sheet_box.hide() rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter) self.url_combo = url_combo = QComboBox() url_model = NamedURLModel(self.sheet_names) url_model.wrap(self.recent_urls) url_combo.setLineEdit(LineEditSelectOnFocus()) url_combo.setModel(url_model) url_combo.setSizePolicy(Policy.Ignored, Policy.Fixed) url_combo.setEditable(True) url_combo.setInsertPolicy(url_combo.InsertAtTop) url_edit = url_combo.lineEdit() l, t, r, b = url_edit.getTextMargins() url_edit.setTextMargins(l + 5, t, r, b) layout.addWidget(url_combo, 3, 1, 3, 3) url_combo.activated.connect(self._url_set) # whit completer we set that combo box is case sensitive when # matching the history completer = QCompleter() completer.setCaseSensitivity(Qt.CaseSensitive) url_combo.setCompleter(completer) box = gui.vBox(self.controlArea, "Info") self.infolabel = gui.widgetLabel(box, 'No data loaded.') self.warnings = gui.widgetLabel(box, '') box = gui.widgetBox(self.controlArea, "Columns (Double click to edit)") self.domain_editor = DomainEditor(self) self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) box = gui.hBox(self.controlArea) gui.button(box, self, "Browse documentation datasets", callback=lambda: self.browse_file(True), autoDefault=False) gui.rubber(box) gui.button(box, self, "Reset", callback=self.reset_domain_edit) self.apply_button = gui.button(box, self, "Apply", callback=self.apply_domain_edit) 
self.apply_button.setEnabled(False) self.apply_button.setFixedWidth(170) self.editor_model.dataChanged.connect( lambda: self.apply_button.setEnabled(True)) self.set_file_list() # Must not call open_file from within __init__. open_file # explicitly re-enters the event loop (by a progress bar) self.setAcceptDrops(True) ### End code copy ### box = gui.vBox(self.controlArea, "Preprocessor") self.info_preproc = gui.widgetLabel(box, 'No preprocessor on input.') self.Warning.file_too_big() ### owfile methods code-copy ### @staticmethod def sizeHint(): return QSize(600, 550) def select_file(self, n): assert n < len(self.recent_paths) super().select_file(n) if self.recent_paths: self.source = self.LOCAL_FILE self.load_data() self.set_file_list() def select_sheet(self): self.recent_paths[0].sheet = self.sheet_combo.currentText() self.load_data() def _url_set(self): url = self.url_combo.currentText() pos = self.recent_urls.index(url) url = url.strip() if not urlparse(url).scheme: url = 'http://' + url self.url_combo.setItemText(pos, url) self.recent_urls[pos] = url self.source = self.URL self.load_data() # Open a file, create data from it and send it over the data channel def load_data(self): # We need to catch any exception type since anything can happen in # file readers self.closeContext() self.domain_editor.set_domain(None) self.apply_button.setEnabled(False) self.clear_messages() self.set_file_list() error = self._try_load() if error: error() self.data = None self.sheet_box.hide() self.Outputs.data.send(None) self.infolabel.setText("No data.") def _try_load(self): # pylint: disable=broad-except if self.last_path() and not os.path.exists(self.last_path()): return self.Error.file_not_found try: self.reader = self._get_reader() assert self.reader is not None except Exception: return self.Error.missing_reader if self.reader is self.NoFileSelected: self.Outputs.data.send(None) return None try: self._update_sheet_combo() except Exception: return self.Error.sheet_error with 
catch_warnings(record=True) as warnings: try: data = self.reader.read() except Exception as ex: log.exception(ex) return lambda x=ex: self.Error.unknown(str(x)) if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) self.infolabel.setText(self._describe(data)) self.loaded_file = self.last_path() add_origin(data, self.loaded_file) self.data = data self.openContext(data.domain) self.apply_domain_edit() # sends data return None def _update_sheet_combo(self): if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None) return self.sheet_combo.clear() self.sheet_combo.addItems(self.reader.sheets) self._select_active_sheet() self.sheet_box.show() def _select_active_sheet(self): if self.reader.sheet: try: idx = self.reader.sheets.index(self.reader.sheet) self.sheet_combo.setCurrentIndex(idx) except ValueError: # Requested sheet does not exist in this file self.reader.select_sheet(None) else: self.sheet_combo.setCurrentIndex(0) @staticmethod def _describe(table): def missing_prop(prop): if prop: return f"({prop * 100:.1f}% missing values)" else: return "(no missing values)" domain = table.domain text = "" attrs = getattr(table, "attributes", {}) descs = [ attrs[desc] for desc in ("Name", "Description") if desc in attrs ] if len(descs) == 2: descs[0] = f"<b>{descs[0]}</b>" if descs: text += f"<p>{'<br/>'.join(descs)}</p>" text += f"<p>{len(table)} instance(s)" missing_in_attr = missing_prop(table.has_missing_attribute() and table.get_nan_frequency_attribute()) missing_in_class = missing_prop(table.has_missing_class() and table.get_nan_frequency_class()) text += f"<br/>{len(domain.attributes)} feature(s) {missing_in_attr}" if domain.has_continuous_class: text += f"<br/>Regression; numerical class {missing_in_class}" elif domain.has_discrete_class: text += "<br/>Classification; categorical class " \ f"with {len(domain.class_var.values)} values {missing_in_class}" elif table.domain.class_vars: text += "<br/>Multi-target; " \ 
f"{len(table.domain.class_vars)} target variables " \ f"{missing_in_class}" else: text += "<br/>Data has no target variable." text += f"<br/>{len(domain.metas)} meta attribute(s)" text += "</p>" if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += f"<p>First entry: {table[0, 'Timestamp']}<br/>" \ f"Last entry: {table[-1, 'Timestamp']}</p>" return text def storeSpecificSettings(self): self.current_context.modified_variables = self.variables[:] def retrieveSpecificSettings(self): if hasattr(self.current_context, "modified_variables"): self.variables[:] = self.current_context.modified_variables def reset_domain_edit(self): self.domain_editor.reset_domain() self.apply_domain_edit() def _inspect_discrete_variables(self, domain): for var in chain(domain.variables, domain.metas): if var.is_discrete and len(var.values) > 100: self.Warning.performance_warning() def apply_domain_edit(self): self.Warning.performance_warning.clear() if self.data is None: table = None else: domain, cols = self.domain_editor.get_domain( self.data.domain, self.data) if not (domain.variables or domain.metas): table = None elif domain is self.data.domain: table = self.data else: X, y, m = cols table = Table.from_numpy(domain, X, y, m, self.data.W) table.name = self.data.name table.ids = np.array(self.data.ids) table.attributes = getattr(self.data, 'attributes', {}) self._inspect_discrete_variables(domain) self.Outputs.data.send(table) self.apply_button.setEnabled(False) def get_widget_name_extension(self): _, name = os.path.split(self.loaded_file) return os.path.splitext(name)[0] def send_report(self): def get_ext_name(filename): try: return FileFormat.names[os.path.splitext(filename)[1]] except KeyError: return "unknown" if self.data is None: self.report_paragraph("File", "No file.") return if self.source == self.LOCAL_FILE: home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ name = "~" + os.path.sep + \ 
self.loaded_file[len(home):].lstrip("/").lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): name += f" ({self.sheet_combo.currentText()})" self.report_items("File", [("File name", name), ("Format", get_ext_name(name))]) else: self.report_items("Data", [("Resource", self.url), ("Format", get_ext_name(self.url))]) self.report_data("Data", self.data) @staticmethod def dragEnterEvent(event): """Accept drops of valid file urls""" urls = event.mimeData().urls() if urls: try: FileFormat.get_reader(urls[0].toLocalFile()) event.acceptProposedAction() except IOError: pass def dropEvent(self, event): """Handle file drops""" urls = event.mimeData().urls() if urls: self.add_path(urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data() def workflowEnvChanged(self, key, value, oldvalue): """ Function called when environment changes (e.g. while saving the scheme) It make sure that all environment connected values are modified (e.g. relative file paths are changed) """ self.update_file_list(key, value, oldvalue) #### End code copy #### @staticmethod def _is_preproc(p): """ Tests that a preprocessor is not None or empty PreprocessorList """ return not (p is None or (isinstance(p, PreprocessorList) and len(p.preprocessors) == 0)) @staticmethod def _format_preproc_str(p): pstring = str() if isinstance(p, PreprocessorList): for preproc in p.preprocessors: pstring += "\n{0}".format(preproc) else: pstring = str(p) return pstring @Inputs.preprocessor def update_preprocessor(self, preproc): self.Warning.no_preprocessor.clear() if not self._is_preproc(preproc): self.info_preproc.setText("No preprocessor on input.") self.Warning.no_preprocessor() elif self.preprocessor is not preproc: self.info_preproc.setText("New preprocessor, reload file to use." 
+ self._format_preproc_str(preproc)) self.preprocessor = preproc def browse_file(self, in_demos=False): if in_demos: start_file = get_sample_datasets_dir() if not os.path.exists(start_file): QMessageBox.information( None, "File", "Cannot find the directory with documentation datasets") return else: start_file = self.last_path() or os.path.expanduser("~/") readers = [ f for f in FileFormat.formats if getattr(f, 'read_tile', None) and getattr(f, "EXTENSIONS", None) ] filename, reader, _ = open_filename_dialog(start_file, None, readers) if not filename: return self.add_path(filename) if reader is not None: self.recent_paths[0].file_format = reader.qualified_name() self.source = self.LOCAL_FILE if not self._is_preproc(self.preprocessor): return self.Warning.no_preprocessor() self.load_data() @classmethod def get_tile_reader(cls, filename): """Return reader instance that can be used to read a file tile-wise Parameters ---------- filename : str Returns ------- FileFormat """ readers = [ f for f in FileFormat.formats if getattr(f, 'read_tile', None) and getattr(f, "EXTENSIONS", None) ] for reader in readers: if os.path.splitext(filename)[1] in reader.EXTENSIONS: return reader(filename) raise IOError('No readers for file "{}"'.format(filename)) def _get_reader(self): """ Returns ------- FileFormat """ if self.source == self.LOCAL_FILE: path = self.last_path() if self.recent_paths and self.recent_paths[0].file_format: qname = self.recent_paths[0].file_format reader_class = class_from_qualified_name(qname) reader = reader_class(path) else: reader = self.get_tile_reader(path) if self.recent_paths and self.recent_paths[0].sheet: reader.select_sheet(self.recent_paths[0].sheet) # set preprocessor here if hasattr(reader, "read_tile"): reader.set_preprocessor(self.preprocessor) if self.preprocessor is not None: self.info_preproc.setText( self._format_preproc_str( self.preprocessor).lstrip("\n")) else: # only allow readers with tile-by-tile support to run. 
reader = None return reader elif self.source == self.URL: url = self.url_combo.currentText().strip() if url: return UrlReader(url)
class OWMovingTransform(widget.OWWidget):
    """Widget that applies window aggregation functions to time series.

    The user builds a table of transformations, one per row: the series
    (variable) to transform, the window width and the aggregation function.
    On commit, the table is handed to ``moving_transform`` together with the
    input data, and the result is sent to the "Time series" output.
    """
    name = 'Moving Transform'
    description = 'Apply rolling window functions to the time series.'
    icon = 'icons/MovingTransform.svg'
    priority = 20

    inputs = [("Time series", Table, 'set_data')]
    outputs = [("Time series", Timeseries)]

    want_main_area = False

    # Persisted widget settings.
    non_overlapping = settings.Setting(False)
    fixed_wlen = settings.Setting(5)
    transformations = settings.Setting([])
    autocommit = settings.Setting(False)
    last_win_width = settings.Setting(5)

    _NON_OVERLAPPING_WINDOWS = 'Non-overlapping windows'

    UserAdviceMessages = [
        widget.Message(
            'Get the simple moving average (SMA) of a series '
            'by setting the aggregation function to "{}".'.format(Mean),
            'sma-is-mean'),
        widget.Message(
            # NOTE: the apostrophe must be escaped as \' here; the previous
            # text read 'don\t', which is "don" followed by a TAB character.
            'If "{}" is checked, the rolling windows don\'t '
            'overlap. Instead, they run through the series '
            'side-to-side, so the resulting transformed series is '
            'fixed-window-length-times shorter.'.format(
                _NON_OVERLAPPING_WINDOWS),
            'non-overlapping')
    ]

    def __init__(self):
        """Build the control area: options, transformation table, buttons."""
        self.data = None
        box = gui.vBox(self.controlArea, 'Moving Transform')

        def _disable_fixed_wlen():
            # The fixed-width spin box is only meaningful for non-overlapping
            # windows; the table is repainted because SpinDelegate.paint
            # depends on the same setting.
            fixed_wlen.setDisabled(not self.non_overlapping)
            self.view.repaint()
            self.on_changed()

        gui.checkBox(box, self, 'non_overlapping',
                     label=self._NON_OVERLAPPING_WINDOWS,
                     callback=_disable_fixed_wlen,
                     tooltip='If this is checked, instead of rolling windows '
                             'through the series, they are applied side-to-side, '
                             'so the resulting output series will be some '
                             'length-of-fixed-window-times shorter.')
        fixed_wlen = gui.spin(box, self, 'fixed_wlen', 2, 1000,
                              label='Fixed window width:',
                              callback=self.on_changed)
        fixed_wlen.setDisabled(not self.non_overlapping)

        # TODO: allow the user to choose left-aligned, right-aligned, or center-aligned window

        class TableView(gui.TableView):
            """Table view with per-column editors for the transformations."""

            def __init__(self, parent):
                super().__init__(
                    parent,
                    editTriggers=(self.SelectedClicked |
                                  self.CurrentChanged |
                                  self.DoubleClicked |
                                  self.EditKeyPressed),
                )
                self.horizontalHeader().setStretchLastSection(False)
                agg_functions = ListModel(
                    AGG_FUNCTIONS + [Cumulative_sum, Cumulative_product],
                    parent=self)
                # Column 0: series, column 1: window width,
                # column 2: aggregation function.
                self.setItemDelegateForColumn(0, self.VariableDelegate(parent))
                self.setItemDelegateForColumn(1, self.SpinDelegate(parent))
                self.setItemDelegateForColumn(
                    2, self.ComboDelegate(self, agg_functions))

            class _ItemDelegate(QStyledItemDelegate):
                """Base delegate whose editor fills the whole cell."""

                def updateEditorGeometry(self, widget, option, _index):
                    widget.setGeometry(option.rect)

            class ComboDelegate(_ItemDelegate):
                """Delegate that edits a cell with a combo box over a model."""

                def __init__(self, parent=None, combo_model=None):
                    super().__init__(parent)
                    self._parent = parent
                    # Subclasses may supply `_combo_model` themselves (see
                    # VariableDelegate), so only set it when it is given.
                    if combo_model is not None:
                        self._combo_model = combo_model

                def createEditor(self, parent, _QStyleOptionViewItem, index):
                    combo = QComboBox(parent)
                    combo.setModel(self._combo_model)
                    return combo

                def setEditorData(self, combo, index):
                    var = index.model().data(index, Qt.EditRole)
                    combo.setCurrentIndex(self._combo_model.indexOf(var))

                def setModelData(self, combo, model, index):
                    var = self._combo_model[combo.currentIndex()]
                    model.setData(index, var, Qt.EditRole)

            class VariableDelegate(ComboDelegate):
                """Combo delegate backed by the parent's ``var_model``."""

                @property
                def _combo_model(self):
                    # Looked up lazily on each access rather than stored.
                    return self._parent.var_model

            class SpinDelegate(_ItemDelegate):
                """Spin box delegate for the window width column."""

                def paint(self, painter, option, index):
                    # Don't paint window length if non-overlapping windows set
                    if not self.parent().non_overlapping:
                        super().paint(painter, option, index)

                def createEditor(self, parent, _QStyleOptionViewItem, _index):
                    # Don't edit window length if non-overlapping windows set
                    if self.parent().non_overlapping:
                        return None
                    spin = QSpinBox(parent, minimum=1, maximum=1000)
                    return spin

                def setEditorData(self, spin, index):
                    spin.setValue(index.model().data(index, Qt.EditRole))

                def setModelData(self, spin, model, index):
                    spin.interpretText()
                    model.setData(index, spin.value(), Qt.EditRole)

        self.var_model = VariableListModel(parent=self)
        self.table_model = model = PyTableModel(self.transformations,
                                                parent=self, editable=True)
        model.setHorizontalHeaderLabels(
            ['Series', 'Window width', 'Aggregation function'])
        model.dataChanged.connect(self.on_changed)

        self.view = view = TableView(self)
        view.setModel(model)
        box.layout().addWidget(view)

        hbox = gui.hBox(box)
        from os.path import dirname, join
        self.add_button = button = gui.button(hbox, self, 'Add &Transform',
                                              callback=self.on_add_transform)
        button.setIcon(
            QIcon(join(dirname(__file__), 'icons', 'LineChart-plus.png')))
        self.del_button = button = gui.button(hbox, self, '&Delete Selected',
                                              callback=self.on_del_transform)
        QIcon.setThemeName('gnome')  # Works for me
        button.setIcon(QIcon.fromTheme('edit-delete'))

        gui.auto_commit(box, self, 'autocommit', '&Apply')

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(450, 600)

    def on_add_transform(self):
        """Append a default transformation row, then commit.

        The new row uses the first available series, the last used window
        width and the first aggregation function. Nothing is appended when
        there is no data or no series to choose from.
        """
        # The add button is enabled whenever the domain is non-empty, but
        # `var_model` only holds continuous, non-time variables and may still
        # be empty; indexing it then would raise IndexError.
        if self.data is not None and len(self.var_model):
            self.table_model.append(
                [self.var_model[0], self.last_win_width, AGG_FUNCTIONS[0]])
        self.commit()

    def on_del_transform(self):
        """Delete the selected rows, select the last remaining row, commit."""
        selected_rows = [
            mi.row() for mi in self.view.selectionModel().selectedRows(0)]
        # Delete from the bottom up so earlier deletions don't shift the
        # indices of rows that are still to be removed.
        for row in sorted(selected_rows, reverse=True):
            del self.table_model[row]
        if len(self.table_model):
            selection_model = self.view.selectionModel()
            selection_model.select(
                self.table_model.index(len(self.table_model) - 1, 0),
                selection_model.Select | selection_model.Rows)
        self.commit()

    def set_data(self, data):
        """Handle the "Time series" input.

        The input is converted with ``Timeseries.from_data_table`` (``None``
        is kept as is), the transformation table is cleared and the list of
        selectable series is refilled with the continuous variables other
        than the time variable.
        """
        self.data = data = None if data is None else Timeseries.from_data_table(
            data)
        self.add_button.setDisabled(not len(getattr(data, 'domain', ())))
        self.table_model.clear()
        if data is not None:
            self.var_model.wrap([
                var for var in data.domain
                if var.is_continuous and var is not data.time_variable
            ])
        self.on_changed()

    def on_changed(self):
        """React to any change of settings or of the transformation table."""
        self.commit()

    def commit(self):
        """Compute the transformed series and send it to the output.

        Sends ``None`` when the stored data is falsy (e.g. ``None``).
        """
        data = self.data
        if not data:
            self.send(Output.TIMESERIES, None)
            return
        # The third argument is the fixed window length when non-overlapping
        # windows are enabled, and the falsy setting value otherwise.
        ts = moving_transform(data, self.table_model,
                              self.non_overlapping and self.fixed_wlen)
        self.send(Output.TIMESERIES, ts)
class OWGrangerCausality(widget.OWWidget):
    """Widget that lists the results of Granger causality tests.

    The input time series is passed to ``granger_causality`` together with
    the maximum lag and the significance level derived from the confidence
    setting; each resulting (lag, series, series) triple becomes one row of
    the table shown in the main area.
    """
    name = 'Granger Causality'
    description = ('Test if one time series Granger-causes (i.e. can be an '
                   'indicator of) another.')
    icon = 'icons/GrangerCausality.svg'
    priority = 190

    class Inputs:
        time_series = Input("Time series", Table, replaces=["Timeseries"])

    # Persisted widget settings.
    max_lag = settings.Setting(20)
    confidence = settings.Setting(95)
    autocommit = settings.Setting(False)

    UserAdviceMessages = [
        widget.Message(
            'We say <i>X</i> Granger-causes <i>Y</i> if '
            'predictions of values of <i>Y</i> based on its own '
            'past values and on the past values of <i>X</i> are '
            'better than predictions of <i>Y</i> based on its '
            'past values alone.<br><br>'
            'It does NOT mean <i>X</i> causes <i>Y</i>!',
            'explanation',
            widget.Message.Warning,
        )
    ]

    class Error(widget.OWWidget.Error):
        unexpected_error = widget.Msg('Unexpected error: {}')

    def __init__(self):
        """Create the test controls and the results table."""
        self.data = None

        controls = gui.vBox(self.controlArea, 'Granger Test')
        gui.hSlider(
            controls, self, 'confidence',
            minValue=90, maxValue=99,
            label='Confidence:', labelFormat=" %d%%",
            callback=self.on_changed,
        )
        gui.spin(
            controls, self, 'max_lag', 1, 50,
            label='Max lag:',
            callback=self.on_changed,
        )
        gui.auto_commit(controls, self, 'autocommit', '&Test')
        gui.rubber(self.controlArea)

        results_model = PyTableModel(parent=self)
        self.model = results_model
        results_model.setHorizontalHeaderLabels(
            ['Min. lag', 'Series 1', '', 'Series 2'])

        table_view = gui.TableView(self)
        table_view.setModel(results_model)
        # Both series-name columns share one bold-font delegate.
        bold_delegate = table_view.BoldFontDelegate(self)
        for column in (1, 3):
            table_view.setItemDelegateForColumn(column, bold_delegate)
        table_view.horizontalHeader().setStretchLastSection(False)
        self.mainArea.layout().addWidget(table_view)
        # TODO: output the series with subset columns of selected model rows
        # TODO: allow setting filters or choosing what variables to include in test

    def on_changed(self):
        """Re-run the test after a setting or the input has changed."""
        self.commit()

    @Inputs.time_series
    def set_data(self, data):
        """Store the input, converted to a ``Timeseries`` unless it is None."""
        if data is not None:
            data = Timeseries.from_data_table(data)
        self.data = data
        self.on_changed()

    def commit(self):
        """Run the test on the stored data and refill the results table.

        The table and the error message are always cleared first. A
        ``ValueError`` or ``LinAlgError`` raised during the computation is
        reported through ``Error.unexpected_error`` and leaves the table
        empty.
        """
        series = self.data
        self.model.clear()
        self.Error.unexpected_error.clear()
        if series is None:
            return

        try:
            with self.progressBar() as progress:
                outcome = granger_causality(
                    series, self.max_lag, 1 - self.confidence / 100,
                    callback=progress.advance)
                rows = [[lag, first, '→', second]
                        for lag, first, second in outcome]
        except (ValueError, LinAlgError) as ex:
            self.Error.unexpected_error(ex.args[0])
            return

        self.model.wrap(rows)
        self.model.sort(0, Qt.DescendingOrder)
class OWTestAndScore(OWWidget): name = "Test and Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 keywords = ['Cross Validation', 'CV'] replaces = ["Orange.widgets.evaluate.owtestlearners.OWTestLearners"] class Inputs: train_data = Input("Data", Table, default=True) test_data = Input("Test Data", Table) learner = Input("Learner", Learner, multiple=True) preprocessor = Input("Preprocessor", Preprocess) class Outputs: predictions = Output("Predictions", Table) evaluations_results = Output("Evaluation Results", Results) settings_version = 3 buttons_area_orientation = None UserAdviceMessages = [ widget.Message("Click on the table header to select shown columns", "click_header") ] settingsHandler = settings.PerfectDomainContextHandler() score_table = settings.SettingProvider(ScoreTable) #: Resampling/testing types KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \ = 0, 1, 2, 3, 4, 5 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(2) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) # CV where nr. of feature values determines nr. 
of folds fold_feature = settings.ContextSetting(None) fold_feature_selected = settings.ContextSetting(False) use_rope = settings.Setting(False) rope = settings.Setting(0.1) comparison_criterion = settings.Setting(0, schema_only=True) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) class Error(OWWidget.Error): test_data_empty = Msg("Test dataset is empty.") class_required_test = Msg( "Test data input requires a target variable.") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train datasets " "have different target variables.") memory_error = Msg("Not enough memory.") test_data_incompatible = Msg( "Test data may be incompatible with train data.") train_data_error = Msg("{}") class Warning(OWWidget.Warning): missing_data = \ Msg("Instances with unknown target values were removed from{}data.") test_data_missing = Msg("Missing separate test data input.") scores_not_computed = Msg("Some scores could not be computed.") test_data_unused = Msg("Test data is present but unused. " "Select 'Test on test data' to use it.") cant_stratify = \ Msg("Can't run stratified {}-fold cross validation; " "the least common class has only {} instances.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") test_data_transformed = Msg( "Test data has been transformed to match the train data.") cant_stratify_numeric = Msg("Stratification is ignored for regression") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False self.scorers = [] self.__pending_comparison_criterion = self.comparison_criterion #: An Ordered dictionary with current inputs and their testing results. 
self.learners = OrderedDict() # type: Dict[Any, Input] self.__state = State.Waiting # Do we need to [re]test any learners, set by _invalidate and # cleared by __update self.__needupdate = False self.__task = None # type: Optional[TaskState] self.__executor = ThreadExecutor() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], orientation=Qt.Horizontal, callback=self.kfold_changed) gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Cross validation by feature") ibox = gui.indentedBox(rbox) self.feature_model = DomainModel(order=DomainModel.METAS, valid_types=DiscreteVariable) self.features_combo = gui.comboBox(ibox, self, "fold_feature", model=self.feature_model, orientation=Qt.Horizontal, searchable=True, callback=self.fold_feature_changed) gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.comboBox(ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, contentsLength=8, searchable=True, callback=self._on_target_class_changed) self.modcompbox = 
box = gui.vBox(self.controlArea, "Model Comparison") gui.comboBox(box, self, "comparison_criterion", callback=self.update_comparison_table) hbox = gui.hBox(box) gui.checkBox(hbox, self, "use_rope", "Negligible difference: ", callback=self._on_use_rope_changed) gui.lineEdit(hbox, self, "rope", validator=QDoubleValidator(), controlWidth=70, callback=self.update_comparison_table, alignment=Qt.AlignRight) self.controls.rope.setEnabled(self.use_rope) gui.rubber(self.controlArea) self.score_table = ScoreTable(self) self.score_table.shownScoresChanged.connect(self.update_stats_model) view = self.score_table.view view.setSizeAdjustPolicy(view.AdjustToContents) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.score_table.view) self.compbox = box = gui.vBox(self.mainArea, box="Model comparison") table = self.comparison_table = QTableWidget( wordWrap=False, editTriggers=QTableWidget.NoEditTriggers, selectionMode=QTableWidget.NoSelection) table.setSizeAdjustPolicy(table.AdjustToContents) header = table.verticalHeader() header.setSectionResizeMode(QHeaderView.Fixed) header.setSectionsClickable(False) header = table.horizontalHeader() header.setTextElideMode(Qt.ElideRight) header.setDefaultAlignment(Qt.AlignCenter) header.setSectionsClickable(False) header.setStretchLastSection(False) header.setSectionResizeMode(QHeaderView.ResizeToContents) avg_width = self.fontMetrics().averageCharWidth() header.setMinimumSectionSize(8 * avg_width) header.setMaximumSectionSize(15 * avg_width) header.setDefaultSectionSize(15 * avg_width) box.layout().addWidget(table) box.layout().addWidget( QLabel( "<small>Table shows probabilities that the score for the model in " "the row is higher than that of the model in the column. 
" "Small numbers show the probability that the difference is " "negligible.</small>", wordWrap=True)) def sizeHint(self): sh = super().sizeHint() return QSize(780, sh.height()) def _update_controls(self): self.fold_feature = None self.feature_model.set_domain(None) if self.data: self.feature_model.set_domain(self.data.domain) if self.fold_feature is None and self.feature_model: self.fold_feature = self.feature_model[0] enabled = bool(self.feature_model) self.controls.resampling.buttons[ OWTestAndScore.FeatureFold].setEnabled(enabled) self.features_combo.setEnabled(enabled) if self.resampling == OWTestAndScore.FeatureFold and not enabled: self.resampling = OWTestAndScore.KFold @Inputs.learner def set_learner(self, learner, key): """ Set the input `learner` for `key`. Parameters ---------- learner : Optional[Orange.base.Learner] key : Any """ if key in self.learners and learner is None: # Removed self._invalidate([key]) del self.learners[key] elif learner is not None: self.learners[key] = InputLearner(learner, None, None) self._invalidate([key]) @Inputs.train_data def set_train_data(self, data): """ Set the input training dataset. 
Parameters ---------- data : Optional[Orange.data.Table] """ self.cancel() self.Information.data_sampled.clear() self.Error.train_data_error.clear() if data is not None: data_errors = [ ("Train dataset is empty.", len(data) == 0), ("Train data input requires a target variable.", not data.domain.class_vars), ("Too many target variables.", len(data.domain.class_vars) > 1), ("Target variable has no values.", np.isnan(data.Y).all()), ("Target variable has only one value.", data.domain.has_discrete_class and len(unique(data.Y)) < 2), ("Data has no features to learn from.", data.X.shape[1] == 0), ] for error_msg, cond in data_errors: if cond: self.Error.train_data_error(error_msg) data = None break if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.train_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: self.Warning.missing_data.clear() self.data = data self.closeContext() self._update_scorers() self._update_controls() if data is not None: self._update_class_selection() self.openContext(data.domain) if self.fold_feature_selected and bool(self.feature_model): self.resampling = OWTestAndScore.FeatureFold self._invalidate() @Inputs.test_data def set_test_data(self, data): # type: (Orange.data.Table) -> None """ Set the input separate testing dataset. 
Parameters ---------- data : Optional[Orange.data.Table] """ self.Information.test_data_sampled.clear() self.Error.test_data_empty.clear() if data is not None and not data: self.Error.test_data_empty() data = None if data and not data.domain.class_var: self.Error.class_required_test() data = None else: self.Error.class_required_test.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.test_data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.test_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: self.Warning.missing_data.clear() self.test_data = data if self.resampling == OWTestAndScore.TestOnTest: self._invalidate() def _which_missing_data(self): return { (True, True): " ", # both, don't specify (True, False): " train ", (False, True): " test " }[(self.train_data_missing_vals, self.test_data_missing_vals)] # List of scorers shouldn't be retrieved globally, when the module is # loading since add-ons could have registered additional scorers. 
# It could have been cached but # - we don't gain much with it # - it complicates the unit tests def _update_scorers(self): if self.data and self.data.domain.class_var: new_scorers = usable_scorers(self.data.domain.class_var) else: new_scorers = [] # Don't unnecessarily reset the combo because this would always reset # comparison_criterion; we also set it explicitly, though, for clarity if new_scorers != self.scorers: self.scorers = new_scorers combo = self.controls.comparison_criterion combo.clear() combo.addItems( [scorer.long_name or scorer.name for scorer in self.scorers]) if self.scorers: self.comparison_criterion = 0 if self.__pending_comparison_criterion is not None: # Check for the unlikely case that some scorers have been removed # from modules if self.__pending_comparison_criterion < len(self.scorers): self.comparison_criterion = self.__pending_comparison_criterion self.__pending_comparison_criterion = None self._update_compbox_title() def _update_compbox_title(self): criterion = self.comparison_criterion if criterion < len(self.scorers): scorer = self.scorers[criterion]() self.compbox.setTitle(f"Model Comparison by {scorer.name}") else: self.compbox.setTitle(f"Model Comparison") @Inputs.preprocessor def set_preprocessor(self, preproc): """ Set the input preprocessor to apply on the training data. 
""" self.preprocessor = preproc self._invalidate() def handleNewSignals(self): """Reimplemented from OWWidget.handleNewSignals.""" self._update_class_selection() self.score_table.update_header(self.scorers) self._update_view_enabled() self.update_stats_model() if self.__needupdate: self.__update() def kfold_changed(self): self.resampling = OWTestAndScore.KFold self._param_changed() def fold_feature_changed(self): self.resampling = OWTestAndScore.FeatureFold self._param_changed() def shuffle_split_changed(self): self.resampling = OWTestAndScore.ShuffleSplit self._param_changed() def _param_changed(self): self.modcompbox.setEnabled(self.resampling == OWTestAndScore.KFold) self._update_view_enabled() self._invalidate() self.__update() def _update_view_enabled(self): self.comparison_table.setEnabled( self.resampling == OWTestAndScore.KFold and len(self.learners) > 1 and self.data is not None) self.score_table.view.setEnabled(self.data is not None) def update_stats_model(self): # Update the results_model with up to date scores. # Note: The target class specific scores (if requested) are # computed as needed in this method. 
model = self.score_table.model # clear the table model, but preserving the header labels for r in reversed(range(model.rowCount())): model.takeRow(r) target_index = None if self.data is not None: class_var = self.data.domain.class_var if self.data.domain.has_discrete_class and \ self.class_selection != self.TARGET_AVERAGE: target_index = class_var.values.index(self.class_selection) else: class_var = None errors = [] has_missing_scores = False names = [] for key, slot in self.learners.items(): name = learner_name(slot.learner) names.append(name) head = QStandardItem(name) head.setData(key, Qt.UserRole) results = slot.results if results is not None and results.success: train = QStandardItem("{:.3f}".format( results.value.train_time)) train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) train.setData(key, Qt.UserRole) test = QStandardItem("{:.3f}".format(results.value.test_time)) test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) test.setData(key, Qt.UserRole) row = [head, train, test] else: row = [head] if isinstance(results, Try.Fail): head.setToolTip(str(results.exception)) head.setText("{} (error)".format(name)) head.setForeground(QtGui.QBrush(Qt.red)) if isinstance(results.exception, DomainTransformationError) \ and self.resampling == self.TestOnTest: self.Error.test_data_incompatible() self.Information.test_data_transformed.clear() else: errors.append("{name} failed with error:\n" "{exc.__class__.__name__}: {exc!s}".format( name=name, exc=slot.results.exception)) if class_var is not None and class_var.is_discrete and \ target_index is not None: if slot.results is not None and slot.results.success: ovr_results = results_one_vs_rest(slot.results.value, target_index) # Cell variable is used immediatelly, it's not stored # pylint: disable=cell-var-from-loop stats = [ Try(scorer_caller(scorer, ovr_results, target=1)) for scorer in self.scorers ] else: stats = None else: stats = slot.stats if stats is not None: for stat, scorer in zip(stats, self.scorers): item 
= QStandardItem() item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter) if stat.success: item.setData(float(stat.value[0]), Qt.DisplayRole) else: item.setToolTip(str(stat.exception)) if scorer.name in self.score_table.shown_scores: has_missing_scores = True row.append(item) model.appendRow(row) # Resort rows based on current sorting header = self.score_table.view.horizontalHeader() model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder()) self._set_comparison_headers(names) self.error("\n".join(errors), shown=bool(errors)) self.Warning.scores_not_computed(shown=has_missing_scores) def _on_use_rope_changed(self): self.controls.rope.setEnabled(self.use_rope) self.update_comparison_table() def update_comparison_table(self): self.comparison_table.clearContents() slots = self._successful_slots() if not (slots and self.scorers): return names = [learner_name(slot.learner) for slot in slots] self._set_comparison_headers(names) if self.resampling == OWTestAndScore.KFold: scores = self._scores_by_folds(slots) self._fill_table(names, scores) def _successful_slots(self): model = self.score_table.model proxy = self.score_table.sorted_model keys = (model.data(proxy.mapToSource(proxy.index(row, 0)), Qt.UserRole) for row in range(proxy.rowCount())) slots = [ slot for slot in (self.learners[key] for key in keys) if slot.results is not None and slot.results.success ] return slots def _set_comparison_headers(self, names): table = self.comparison_table try: # Prevent glitching during update table.setUpdatesEnabled(False) header = table.horizontalHeader() if len(names) > 2: header.setSectionResizeMode(QHeaderView.Stretch) else: header.setSectionResizeMode(QHeaderView.Fixed) table.setRowCount(len(names)) table.setColumnCount(len(names)) table.setVerticalHeaderLabels(names) table.setHorizontalHeaderLabels(names) finally: table.setUpdatesEnabled(True) def _scores_by_folds(self, slots): scorer = self.scorers[self.comparison_criterion]() self._update_compbox_title() if 
scorer.is_binary: if self.class_selection != self.TARGET_AVERAGE: class_var = self.data.domain.class_var target_index = class_var.values.index(self.class_selection) kw = dict(target=target_index) else: kw = dict(average='weighted') else: kw = {} def call_scorer(results): def thunked(): return scorer.scores_by_folds(results.value, **kw).flatten() return thunked scores = [Try(call_scorer(slot.results)) for slot in slots] scores = [score.value if score.success else None for score in scores] # `None in scores doesn't work -- these are np.arrays) if any(score is None for score in scores): self.Warning.scores_not_computed() return scores def _fill_table(self, names, scores): table = self.comparison_table for row, row_name, row_scores in zip(count(), names, scores): for col, col_name, col_scores in zip(range(row), names, scores): if row_scores is None or col_scores is None: continue if self.use_rope and self.rope: p0, rope, p1 = baycomp.two_on_single( row_scores, col_scores, self.rope) if np.isnan(p0) or np.isnan(rope) or np.isnan(p1): self._set_cells_na(table, row, col) continue self._set_cell( table, row, col, f"{p0:.3f}<br/><small>{rope:.3f}</small>", f"p({row_name} > {col_name}) = {p0:.3f}\n" f"p({row_name} = {col_name}) = {rope:.3f}") self._set_cell( table, col, row, f"{p1:.3f}<br/><small>{rope:.3f}</small>", f"p({col_name} > {row_name}) = {p1:.3f}\n" f"p({col_name} = {row_name}) = {rope:.3f}") else: p0, p1 = baycomp.two_on_single(row_scores, col_scores) if np.isnan(p0) or np.isnan(p1): self._set_cells_na(table, row, col) continue self._set_cell(table, row, col, f"{p0:.3f}", f"p({row_name} > {col_name}) = {p0:.3f}") self._set_cell(table, col, row, f"{p1:.3f}", f"p({col_name} > {row_name}) = {p1:.3f}") @classmethod def _set_cells_na(cls, table, row, col): cls._set_cell(table, row, col, "NA", "comparison cannot be computed") cls._set_cell(table, col, row, "NA", "comparison cannot be computed") @staticmethod def _set_cell(table, row, col, label, tooltip): item = 
QLabel(label) item.setToolTip(tooltip) item.setAlignment(Qt.AlignCenter) table.setCellWidget(row, col, item) def _update_class_selection(self): self.class_selection_combo.setCurrentIndex(-1) self.class_selection_combo.clear() if not self.data: return if self.data.domain.has_discrete_class: self.cbox.setVisible(True) class_var = self.data.domain.class_var items = (self.TARGET_AVERAGE, ) + class_var.values self.class_selection_combo.addItems(items) class_index = 0 if self.class_selection in class_var.values: class_index = class_var.values.index(self.class_selection) + 1 self.class_selection_combo.setCurrentIndex(class_index) self.class_selection = items[class_index] else: self.cbox.setVisible(False) def _on_target_class_changed(self): self.update_stats_model() self.update_comparison_table() def _invalidate(self, which=None): self.cancel() self.fold_feature_selected = \ self.resampling == OWTestAndScore.FeatureFold # Invalidate learner results for `which` input keys # (if None then all learner results are invalidated) if which is None: which = self.learners.keys() model = self.score_table.model statmodelkeys = [ model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount()) ] for key in which: self.learners[key] = \ self.learners[key]._replace(results=None, stats=None) if key in statmodelkeys: row = statmodelkeys.index(key) for c in range(1, model.columnCount()): item = model.item(row, c) if item is not None: item.setData(None, Qt.DisplayRole) item.setData(None, Qt.ToolTipRole) self.comparison_table.clearContents() self.__needupdate = True def commit(self): """ Commit the results to output. 
""" self.Error.memory_error.clear() valid = [ slot for slot in self.learners.values() if slot.results is not None and slot.results.success ] combined = None predictions = None if valid: # Evaluation results combined = results_merge([slot.results.value for slot in valid]) combined.learner_names = [ learner_name(slot.learner) for slot in valid ] # Predictions & Probabilities try: predictions = combined.get_augmented_data( combined.learner_names) except MemoryError: self.Error.memory_error() self.Outputs.evaluations_results.send(combined) self.Outputs.predictions.send(predictions) def send_report(self): """Report on the testing schema and results""" if not self.data or not self.learners: return if self.resampling == self.KFold: stratified = 'Stratified ' if self.cv_stratified else '' items = [("Sampling type", "{}{}-fold Cross validation".format( stratified, self.NFolds[self.n_folds]))] elif self.resampling == self.LeaveOneOut: items = [("Sampling type", "Leave one out")] elif self.resampling == self.ShuffleSplit: stratified = 'Stratified ' if self.shuffle_stratified else '' items = [ ("Sampling type", "{}Shuffle split, {} random samples with {}% data ".format( stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size])) ] elif self.resampling == self.TestOnTrain: items = [("Sampling type", "No sampling, test on training data")] elif self.resampling == self.TestOnTest: items = [("Sampling type", "No sampling, test on testing data")] else: items = [] if self.data.domain.has_discrete_class: items += [("Target class", self.class_selection.strip("()"))] if items: self.report_items("Settings", items) self.report_table("Scores", self.score_table.view) @classmethod def migrate_settings(cls, settings_, version): if version < 2: if settings_["resampling"] > 0: settings_["resampling"] += 1 if version < 3: # Older version used an incompatible context handler settings_["context_settings"] = [ c for c in settings_.get("context_settings", ()) if not hasattr(c, 
'classes') ] @Slot(float) def setProgressValue(self, value): self.progressBarSet(value) def __update(self): self.__needupdate = False assert self.__task is None or self.__state == State.Running if self.__state == State.Running: self.cancel() self.Warning.test_data_unused.clear() self.Error.test_data_incompatible.clear() self.Warning.test_data_missing.clear() self.Warning.cant_stratify.clear() self.Information.cant_stratify_numeric.clear() self.Information.test_data_transformed( shown=self.resampling == self.TestOnTest and self.data is not None and self.test_data is not None and self.data.domain.attributes != self.test_data.domain.attributes) self.warning() self.Error.class_inconsistent.clear() self.Error.too_many_folds.clear() self.error() # check preconditions and return early or show warnings if self.data is None: self.__state = State.Waiting self.commit() return if not self.learners: self.__state = State.Waiting self.commit() return if self.resampling == OWTestAndScore.KFold: k = self.NFolds[self.n_folds] if len(self.data) < k: self.Error.too_many_folds() self.__state = State.Waiting self.commit() return do_stratify = self.cv_stratified if do_stratify: if self.data.domain.class_var.is_discrete: least = min( filter(None, np.bincount(self.data.Y.astype(int)))) if least < k: self.Warning.cant_stratify(k, least) do_stratify = False else: self.Information.cant_stratify_numeric() do_stratify = False elif self.resampling == OWTestAndScore.TestOnTest: if self.test_data is None: if not self.Error.test_data_empty.is_shown(): self.Warning.test_data_missing() self.__state = State.Waiting self.commit() return elif self.test_data.domain.class_var != self.data.domain.class_var: self.Error.class_inconsistent() self.__state = State.Waiting self.commit() return elif self.test_data is not None: self.Warning.test_data_unused() rstate = 42 # items in need of an update items = [(key, slot) for key, slot in self.learners.items() if slot.results is None] learners = [slot.learner for _, 
slot in items] # deepcopy all learners as they are not thread safe (by virtue of # the base API). These will be the effective learner objects tested # but will be replaced with the originals on return (see restore # learners bellow) learners_c = [copy.deepcopy(learner) for learner in learners] if self.resampling == OWTestAndScore.TestOnTest: test_f = partial( Orange.evaluation.TestOnTestData(store_data=True, store_models=True), self.data, self.test_data, learners_c, self.preprocessor) else: if self.resampling == OWTestAndScore.KFold: sampler = Orange.evaluation.CrossValidation( k=self.NFolds[self.n_folds], random_state=rstate, stratified=do_stratify) elif self.resampling == OWTestAndScore.FeatureFold: sampler = Orange.evaluation.CrossValidationFeature( feature=self.fold_feature) elif self.resampling == OWTestAndScore.LeaveOneOut: sampler = Orange.evaluation.LeaveOneOut() elif self.resampling == OWTestAndScore.ShuffleSplit: sampler = Orange.evaluation.ShuffleSplit( n_resamples=self.NRepeats[self.n_repeats], train_size=self.SampleSizes[self.sample_size] / 100, test_size=None, stratified=self.shuffle_stratified, random_state=rstate) elif self.resampling == OWTestAndScore.TestOnTrain: sampler = Orange.evaluation.TestOnTrainingData( store_models=True) else: assert False, "self.resampling %s" % self.resampling sampler.store_data = True test_f = partial(sampler, self.data, learners_c, self.preprocessor) def replace_learners(evalfunc, *args, **kwargs): res = evalfunc(*args, **kwargs) assert all(lc is lo for lc, lo in zip(learners_c, res.learners)) res.learners[:] = learners return res test_f = partial(replace_learners, test_f) self.__submit(test_f) def __submit(self, testfunc): # type: (Callable[[Callable[[float], None]], Results]) -> None """ Submit a testing function for evaluation MUST not be called if an evaluation is already pending/running. Cancel the existing task first. 
Parameters ---------- testfunc : Callable[[Callable[float]], Results]) Must be a callable taking a single `callback` argument and returning a Results instance """ assert self.__state != State.Running # Setup the task task = TaskState() def progress_callback(finished): if task.is_interruption_requested(): raise UserInterrupt() task.set_progress_value(100 * finished) testfunc = partial(testfunc, callback=progress_callback) task.start(self.__executor, testfunc) task.progress_changed.connect(self.setProgressValue) task.watcher.finished.connect(self.__task_complete) self.Outputs.evaluations_results.invalidate() self.Outputs.predictions.invalidate() self.progressBarInit() self.setStatusMessage("Running") self.__state = State.Running self.__task = task @Slot(object) def __task_complete(self, f: 'Future[Results]'): # handle a completed task assert self.thread() is QThread.currentThread() assert self.__task is not None and self.__task.future is f self.progressBarFinished() self.setStatusMessage("") assert f.done() self.__task = None self.__state = State.Done try: results = f.result() # type: Results learners = results.learners # type: List[Learner] except Exception as er: # pylint: disable=broad-except log.exception("testing error (in __task_complete):", exc_info=True) self.error("\n".join(traceback.format_exception_only(type(er), er))) return learner_key = { slot.learner: key for key, slot in self.learners.items() } assert all(learner in learner_key for learner in learners) # Update the results for individual learners class_var = results.domain.class_var for learner, result in zip(learners, results.split_by_model()): stats = None if class_var.is_primitive(): ex = result.failed[0] if ex: stats = [Try.Fail(ex)] * len(self.scorers) result = Try.Fail(ex) else: stats = [ Try(scorer_caller(scorer, result)) for scorer in self.scorers ] result = Try.Success(result) key = learner_key.get(learner) self.learners[key] = \ self.learners[key]._replace(results=result, stats=stats) 
self.score_table.update_header(self.scorers) self.update_stats_model() self.update_comparison_table() self.commit() def cancel(self): """ Cancel the current/pending evaluation (if any). """ if self.__task is not None: assert self.__state == State.Running self.__state = State.Cancelled task, self.__task = self.__task, None task.cancel() task.progress_changed.disconnect(self.setProgressValue) task.watcher.finished.disconnect(self.__task_complete) self.progressBarFinished() self.setStatusMessage("") def onDeleteWidget(self): self.cancel() self.__executor.shutdown(wait=False) super().onDeleteWidget() def copy_to_clipboard(self): self.score_table.copy_selection_to_clipboard()
class OWConfusionMatrix(widget.OWWidget):
    """Confusion matrix widget.

    Shows the confusion matrix of one learner from an evaluation
    ``Results`` object and outputs the data instances that fall into the
    selected cells.

    Table layout (as built by `_init_table` and `_update`): row/column 0
    carry the "Predicted"/"Actual" captions, row/column 1 carry the class
    labels, the matrix cells start at (2, 2), and the last used row/column
    hold the column/row sums.
    """

    name = "Confusion Matrix"
    description = "Display a confusion matrix constructed from " \
                  "the results of classifier evaluations."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
    outputs = [("Selected Data", Orange.data.Table)]

    quantities = [
        "Number of instances",
        "Proportion of predicted",
        "Proportion of actual",
    ]

    settingsHandler = settings.ClassValuesContextHandler()

    selected_learner = settings.Setting(0)
    selection = settings.ContextSetting(set())
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")
    ]

    def __init__(self):
        super().__init__()
        # Stored settings may hold `selected_learner` as a list; reduce it
        # to its first element (or 0 when the list is empty).
        if isinstance(self.selected_learner, list):
            self.selected_learner = (self.selected_learner + [0])[0]

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        box = gui.vBox(self.controlArea, "Learners")
        self.learners_box = gui.listBox(
            box, self, "selected_learner", "learners",
            callback=self._learner_changed)

        box = gui.vBox(self.controlArea, "Show")
        gui.comboBox(box, self, "selected_quantity", items=self.quantities,
                     callback=self._update)

        box = gui.vBox(self.controlArea, "Select")
        gui.button(box, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(box, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(box, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

        self.outputbox = box = gui.vBox(self.controlArea, "Output")
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities", callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "autocommit",
                        "Send Selected", "Send Automatically")

        grid = QGridLayout()

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.clicked.connect(self.cell_clicked)
        grid.addWidget(view, 0, 0)
        self.mainArea.layout().addLayout(grid)

    def sizeHint(self):
        """Initial size"""
        return QSize(750, 490)

    def _item(self, i, j):
        """Return the model item at (i, j), or a fresh item if none exists."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` in the table model at (i, j)."""
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Create the captions and header cells for `nclasses` classes.

        Header labels are taken from `self.headers`, which must be set
        before this is called.
        """
        item = self._item(0, 2)
        item.setData("Predicted", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)
        self._set_item(0, 2, item)

        item = self._item(2, 0)
        item.setData("Actual", Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)
        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # The 2x2 top-left corner is inert.
        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

        # Each label is written twice: into header row 1 and header column 1.
        for p, label in enumerate(self.headers):
            for i, j in ((1, p + 2), (p + 2, 1)):
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                item.setFont(bold_font)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(self.headers) - 1:
                    # "b" for cells in the header row, "r" for cells in the
                    # header column (indexing the string with a bool).
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(i, j, item)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            # Qt5 renamed QHeaderView.setResizeMode to setSectionResizeMode;
            # prefer the new name and fall back to the Qt4 one.
            set_resize_mode = getattr(
                hor_header, "setSectionResizeMode", None
            ) or hor_header.setResizeMode
            set_resize_mode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)
        self.tablemodel.setRowCount(nclasses + 3)
        self.tablemodel.setColumnCount(nclasses + 3)

    def set_results(self, results):
        """Set the input results.

        `results` may be None to clear the widget. Results whose data has
        no discrete class produce a warning and are treated as no input.

        Raises NotImplementedError when `results` is given without
        `results.data`.
        """
        prev_sel_learner = self.selected_learner
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data

        if data is not None and not data.domain.has_discrete_class:
            self.warning("Confusion Matrix cannot show regression results.")
            # A non-discrete class has no class values to build the matrix
            # from; carrying on would fail below, so behave as if there
            # were no input.
            data = results = None

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
        else:
            self.report_button.setDisabled(False)

            nmodels = results.predicted.shape[0]
            # list(...) so that both list and tuple `values` are accepted.
            self.headers = list(class_values) + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = [
                    "Learner #{}".format(i + 1) for i in range(nmodels)
                ]

            self._init_table(len(class_values))
            self.openContext(data.domain.class_var)
            if prev_sel_learner is None or \
                    prev_sel_learner >= len(self.learners):
                self.selected_learner = 0
            else:
                self.selected_learner = prev_sel_learner
            self._update()
            self._set_selection()
            self.unconditional_commit()

    def clear(self):
        """Reset the widget, clear controls"""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the off-diagonal elements of the matrix"""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Reset selection"""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Handle cell click event.

        Clicking a cell in header row/column 1 or in the last row/column
        selects the corresponding whole column, row or the entire matrix.
        Clicks in row 0 or column 0 are ignored.
        """
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        """Output data instances corresponding to selected cells"""
        if self.results is not None and self.data is not None \
                and self.selected_learner is not None:
            indices = self.tableview.selectedIndexes()
            # Matrix cells start at (2, 2); shift so that each selected cell
            # becomes an (actual, predicted) pair.
            indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
            actual = self.results.actual
            learner_name = self.learners[self.selected_learner]
            predicted = self.results.predicted[self.selected_learner]
            selected = [
                i for i, t in enumerate(zip(actual, predicted))
                if t in indices
            ]
            row_indices = self.results.row_indices[selected]

            extra = []
            class_var = self.data.domain.class_var
            metas = self.data.domain.metas

            if self.append_predictions:
                predicted = numpy.array(predicted[selected], dtype=object)
                extra.append(predicted.reshape(-1, 1))
                var = Orange.data.DiscreteVariable(
                    "{}({})".format(class_var.name, learner_name),
                    class_var.values)
                metas = metas + (var, )

            if self.append_probabilities and \
                    self.results.probabilities is not None:
                probs = self.results.probabilities[self.selected_learner,
                                                   selected]
                extra.append(numpy.array(probs, dtype=object))
                pvars = [
                    Orange.data.ContinuousVariable("p({})".format(value))
                    for value in class_var.values
                ]
                metas = metas + tuple(pvars)

            X = self.data.X[row_indices]
            Y = self.data.Y[row_indices]
            M = self.data.metas[row_indices]
            row_ids = self.data.ids[row_indices]

            # Appended predictions/probabilities become extra meta columns.
            M = numpy.hstack((M, ) + tuple(extra))
            domain = Orange.data.Domain(self.data.domain.attributes,
                                        self.data.domain.class_vars,
                                        metas)
            data = Orange.data.Table.from_numpy(domain, X, Y, M)
            data.ids = row_ids
            data.name = learner_name
        else:
            data = None

        self.send("Selected Data", data)

    def _invalidate(self):
        """Store the current cell selection in the setting and commit."""
        indices = self.tableview.selectedIndexes()
        self.selection = {(ind.row() - 2, ind.column() - 2)
                          for ind in indices}
        self.commit()

    def _set_selection(self):
        """Apply the stored `selection` setting to the table view."""
        selection = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        """Redraw, re-apply the selection and commit for the new learner."""
        self._update()
        self._set_selection()
        self.commit()

    def _update(self):
        """Fill the matrix cells and the sum row/column.

        Does nothing when there are no results or no selected learner.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        # Update the displayed confusion matrix
        if self.results is not None and self.selected_learner is not None:
            cmatrix = confusion_matrix(self.results, self.selected_learner)
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            n = len(cmatrix)
            diag = numpy.diag_indices(n)

            # `colors` holds the shading intensity of each cell; the
            # diagonal is zeroed so that off-diagonal cells are scaled
            # against off-diagonal maxima only.
            colors = cmatrix.astype(numpy.double)
            colors[diag] = 0
            if self.selected_quantity == 0:
                # `numpy.int` was only an alias of the builtin and has been
                # removed from NumPy; use `int` directly.
                normalized = cmatrix.astype(int)
                formatstr = "{}"
                div = numpy.array([colors.max()])
            else:
                if self.selected_quantity == 1:
                    normalized = 100 * cmatrix / colsum
                    div = colors.max(axis=0)
                else:
                    normalized = 100 * cmatrix / rowsum[:, numpy.newaxis]
                    div = colors.max(axis=1)[:, numpy.newaxis]
                formatstr = "{:2.1f} %"
            div[div == 0] = 1  # avoid dividing by zero below
            colors /= div
            colors[diag] = normalized[diag] / normalized[diag].max()

            for i in range(n):
                for j in range(n):
                    val = normalized[i, j]
                    col_val = colors[i, j]
                    item = self._item(i + 2, j + 2)
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Hue 240 on the diagonal, hue 0 elsewhere; lightness
                    # decreases as the cell's relative value grows.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val)
                        else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            bold_font = self.tablemodel.invisibleRootItem().font()
            bold_font.setBold(True)

            def _sum_item(value, border=""):
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                item.setData(border, BorderRole)
                item.setData(QColor(192, 192, 192), BorderColorRole)
                return item

            for i in range(n):
                self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
                self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
            self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Send report"""
        if self.results is not None and self.selected_learner is not None:
            self.report_table(
                "Confusion matrix for {} (showing {})".format(
                    self.learners[self.selected_learner],
                    self.quantities[self.selected_quantity].lower()),
                self.tableview)
class OWDifference(widget.OWWidget):
    """Widget that appends differenced copies of selected series.

    For every selected continuous variable a new attribute is appended
    holding its discrete difference, quotient or percentage change, with
    positions that have no predecessor filled with NaN.
    """

    name = 'Difference'
    description = 'Make the time series stationary by replacing it with ' \
                  '1st or 2nd order discrete difference along its values. '
    icon = 'icons/Difference.svg'
    priority = 570
    keywords = ['difference', 'derivative', 'quotient', 'percent change']

    class Inputs:
        time_series = Input("Time series", Table)

    class Outputs:
        time_series = Output("Time series", Timeseries)

    settingsHandler = DomainContextHandler()
    selected = ContextSetting([], schema_only=True)

    class Operation(str, Enum):
        DIFF = 'Difference'
        QUOT = 'Quotient'
        PERC = 'Percentage change'

    want_main_area = False
    resizing_enabled = False

    chosen_operation = settings.Setting(Operation.DIFF)
    diff_order = settings.Setting(1)
    shift_period = settings.Setting(1)
    invert_direction = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            'Series can be differentiated up to the 2nd order. '
            'However, if the series is shifted by other than 1 '
            'step, a differencing order of 1 is always assumed.',
            'diff-shift')
    ]

    def __init__(self):
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, 'Differencing')

        gui.comboBox(box, self, 'chosen_operation',
                     orientation=Qt.Horizontal,
                     items=[el.value for el in self.Operation],
                     label='Compute:',
                     callback=self.on_changed,
                     sendSelectedValue=True)

        self.order_spin = gui.spin(
            box, self, 'diff_order', 1, 2,
            label='Differencing order:',
            callback=self.on_changed,
            tooltip='The value corresponds to n-th order numerical '
                    'derivative of the series. \nThe order is fixed to 1 '
                    'if the shift period is other than 1.')

        gui.spin(box, self, 'shift_period', 1, 100,
                 label='Shift:',
                 callback=self.on_changed,
                 tooltip='Set this to other than 1 if you don\'t want to '
                         'compute differences for subsequent values but for '
                         'values shifted number of spaces apart. \n'
                         'If this value is different from 1, differencing '
                         'order is fixed to 1.')

        gui.checkBox(box, self, 'invert_direction',
                     label='Invert differencing direction',
                     callback=self.on_changed,
                     tooltip='Influences where the series is padded with nan '
                             'values — at the beginning or at the end.')

        self.view = view = QListView(self,
                                     selectionMode=QListView.ExtendedSelection)
        self.model = model = VariableListModel(parent=self)
        view.setModel(model)
        view.selectionModel().selectionChanged.connect(self.on_changed)
        box.layout().addWidget(view)

        gui.auto_commit(box, self, 'autocommit', '&Apply')

    @Inputs.time_series
    def set_data(self, data):
        """Handle a new input table (or None when the input is removed)."""
        self.closeContext()
        self.data = data = None if data is None else \
            Timeseries.from_data_table(data)
        if data is not None:
            # Offer every continuous variable except the time variable.
            self.model[:] = [
                var for var in data.domain.variables
                if var.is_continuous and var is not data.time_variable
            ]
            self.select_default_variable()
            self.openContext(self.data)
            self._restore_selection()
        else:
            self.reset_model()
        self.on_changed()

    def _restore_selection(self):
        """Select in the view the rows of the variables in `self.selected`.

        Entries that are not among the currently listed variables are
        skipped; this covers the integer placeholder left by
        `select_default_variable` as well as variables that are no longer
        present.
        """
        def restore(view, selection):
            with signal_blocking(view.selectionModel()):
                # gymnastics for transforming variables back to indices
                var_list = [
                    var for var in self.data.domain.variables
                    if var.is_continuous
                    and var is not self.data.time_variable
                ]
                indices = [
                    var_list.index(item) for item in selection
                    if item in var_list
                ]
                select_rows(view, indices)

        restore(self.view, self.selected)

    def select_default_variable(self):
        """Select the first row of the variable list."""
        self.selected = [0]
        select_rows(self.view, self.selected)

    def reset_model(self):
        """Empty the variable list."""
        self.model.wrap([])

    def on_changed(self):
        """Sync `selected` and the order spin box with the GUI, then commit."""
        rows = [
            index.row()
            for index in self.view.selectionModel().selectedRows()
        ]
        # The differencing order applies only to plain differences with
        # shift 1.
        self.order_spin.setEnabled(
            self.shift_period == 1
            and self.chosen_operation == self.Operation.DIFF)
        self.selected = [self.model[row] for row in rows]
        self.commit()

    def commit(self):
        """Compute the new columns and send the extended time series.

        Sends None when there is no data or no variable is selected.
        """
        data = self.data
        if not data or not self.selected:
            self.Outputs.time_series.send(None)
            return

        X = []
        attrs = []
        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation

        for var in self.selected:
            col = np.ravel(data[:, var])

            # Inverted direction: compute on the reversed column and
            # reverse the result back, so the NaN padding ends up at the
            # end instead of the beginning.
            if invert:
                col = col[::-1]

            out = np.empty(len(col))
            if op == self.Operation.DIFF and shift == 1:
                out[order:] = np.diff(col, order)
                out[:order] = np.nan
            else:
                if op == self.Operation.DIFF:
                    out[shift:] = col[shift:] - col[:-shift]
                else:
                    out[shift:] = np.divide(col[shift:], col[:-shift])
                    if op == self.Operation.PERC:
                        out = (out - 1) * 100
                out[:shift] = np.nan

            if invert:
                out = out[::-1]

            X.append(out)

            if op == self.Operation.DIFF and shift == 1:
                details = f'order={order}'
            else:
                details = f'shift={shift}'

            template = f'{var} ({op[:4].lower()}; {details})'
            name = available_name(data.domain, template)
            attrs.append(ContinuousVariable(name))

        ts = Timeseries(
            Domain(data.domain.attributes + tuple(attrs),
                   data.domain.class_vars,
                   data.domain.metas),
            np.column_stack((data.X, np.column_stack(X))),
            data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
class OWNxExplorer(widget.OWWidget): name = "Network Explorer" description = "Visually explore the network and its properties." icon = "icons/NetworkExplorer.svg" priority = 6420 class Inputs: network = Input("Network", network.Graph, default=True) node_subset = Input("Node Subset", Table) node_data = Input("Node Data", Table) node_distances = Input("Node Distances", Orange.misc.DistMatrix) class Outputs: subgraph = Output("Selected sub-network", network.Graph) unselected_subgraph = Output("Remaining sub-network", network.Graph) distances = Output("Distance matrix", Orange.misc.DistMatrix) selected = Output("Selected items", Table) highlighted = Output("Highlighted items", Table) remaining = Output("Remaining items", Table) UserAdviceMessages = [ widget.Message( 'When selecting nodes on the Marking tab, ' 'press <b><tt>Enter</tt></b> key to add ' '<b><font color="{}">highlighted</font></b> nodes to ' '<b><font color="{}">selection</font></b>.'.format( Node.Pen.HIGHLIGHTED.color().name(), Node.Pen.SELECTED.color().name()), 'marking-info', widget.Message.Information), widget.Message( 'Left-click to select nodes ' '(hold <b><tt>Shift</tt></b> to append to selection). ' 'Right-click to pan/move the view. 
Scroll to zoom.', 'mouse-info', widget.Message.Information), ] settingsHandler = DomainContextHandler() do_auto_commit = Setting(True) selectionMode = Setting(SelectionMode.FROM_INPUT) tabIndex = Setting(0) showEdgeWeights = Setting(False) relativeEdgeWidths = Setting(False) randomizePositions = Setting(True) invertNodeSize = Setting(False) markDistance = Setting(1) markSearchString = Setting("") markNBest = Setting(1) markNConnections = Setting(2) point_width = Setting(10) edge_width = Setting(1) attr_size = ContextSetting(None) attr_color = ContextSetting(None) attrs_label = ContextSetting({}) attrs_tooltip = ContextSetting({}) graph_name = 'view' class Warning(widget.OWWidget.Warning): distance_matrix_size = widget.Msg( "Distance matrix size doesn't match the number of network nodes. Not using it." ) no_graph_found = widget.Msg('No graph found!') no_graph_or_items = widget.Msg( 'No graph provided or no items attached to the graph.') class Error(widget.OWWidget.Error): instance_for_each_node = widget.Msg( 'Items table must have one instance for each network node.') network_too_large = widget.Msg( 'Network is too large to visualize. 
Sorry.') def __init__(self): super().__init__() #self.contextHandlers = {"": DomainContextHandler("", [ContextField("attributes", selected="node_label_attrs"), ContextField("attributes", selected="tooltipAttributes"), "color"])} self.view = GraphView(self) self.mainArea.layout().addWidget(self.view) self.graph_attrs = [] self.acceptingEnterKeypress = False self.node_label_attrs = [] self.tooltipAttributes = [] self.searchStringTimer = QTimer(self) self.markInputItems = None self.node_color_attr = 0 self.node_size_attr = 0 self.nHighlighted = 0 self.nSelected = 0 self.verticesPerEdge = 0 self.edgesPerVertex = 0 self.items_matrix = None self.number_of_nodes_label = 0 self.number_of_edges_label = 0 self.graph = None self.setMinimumWidth(600) self.tabs = gui.tabWidget(self.controlArea) self.displayTab = gui.createTabPage(self.tabs, "Display") self.markTab = gui.createTabPage(self.tabs, "Marking") def on_tab_changed(index): self.tabIndex = index self.set_selection_mode() self.tabs.currentChanged.connect(on_tab_changed) self.tabs.setCurrentIndex(self.tabIndex) ib = gui.widgetBox(self.displayTab, "Info") gui.label( ib, self, "Nodes: %(number_of_nodes_label)i (%(verticesPerEdge).2f per edge)" ) gui.label( ib, self, "Edges: %(number_of_edges_label)i (%(edgesPerVertex).2f per node)") box = gui.widgetBox(self.displayTab, "Nodes") self.relayout_button = gui.button(box, self, 'Re-layout', callback=self.relayout, autoDefault=False) self.randomize_cb = gui.checkBox(box, self, "randomizePositions", "Randomize positions") self.view.positionsChanged.connect( lambda positions, progress: self.progressbar.widget.progressBarSet( int(round(100 * progress)))) def animationFinished(): self.relayout_button.setEnabled(True) self.progressbar.finish() self.view.animationFinished.connect(animationFinished) self.color_model = VariableListModel(placeholder="(Same color)") self.color_combo = gui.comboBox(box, self, "attr_color", label='Color:', orientation='horizontal', 
callback=self.set_node_colors, model=self.color_model) self.size_model = VariableListModel(placeholder="(Same size)") self.size_combo = gui.comboBox(box, self, "attr_size", label='Size:', orientation='horizontal', callback=self.set_node_sizes, model=self.size_model) gui.hSlider(box, self, 'point_width', label="Symbol size: ", minValue=1, maxValue=10, step=1, createLabel=False, callback=self.set_node_sizes) hb = gui.widgetBox(box, orientation="horizontal") hb.layout().addStretch(1) self.invertNodeSizeCheck = gui.checkBox(hb, self, "invertNodeSize", "Invert", callback=self.set_node_sizes) hb = gui.widgetBox(self.displayTab, box="Node labels | tooltips", orientation="horizontal", addSpace=False) self.attListBox = gui.listBox( hb, self, "node_label_attrs", "graph_attrs", selectionMode=QListWidget.MultiSelection, sizeHint=QSize(100, 100), callback=self._on_node_label_attrs_changed) self.tooltipListBox = gui.listBox( hb, self, "tooltipAttributes", "graph_attrs", selectionMode=QListWidget.MultiSelection, sizeHint=QSize(100, 100), callback=self._clicked_tooltip_lstbox) eb = gui.widgetBox(self.displayTab, "Edges", orientation="vertical") self.checkbox_relative_edges = gui.checkBox( eb, self, 'relativeEdgeWidths', 'Relative edge widths', callback=self.set_edge_sizes) gui.hSlider(eb, self, 'edge_width', label="Edge width: ", minValue=1, maxValue=10, step=1, createLabel=False, callback=self.set_edge_sizes) self.checkbox_show_weights = gui.checkBox( eb, self, 'showEdgeWeights', 'Show edge weights', callback=self.set_edge_labels) ib = gui.widgetBox(self.markTab, "Info", orientation="vertical") gui.label(ib, self, "Nodes: %(number_of_nodes_label)i") gui.label(ib, self, "Selected: %(nSelected)i") gui.label(ib, self, "Highlighted: %(nHighlighted)i") def on_selection_change(): self.nSelected = len(self.view.getSelected()) self.nHighlighted = len(self.view.getHighlighted()) self.set_selection_mode() self.commit() self.view.selectionChanged.connect(on_selection_change) ib = 
gui.widgetBox(self.markTab, "Highlight nodes ...") ribg = gui.radioButtonsInBox(ib, self, "selectionMode", callback=self.set_selection_mode) gui.appendRadioButton(ribg, "None") gui.appendRadioButton(ribg, "... whose attributes contain:") self.ctrlMarkSearchString = gui.lineEdit( gui.indentedBox(ribg), self, "markSearchString", callback=self._set_search_string_timer, callbackOnType=True) self.searchStringTimer.timeout.connect(self.set_selection_mode) gui.appendRadioButton(ribg, "... neighbours of selected, ≤ N hops away") ib = gui.indentedBox(ribg, orientation=0) self.ctrlMarkDistance = gui.spin( ib, self, "markDistance", 1, 100, 1, label="Hops:", callback=lambda: self.set_selection_mode(SelectionMode.NEIGHBORS)) ib.layout().addStretch(1) gui.appendRadioButton(ribg, "... with at least N connections") gui.appendRadioButton(ribg, "... with at most N connections") ib = gui.indentedBox(ribg, orientation=0) self.ctrlMarkNConnections = gui.spin( ib, self, "markNConnections", 0, 1000000, 1, label="Connections:", callback=lambda: self.set_selection_mode( SelectionMode.AT_MOST_N if self.selectionMode == SelectionMode. AT_MOST_N else SelectionMode.AT_LEAST_N)) ib.layout().addStretch(1) gui.appendRadioButton(ribg, "... with more connections than any neighbor") gui.appendRadioButton( ribg, "... with more connections than average neighbor") gui.appendRadioButton(ribg, "... with most connections") ib = gui.indentedBox(ribg, orientation=0) self.ctrlMarkNumber = gui.spin( ib, self, "markNBest", 1, 1000000, 1, label="Number of nodes:", callback=lambda: self.set_selection_mode(SelectionMode.MOST_CONN)) ib.layout().addStretch(1) self.markInputRadioButton = gui.appendRadioButton( ribg, "... 
from Node Subset input signal") self.markInputRadioButton.setEnabled(True) gui.auto_commit(ribg, self, 'do_auto_commit', 'Output changes') self.markTab.layout().addStretch(1) self.set_graph(None) self.set_selection_mode() def sizeHint(self): return QSize(800, 600) def commit(self): self.send_data() @Inputs.node_distances def set_items_distance_matrix(self, matrix): assert matrix is None or isinstance(matrix, Orange.misc.DistMatrix) self.items_matrix = matrix self.relayout() def _set_search_string_timer(self): self.selectionMode = SelectionMode.SEARCH self.searchStringTimer.stop() self.searchStringTimer.start(300) def switchTab(self, index=None): index = index or self.tabs.currentIndex() curTab = self.tabs.widget(index) self.acceptingEnterKeypress = False if curTab == self.markTab and self.selectionMode != SelectionMode.NONE: self.acceptingEnterKeypress = True @non_reentrant def set_selection_mode(self, selectionMode=None): self.searchStringTimer.stop() selectionMode = self.selectionMode = selectionMode or self.selectionMode self.switchTab() if (self.graph is None or self.tabs.widget(self.tabs.currentIndex()) != self.markTab and selectionMode != SelectionMode.FROM_INPUT): return if selectionMode == SelectionMode.NONE: self.view.setHighlighted([]) elif selectionMode == SelectionMode.SEARCH: table, txt = self.graph.items(), self.markSearchString.lower() if not table or not txt: return toMark = set(i for i, instance in enumerate(table) if txt in " ".join(map(str, instance.list)).lower()) self.view.setHighlighted(toMark) elif selectionMode == SelectionMode.NEIGHBORS: selected = set(self.view.getSelected()) neighbors = selected.copy() for _ in range(self.markDistance): for neigh in list(neighbors): neighbors |= set(self.graph[neigh].keys()) neighbors -= selected self.view.setHighlighted(neighbors) elif selectionMode == SelectionMode.AT_LEAST_N: self.view.setHighlighted( set(node for node, degree in self.graph.degree() if degree >= self.markNConnections)) elif 
selectionMode == SelectionMode.AT_MOST_N: self.view.setHighlighted( set(node for node, degree in self.graph.degree() if degree <= self.markNConnections)) elif selectionMode == SelectionMode.ANY_NEIGH: self.view.setHighlighted( set(node for node, degree in self.graph.degree() if degree > max(dict(self.graph.degree(self.graph[node])).values(), default=0))) elif selectionMode == SelectionMode.AVG_NEIGH: self.view.setHighlighted( set(node for node, degree in self.graph.degree() if degree > np.nan_to_num( np.mean( list( dict(self.graph.degree( self.graph[node])).values()))))) elif selectionMode == SelectionMode.MOST_CONN: degrees = np.array( sorted(self.graph.degree(), key=lambda i: i[1], reverse=True)) cut_ind = max(1, min(self.markNBest, self.graph.number_of_nodes())) cut_degree = degrees[cut_ind - 1, 1] toMark = set(degrees[degrees[:, 1] >= cut_degree, 0]) self.view.setHighlighted(toMark) elif selectionMode == SelectionMode.FROM_INPUT: tomark = {} if self.markInputItems: ids = set(self.markInputItems.ids) tomark = { x for x in self.graph if self.graph.items()[x].id in ids } self.view.setHighlighted(tomark) def keyReleaseEvent(self, ev): """On Enter, expand the selected set with the highlighted""" if (not self.acceptingEnterKeypress or ev.key() not in (Qt.Key_Return, Qt.Key_Enter)): super().keyReleaseEvent(ev) return highlighted = self.view.getHighlighted() self.view.setSelected(highlighted, extend=True) self.view.setHighlighted([]) self.set_selection_mode() def save_network(self): # TODO: this was never reviewed since Orange2 if self.view is None or self.graph is None: return filename = QFileDialog.getSaveFileName( self, 'Save Network', '', 'NetworkX graph as Python pickle (*.gpickle)\n' 'NetworkX edge list (*.edgelist)\n' 'Pajek network (*.net *.pajek)\n' 'GML network (*.gml)') if filename: _, ext = os.path.splitext(filename) if not ext: filename += ".net" items = self.graph.items() for i in range(self.graph.number_of_nodes()): graph_node = self.graph.node[i] 
plot_node = self.networkCanvas.networkCurve.nodes()[i] if items is not None: ex = items[i] if 'x' in ex.domain: ex['x'] = plot_node.x() if 'y' in ex.domain: ex['y'] = plot_node.y() graph_node['x'] = plot_node.x() graph_node['y'] = plot_node.y() network.readwrite.write(self.graph, filename) def send_data(self): if not self.graph: for output in dir(self.Outputs): if not output.startswith('__'): getattr(self.Outputs, output).send(None) return selected = self.view.getSelected() self.Outputs.subgraph.send( self.graph.subgraph(selected) if selected else None) self.Outputs.unselected_subgraph.send( self.graph.subgraph(self.view.getUnselected() ) if selected else self.graph) self.Outputs.distances.send( self.items_matrix.submatrix(sorted(selected)) if self.items_matrix is not None and selected else None) items = self.graph.items() if not items: self.Outputs.selected.send(None) self.Outputs.highlighted.send(None) self.Outputs.remaining.send(None) else: highlighted = self.view.getHighlighted() self.Outputs.selected.send(items[ sorted(selected), :] if selected else None) self.Outputs.highlighted.send(items[ sorted(highlighted), :] if highlighted else None) remaining = sorted( set(self.graph) - set(selected) - set(highlighted)) self.Outputs.remaining.send(items[ remaining, :] if remaining else None) def _set_combos(self): self._clear_combos() self.graph_attrs = self.graph.items_vars() self.color_model[:] = [None] + [ v for v in self.graph_attrs if v.is_primitive() ] self.size_model[:] = [None] + [ v for v in self.graph_attrs if v.is_continuous ] self.size_combo.setDisabled(not self.graph_attrs) self.color_combo.setDisabled(not self.graph_attrs) self.set_node_sizes() self.set_node_colors() self.set_edge_sizes() for columns, box in ((self.attrs_label, self.attListBox), (self.attrs_tooltip, self.tooltipListBox)): columns = [var.name for var in columns] if columns: selection = QItemSelection() model = box.model() for i in range(box.count()): if str(box.item(i).text()) in columns: 
selection.append(QItemSelectionRange(model.index(i, 0))) selmodel = box.selectionModel() selmodel.select(selection, selmodel.Select | selmodel.Clear) else: box.selectionModel().clearSelection() self._on_node_label_attrs_changed() self._clicked_tooltip_lstbox() def _clear_combos(self): self.graph_attrs = [] self.color_combo.clear() self.size_combo.clear() def set_graph_none(self): self.graph = None self.graph_base = None self._clear_combos() self.number_of_nodes_label = 0 self.number_of_edges_label = 0 self.verticesPerEdge = 0 self.edgesPerVertex = 0 self._items = None self.view.set_graph(None) @Inputs.network def set_graph(self, graph): if not graph: return self.set_graph_none() if graph.number_of_nodes() < 2: self.set_graph_none() self.information( 'I\'m not really in a mood to visualize just one node. Try again tomorrow.' ) return if graph.number_of_nodes() + graph.number_of_edges() > 30000: self.set_graph_none() self.Error.network_too_large() return self.information() self.closeContext() all_edges_equal = bool( 1 == len(set(w for u, v, w in graph.edges(data='weight')))) self.checkbox_show_weights.setEnabled(not all_edges_equal) self.checkbox_relative_edges.setEnabled(not all_edges_equal) self.graph_base = graph self.graph = graph.copy() # Set items table from the separate signal if self._items: self.set_items(self._items) self.view.set_graph(self.graph, relayout=False) # Set labels self.number_of_nodes_label = self.graph.number_of_nodes() self.number_of_edges_label = self.graph.number_of_edges() self.verticesPerEdge = self.graph.number_of_nodes() / max( 1, self.graph.number_of_edges()) self.edgesPerVertex = self.graph.number_of_edges() / max( 1, self.graph.number_of_nodes()) self._set_combos() if self.graph.items(): self.openContext(self.graph.items().domain) self.Error.clear() self.set_selection_mode() self.randomizePositions = True self.relayout() @Inputs.node_data def set_items(self, items=None): self._items = items if items is None: return 
self.set_graph(self.graph_base) if not self.graph: self.Warning.no_graph_found() return self.Warning.clear() if len(items) != self.graph.number_of_nodes(): self.Error.instance_for_each_node() return self.Error.instance_for_each_node.clear() self.graph.set_items(items) self._set_combos() @Inputs.node_subset def set_marking_items(self, items): self.markInputRadioButton.setEnabled(False) self.markInputItems = items self.Warning.clear() if self.selectionMode == SelectionMode.FROM_INPUT and \ (items is None or self.graph is None or self.graph.items() is None): self.selectionMode = SelectionMode.NONE if items is None: self.view.selectionChanged.emit() return if self.graph is None or self.graph.items() is None: self.Warning.no_graph_or_items() return if len(items) > 0: self.markInputRadioButton.setEnabled(True) self.view.selectionChanged.emit() def relayout(self): if self.graph is None or self.graph.number_of_nodes() <= 1: return self.progressbar = gui.ProgressBar(self, FR_ITERATIONS) distmatrix = self.items_matrix if distmatrix is not None and distmatrix.shape[ 0] != self.graph.number_of_nodes(): self.Warning.distance_matrix_size() distmatrix = None self.Warning.distance_matrix_size.clear() self.relayout_button.setDisabled(True) self.view.relayout(randomize=self.randomizePositions, weight=distmatrix) def _on_node_label_attrs_changed(self): if not self.graph: return attributes = self.attrs_label = [ self.graph_attrs[i] for i in self.node_label_attrs ] if attributes: table = self.graph.items() if not table: return for i, node in enumerate(self.view.nodes): text = ', '.join(map(str, table[i, attributes][0].list)) node.setText(text) else: for node in self.view.nodes: node.setText('') def _clicked_tooltip_lstbox(self): if not self.graph: return attributes = self.attrs_tooltip = [ self.graph_attrs[i] for i in self.tooltipAttributes ] if attributes: table = self.graph.items() if not table: return assert self.view.nodes for i, node in enumerate(self.view.nodes): node.setTooltip( 
lambda row=i, attributes=attributes, table=table: '<br>'. join('<b>{.name}:</b> {}'.format( i[0], str(i[1]).replace('<', '<')) for i in zip( attributes, table[row, attributes][0].list))) else: for node in self.view.nodes: node.setTooltip(None) def set_edge_labels(self): if not self.graph: return if self.showEdgeWeights: weights = (str(w or '') for u, v, w in self.graph.edges(data='weight')) else: weights = ('' for i in range(self.graph.number_of_edges())) for edge, weight in zip(self.view.edges, weights): edge.setText(weight) def set_node_colors(self): if not self.graph: return attribute = self.attr_color assert not attribute or isinstance(attribute, Orange.data.Variable) if self.view.legend is not None: self.view.scene().removeItem(self.view.legend) self.view.legend.clear() else: self.view.legend = LegendItem() self.view.legend.set_parent(self.view) if not attribute: for node in self.view.nodes: node.setColor(None) return table = self.graph.items() if not table: return if attribute in table.domain.class_vars: values = table[:, attribute].Y if values.ndim > 1: values = values.T elif attribute in table.domain.metas: values = table[:, attribute].metas[:, 0] elif attribute in table.domain.attributes: values = table[:, attribute].X[:, 0] else: raise RuntimeError("Shouldn't be able to select this column") if attribute.is_continuous: colors = CONTINUOUS_PALETTE[scale(values)] label = PaletteItemSample( CONTINUOUS_PALETTE, DiscretizedScale(np.nanmin(values), np.nanmax(values))) self.view.legend.addItem(label, "") self.view.legend.setGeometry(label.boundingRect()) elif attribute.is_discrete: DISCRETE_PALETTE = ColorPaletteGenerator(len(attribute.values)) colors = DISCRETE_PALETTE[values] for value, color in zip(attribute.values, DISCRETE_PALETTE): self.view.legend.addItem( ScatterPlotItem(pen=Node.Pen.DEFAULT, brush=QBrush(QColor(color)), size=10, symbol="o"), escape(value)) for node, color in zip(self.view.nodes, colors): node.setColor(color) 
self.view.scene().addItem(self.view.legend) self.view.legend.geometry_changed() def set_node_sizes(self): self.invertNodeSizeCheck.setDisabled(not self.attr_size) if not self.graph: return table = self.graph.items() if table is None: return try: a = table.get_column_view(self.attr_size)[0] values = a.copy() except Exception: for node in self.view.nodes: node.setSize(MIN_NODE_SIZE * self.point_width) return if self.invertNodeSize: values += np.nanmin(values) + 1 values = 1 / values nodemin, nodemax = np.nanmin(values), np.nanmax(values) if nodemin == nodemax: # np.polyfit borks on this condition sizes = (MIN_NODE_SIZE for _ in range(len(self.view.nodes))) else: k, n = np.polyfit([nodemin, nodemax], [MIN_NODE_SIZE, MAX_NODE_SIZE], 1) sizes = values * k + n sizes[np.isnan(sizes)] = np.nanmean(sizes) for node, size in zip(self.view.nodes, sizes): node.setSize(size * self.point_width) def set_edge_sizes(self): if not self.graph: return if self.relativeEdgeWidths: widths = [ self.graph.adj[u][v].get('weight', 1) for u, v in self.graph.edges() ] widths = scale(widths, .7, 8) * np.log2(self.edge_width / 4 + 1) else: widths = (.7 * self.edge_width for _ in range(self.graph.number_of_edges())) for edge, width in zip(self.view.edges, widths): edge.setSize(width) def send_report(self): self.report_data("Data", self.graph.items()) self.report_items('Graph info', [ ("Number of vertices", self.graph.number_of_nodes()), ("Number of edges", self.graph.number_of_edges()), ("Vertices per edge", "%.3f" % self.verticesPerEdge), ("Edges per vertex", "%.3f" % self.edgesPerVertex), ]) if self.node_color_attr or self.node_size_attr or self.node_label_attrs: self.report_items("Visual settings", [ ("Vertex color", self.colorCombo.currentText()), ("Vertex size", str(self.nodeSizeCombo.currentText()) + " (inverted)" if self.invertNodeSize else ""), ("Labels", ", ".join(self.graph_attrs[i].name for i in self.node_label_attrs)), ]) self.report_plot("Graph", self.view)