class ContingencyTable(QTableView):
    """
    A contingency table widget which can be used wherever ``QTableView`` could
    be used.

    The table keeps its contents in a ``QStandardItemModel``. Row 0 and
    column 0 hold the top headers, row 1 and column 1 hold the class headers,
    and data cells start at model index ``(2, 2)``. When circles are turned
    off, one extra row and one extra column hold the aggregate values.

    Parameters
    ----------
    parent : Orange.widgets.widget.OWWidget
        The containing widget to which the table is connected. Its
        ``_invalidate`` method is called after mouse releases and key presses.

    Attributes
    ----------
    classesv : sequence of :obj:`str`
        Vertical class headers.
    classesh : sequence of :obj:`str`
        Horizontal class headers.
    headerv : :obj:`str`, optional
        Vertical top header.
    headerh : :obj:`str`, optional
        Horizontal top header.
    corner_string : str
        String that is top right and bottom left corner of the table.
        Default is ``unicodedata.lookup("N-ARY SUMMATION")``.
    """

    def __init__(self, parent):
        super().__init__(editTriggers=QTableView.NoEditTriggers)
        self.bold_headers = None
        self.circles = False
        self.classesv = None
        self.classesh = None
        self.headerv = None
        self.headerh = None
        # NOTE(review): this attribute hides the ``parent()`` method inherited
        # from Qt; kept because callers may rely on the attribute.
        self.parent = parent
        self.corner_string = unicodedata.lookup("N-ARY SUMMATION")

        self.tablemodel = QStandardItemModel(self)
        self.setModel(self.tablemodel)
        self.horizontalHeader().hide()
        self.verticalHeader().hide()
        self.horizontalHeader().setMinimumSectionSize(60)
        self.setShowGrid(False)
        self.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        self.clicked.connect(self._cell_clicked)

    def mouseReleaseEvent(self, e):
        """Forward the event to the base class, then invalidate the parent."""
        super().mouseReleaseEvent(e)
        self.parent._invalidate()

    def keyPressEvent(self, event):
        """Forward the event to the base class, then invalidate the parent."""
        super().keyPressEvent(event)
        self.parent._invalidate()

    def _cell_clicked(self, model_index):
        """
        Handle cell click event.

        Clicking a class header (or, without circles, an aggregate cell)
        selects the whole corresponding row, column or data area. Clicks in
        row 0 or column 0 are ignored.

        Parameters
        ----------
        model_index : QModelIndex
            Index of the clicked cell.
        """
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        m = self.tablemodel.columnCount()
        index = self.tablemodel.index
        selection = None
        # The last row/column hold aggregates only when circles are off.
        if i == j == 1 or (not self.circles and i == n - 1 and j == m - 1):
            selection = QItemSelection(index(2, 2), index(n - 1, m - 1))
        elif i == 1 or (not self.circles and i == n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j == 1 or (not self.circles and j == m - 1):
            selection = QItemSelection(index(i, 2), index(i, m - 1))

        if selection is not None:
            self.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def _item(self, i, j):
        """Return the model item at ``(i, j)``, or a new item if unset."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store ``item`` in the model at ``(i, j)``."""
        self.tablemodel.setItem(i, j, item)

    def set_variables(self, variablev, variableh, **kwargs):
        """
        Sets class headers and top headers and initializes table structure.

        Parameters
        ----------
        variablev : Orange.data.variable.DiscreteVariable
            Class headers are set to ``variablev.values``, top header is set to
            ``variablev.name``.
        variableh : Orange.data.variable.DiscreteVariable
            Class headers are set to ``variableh.values``, top header is set to
            ``variableh.name``.
        **kwargs
            Passed on to :meth:`initialize`.
        """
        self.classesv = variablev.values
        self.classesh = variableh.values
        self.headerv = variablev.name
        self.headerh = variableh.name
        self.initialize(**kwargs)

    def set_headers(self, classesv, classesh, headerv=None, headerh=None,
                    **kwargs):
        """
        Sets class headers and top headers and initializes table structure.

        Parameters
        ----------
        classesv : sequence of :obj:`str`
            Vertical class headers.
        classesh : sequence of :obj:`str`
            Horizontal class headers.
        headerv : :obj:`str`, optional
            Vertical top header.
        headerh : :obj:`str`, optional
            Horizontal top header.
        **kwargs
            Passed on to :meth:`initialize`.
        """
        self.classesv = classesv
        self.classesh = classesh
        self.headerv = headerv
        self.headerh = headerh
        self.initialize(**kwargs)

    def _style_cells(self):
        """
        Style all cells.

        Installs the item delegate matching the display mode, fills and spans
        the two top headers and disables the four cells in the top left
        corner.
        """
        if self.circles:
            self.setItemDelegate(CircleItemDelegate(Qt.white))
        else:
            self.setItemDelegate(BorderedItemDelegate(Qt.white))

        # Horizontal top header.
        item = self._item(0, 2)
        item.setData(self.headerh, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        item.setFlags(Qt.NoItemFlags)
        self._set_item(0, 2, item)

        # Vertical top header, drawn by a dedicated delegate for column 0.
        item = self._item(2, 0)
        item.setData(self.headerv, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        item.setFlags(Qt.NoItemFlags)
        self.setItemDelegateForColumn(0, gui.VerticalItemDelegate())
        self._set_item(2, 0, item)

        self.setSpan(0, 2, 1, len(self.classesh) + 1)
        self.setSpan(2, 0, len(self.classesv) + 1, 1)

        for i in (0, 1):
            for j in (0, 1):
                item = self._item(i, j)
                item.setFlags(Qt.NoItemFlags)
                self._set_item(i, j, item)

    def _initialize_headers(self):
        """
        Fill headers with content and style them.

        The class headers are followed by ``corner_string`` in both
        directions. Class values may be any sequence (list, tuple, ...).
        """
        font = self.tablemodel.invisibleRootItem().font()
        bold_font = QFont(font)
        bold_font.setBold(True)

        # Copy into lists so that tuples and other sequences can be extended
        # with the corner string; ``tuple + list`` would raise TypeError.
        vertical = list(self.classesv) + [self.corner_string]
        horizontal = list(self.classesh) + [self.corner_string]

        for headers, ix in ((vertical, lambda p: (p + 2, 1)),
                            (horizontal, lambda p: (1, p + 2))):
            for p, label in enumerate(headers):
                i, j = ix(p)
                item = self._item(i, j)
                item.setData(label, Qt.DisplayRole)
                if self.bold_headers:
                    item.setFont(bold_font)
                if not (i == 1 and self.circles):
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                if p < len(headers) - 1:
                    # "r" for cells in column 1, "b" for cells in row 1.
                    item.setData("br"[j == 1], BorderRole)
                    item.setData(QColor(192, 192, 192), BorderColorRole)
                else:
                    # The corner cell gets no border.
                    item.setData("", BorderRole)
                self._set_item(i, j, item)

    def _resize(self):
        """
        Resize table to fit new contents and style.

        With circles the model has two leading rows/columns plus one per
        class; otherwise one more row and column is kept for the aggregates.
        """
        if self.circles:
            self.resizeRowToContents(1)
            self.horizontalHeader().setDefaultSectionSize(self.rowHeight(2))
            self.resizeColumnToContents(1)
            self.tablemodel.setRowCount(len(self.classesv) + 2)
            self.tablemodel.setColumnCount(len(self.classesh) + 2)
        else:
            # list() lets tuple-valued headers be joined with the corner string.
            labels = list(self.classesh) + [self.corner_string]
            if len(' '.join(labels)) < 120:
                self.horizontalHeader().setSectionResizeMode(
                    QHeaderView.ResizeToContents)
            else:
                self.horizontalHeader().setDefaultSectionSize(60)
            self.tablemodel.setRowCount(len(self.classesv) + 3)
            self.tablemodel.setColumnCount(len(self.classesh) + 3)

    def initialize(self, circles=False, bold_headers=True):
        """
        Initializes table structure. Class headers must be set beforehand.

        Parameters
        ----------
        circles : :obj:`bool`, optional
            Turns on circle display. All table values should be between 0 and 1
            (inclusive). Defaults to False.
        bold_headers : :obj:`bool`, optional
            Whether the headers are bold or not. Defaults to True.
        """
        assert self.classesv is not None and self.classesh is not None
        self.circles = circles
        self.bold_headers = bold_headers
        self._style_cells()
        self._initialize_headers()
        self._resize()

    def get_selection(self):
        """
        Get indexes of selected cells.

        Returns
        -------
        :obj:`set` of :obj:`tuple` of :obj:`int`
            Set of pairs of indexes, relative to the first data cell.
        """
        return {(ind.row() - 2, ind.column() - 2)
                for ind in self.selectedIndexes()}

    def set_selection(self, indexes):
        """
        Set indexes of selected cells.

        Parameters
        ----------
        indexes : :obj:`set` of :obj:`tuple` of :obj:`int`
            Set of pairs of indexes, relative to the first data cell.
        """
        selection = QItemSelection()
        index = self.model().index
        for row, col in indexes:
            sel = index(row + 2, col + 2)
            selection.select(sel, sel)
        self.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _set_sums(self, colsum, rowsum):
        """
        Set content of cells on bottom and right edge.

        Parameters
        ----------
        colsum : numpy.array
            Content of cells on bottom edge.
        rowsum : numpy.array
            Content of cells on right edge.
        """
        bold_font = self.tablemodel.invisibleRootItem().font()
        bold_font.setBold(True)

        def _sum_item(value, border=""):
            item = QStandardItem()
            item.setData(value, Qt.DisplayRole)
            item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            item.setFlags(Qt.ItemIsEnabled)
            item.setFont(bold_font)
            item.setData(border, BorderRole)
            item.setData(QColor(192, 192, 192), BorderColorRole)
            return item

        for i in range(len(self.classesh)):
            self._set_item(len(self.classesv) + 2, i + 2,
                           _sum_item(int(colsum[i]), "t"))
        for i in range(len(self.classesv)):
            self._set_item(i + 2, len(self.classesh) + 2,
                           _sum_item(int(rowsum[i]), "l"))
        # Bottom right cell: total over the row aggregates.
        self._set_item(len(self.classesv) + 2, len(self.classesh) + 2,
                       _sum_item(int(rowsum.sum())))

    def _set_values(self, matrix, colors, formatstr, tooltip):
        """
        Set content of cells which aren't headers and don't represent
        aggregate values.

        Parameters
        ----------
        matrix : numpy.array
            2D array to be set as data.
        colors : :obj:`numpy.array`
            2D array with color values, or None for no color.
        formatstr : :obj:`str`, optional
            Format string for cell data.
        tooltip : :obj:`(int, int) -> str`
            Function which takes vertical index and horizontal index as
            arguments and returns desired tooltip as a string, or None.
        """
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        for i in range(len(self.classesv)):
            for j in range(len(self.classesh)):
                val = matrix[i, j]
                col_val = float('nan') if colors is None else colors[i, j]
                item = QStandardItem()
                if self.circles:
                    item.setData(val, CircleAreaRole)
                else:
                    item.setData(
                        "NA" if _isinvalid(val) else formatstr.format(val),
                        Qt.DisplayRole)
                    # Diagonal cells use hue 240, all others hue 0; an
                    # invalid color value yields full lightness.
                    bkcolor = QColor.fromHsl(
                        [0, 240][i == j], 160,
                        255 if _isinvalid(col_val)
                        else int(255 - 30 * col_val))
                    item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                    item.setData("trbl", BorderRole)
                if tooltip is not None:
                    item.setToolTip(tooltip(i, j))
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                self._set_item(i + 2, j + 2, item)

    def update_table(self, matrix, colsum=None, rowsum=None, colors=None,
                     formatstr="{}", tooltip=None):
        """
        Sets ``matrix`` as data of the table.

        The current selection is read before the cells are replaced and
        applied again afterwards.

        Parameters
        ----------
        matrix : numpy.array
            2D array to be set as data.
        colsum : :obj:`numpy.array`, optional
            1D optional array with aggregate values of columns, defaults to
            sum.
        rowsum : :obj:`numpy.array`, optional
            1D optional array with aggregate values of rows, defaults to sum.
        colors : :obj:`numpy.array`, optional
            2D array with color values, defaults to no color.
        formatstr : :obj:`str`, optional
            Format string for cell data, defaults to ``"{}"``.
        tooltip : :obj:`(int, int) -> str`, optional
            Function which takes vertical index and horizontal index as
            arguments and returns desired tooltip as a string. Defaults to no
            tooltips.
        """
        selected_indexes = self.get_selection()
        self._set_values(matrix, colors, formatstr, tooltip)
        # Aggregates are shown only when circles are off.
        if not self.circles:
            if colsum is None:
                colsum = matrix.sum(axis=0)
            if rowsum is None:
                rowsum = matrix.sum(axis=1)
            self._set_sums(colsum, rowsum)
        self.set_selection(selected_indexes)

    def clear(self):
        """
        Clears the table.
        """
        self.tablemodel.clear()
class ScoreTable(OWComponent, QObject):
    """
    Widget component that owns a table view of scores whose columns can be
    shown or hidden from the header's context menu.

    ``shown_scores`` is a setting holding the set of column names that are
    visible; ``shownScoresChanged`` is emitted whenever column visibility
    is re-applied.
    """
    shown_scores = Setting(
        set(chain.from_iterable(BUILTIN_SCORERS_ORDER.values())))
    shownScoresChanged = Signal()

    class ItemDelegate(QStyledItemDelegate):
        """Delegate adding vertical padding and fixed float formatting."""

        def sizeHint(self, *args):
            """Return the default size hint made 6 units taller."""
            hint = super().sizeHint(*args)
            return QSize(hint.width(), hint.height() + 6)

        def displayText(self, value, locale):
            """Show floats with three decimals; defer otherwise."""
            if not isinstance(value, float):
                return super().displayText(value, locale)
            return f"{value:.3f}"

    def __init__(self, master):
        QObject.__init__(self)
        OWComponent.__init__(self, master)

        # Source model, wrapped in a ScoreModel that the view displays.
        self.model = QStandardItemModel(master)
        self.model.setHorizontalHeaderLabels(["Method"])
        self.sorted_model = ScoreModel()
        self.sorted_model.setSourceModel(self.model)

        self.view = gui.TableView(
            wordWrap=True, editTriggers=gui.TableView.NoEditTriggers)
        self.view.setModel(self.sorted_model)
        self.view.setItemDelegate(self.ItemDelegate())

        # The header offers the column chooser through a custom context menu.
        hheader = self.view.horizontalHeader()
        hheader.setSectionResizeMode(QHeaderView.ResizeToContents)
        hheader.setDefaultAlignment(Qt.AlignCenter)
        hheader.setStretchLastSection(False)
        hheader.setContextMenuPolicy(Qt.CustomContextMenu)
        hheader.customContextMenuRequested.connect(self.show_column_chooser)

    def _column_names(self):
        """Return a generator of header labels for every column but the first."""
        sections = range(1, self.model.columnCount())
        return (self.model.horizontalHeaderItem(section).data(Qt.DisplayRole)
                for section in sections)

    def show_column_chooser(self, pos):
        """
        Pop up a menu of checkable column names at header position ``pos``.

        Toggling an entry adds the name to or removes it from
        ``shown_scores`` and re-applies column visibility.
        """
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        def toggle(name, checked):
            apply_change = (self.shown_scores.add if checked
                            else self.shown_scores.remove)
            apply_change(name)
            self._update_shown_columns()

        chooser = QMenu()
        hheader = self.view.horizontalHeader()
        for name in self._column_names():
            entry = chooser.addAction(name)
            entry.setCheckable(True)
            entry.setChecked(name in self.shown_scores)
            entry.triggered.connect(partial(toggle, name))
        chooser.exec(hheader.mapToGlobal(pos))

    def _update_shown_columns(self):
        """
        Hide every column whose name is not in ``shown_scores``, resize the
        columns to their contents and emit ``shownScoresChanged``.
        """
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        hheader = self.view.horizontalHeader()
        for section, name in enumerate(self._column_names(), start=1):
            hidden = name not in self.shown_scores
            hheader.setSectionHidden(section, hidden)
        self.view.resizeColumnsToContents()
        self.shownScoresChanged.emit()

    def update_header(self, scorers):
        """
        Set the horizontal header labels on the model.

        Three fixed columns are followed by one column per scorer, labelled
        with ``name`` and carrying ``long_name`` as the tooltip.
        """
        fixed_labels = ("Model", "Train time [s]", "Test time [s]")
        self.model.setColumnCount(len(fixed_labels) + len(scorers))
        for col, label in enumerate(fixed_labels):
            self.model.setHorizontalHeaderItem(col, QStandardItem(label))
        for col, scorer in enumerate(scorers, start=len(fixed_labels)):
            header_item = QStandardItem(scorer.name)
            header_item.setToolTip(scorer.long_name)
            self.model.setHorizontalHeaderItem(col, header_item)
        self._update_shown_columns()
class PivotTableView(QTableView):
    """
    Table view that displays a pivot table held in a ``QStandardItemModel``.

    The model starts with ``_n_leading_rows`` header rows and
    ``_n_leading_cols`` header columns; value cells follow, and a block of
    totals is placed after the last value row and in the last column.
    Rows are grouped in blocks of ``_n_agg_func`` consecutive rows, and
    selections are always expanded to whole blocks.
    """
    # Emitted on mouse release and while a pressed cell is being extended by
    # entering other cells.
    selection_changed = pyqtSignal()

    # Header label of the totals row and totals column.
    TOTAL_STRING = "Total"

    def __init__(self):
        super().__init__(editTriggers=QTableView.NoEditTriggers)
        self._n_classesv = None  # number of row_feature values
        self._n_classesh = None  # number of col_feature values
        self._n_agg_func = None  # number of aggregation functions
        self._n_leading_rows = None  # number of leading rows
        self._n_leading_cols = None  # number of leading columns

        self.table_model = QStandardItemModel(self)
        self.setModel(self.table_model)
        self.horizontalHeader().hide()
        self.verticalHeader().hide()
        self.horizontalHeader().setMinimumSectionSize(60)
        self.setShowGrid(False)
        self.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        self.setItemDelegate(BorderedItemDelegate())
        # Both press and click run the same handler.
        self.pressed.connect(self.__cell_clicked)
        self.clicked.connect(self.__cell_clicked)
        self.entered.connect(self.__cell_entered)
        # (row, column) of the last pressed/clicked cell; None until then.
        self.__clicked_cell = None

    @property
    def add_agg_column(self) -> bool:
        """Whether there is more than one aggregation function."""
        return self._n_agg_func > 1

    def __cell_entered(self, model_index):
        """
        Extend the selection from the last clicked cell to the entered cell.

        Does nothing until a cell has been clicked. The rectangle between the
        two cells is selected only when its top left corner lies in the value
        area; its rows are widened to whole aggregation blocks.
        """
        if self.__clicked_cell is None:
            return
        index = self.table_model.index
        selection = None
        i_end, j_end = model_index.row(), model_index.column()
        i_start, j_start = self.__clicked_cell
        # Normalize so that start is the top left and end the bottom right.
        i_start, i_end = sorted([i_start, i_end])
        j_start, j_end = sorted([j_start, j_end])
        if i_start >= self._n_leading_rows and j_start >= self._n_leading_cols:
            # Round the first row down to the start of its block ...
            i_start = (i_start - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows
            # ... and the last row up to the end of its block.
            i_end = (i_end - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows + self._n_agg_func - 1
            start, end = index(i_start, j_start), index(i_end, j_end)
            selection = QItemSelection(start, end)
        if selection is not None:
            self.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)
        self.selection_changed.emit()

    def __cell_clicked(self, model_index):
        """
        Remember the clicked cell and select the cells it stands for.

        Depending on where the click lands, the selection covers the whole
        value area, one column, one aggregation block across all columns, or
        one aggregation block within a single column.
        """
        i, j = model_index.row(), model_index.column()
        self.__clicked_cell = (i, j)
        m, n = self.table_model.rowCount(), self.table_model.columnCount()
        index = self.table_model.index
        selection = None
        if i > m - self._n_agg_func - 1 and j == n - 1:
            # One of the last ``_n_agg_func`` rows in the last column.
            start_index = index(self._n_leading_rows, self._n_leading_cols)
            selection = QItemSelection(start_index, index(m - 1, n - 1))
        elif i == self._n_leading_rows - 1 or i > m - self._n_agg_func - 1:
            # Last leading row or one of the last ``_n_agg_func`` rows.
            start_index = index(self._n_leading_rows, j)
            selection = QItemSelection(start_index, index(m - 1, j))
        elif j in (self._n_leading_cols - 1, n - 1, 1):
            # Last leading column, last column or column 1.
            i_start = (i - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows
            i_end = i_start + self._n_agg_func - 1
            start_index = index(i_start, self._n_leading_cols)
            selection = QItemSelection(start_index, index(i_end, n - 1))
        elif i >= self._n_leading_rows and j >= self._n_leading_cols:
            # A cell past the leading rows and columns.
            i_start = (i - self._n_leading_rows) // self._n_agg_func * \
                self._n_agg_func + self._n_leading_rows
            i_end = i_start + self._n_agg_func - 1
            selection = QItemSelection(index(i_start, j), index(i_end, j))

        if selection is not None:
            self.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def mouseReleaseEvent(self, e):
        """Forward the event to the base class, then emit selection_changed."""
        super().mouseReleaseEvent(e)
        self.selection_changed.emit()

    def update_table(self, titleh: str, titlev: str, table: Table,
                     table_total_h: Table, table_total_v: Table,
                     table_total: Table):
        """
        Replace the contents of the view.

        The model is cleared first; if ``table`` is falsy nothing else is
        done. Otherwise headers, values, totals and separator lines are set
        and the columns are resized.
        """
        self.clear()
        if not table:
            return

        self._initialize(table, table_total_h)
        self._set_headers(titleh, titlev, table)
        # presumably the first two columns of these tables hold the row
        # labels (see __set_vertical_headers) -- verify against caller
        self._set_values(table[:, 2:])
        self._set_totals(table_total_h[:, 2:], table_total_v, table_total)
        self._draw_lines()
        self._resize(table)

    def _initialize(self, table, table_total_h):
        """Derive the layout counters from ``table`` and ``table_total_h``."""
        self._n_classesv = int(len(table) / len(table_total_h))
        self._n_classesh = table.X.shape[1] - 2
        self._n_agg_func = len(table_total_h)
        self._n_leading_rows = 2
        # One more leading column when there are several aggregations.
        self._n_leading_cols = 2 + int(len(table_total_h) > 1)

    def _set_headers(self, titleh, titlev, table):
        """Set both titles, disable the corner cells and fill both headers."""
        self.__set_horizontal_title(titleh)
        self.__set_vertical_title(titlev)
        self.__set_flags_title()
        self.__set_horizontal_headers(table)
        self.__set_vertical_headers(table)

    def __set_horizontal_title(self, titleh):
        """Put ``titleh`` in row 0 and let it span the columns after the leading ones."""
        item = QStandardItem()
        item.setData(titleh, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignCenter)
        self.table_model.setItem(0, self._n_leading_cols, item)
        self.setSpan(0, self._n_leading_cols, 1, self._n_classesh + 3)

    def __set_vertical_title(self, titlev):
        """Put ``titlev`` in column 0 and let it span the value rows plus one."""
        item = QStandardItem()
        item.setData(titlev, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        self.setItemDelegateForColumn(0, gui.VerticalItemDelegate(extend=True))
        self.table_model.setItem(self._n_leading_rows, 0, item)
        row_span = self._n_classesv * self._n_agg_func + 1
        self.setSpan(self._n_leading_rows, 0, row_span, 1)

    def __set_flags_title(self):
        """Disable both title items and fill the top left block with disabled items."""
        item = self.table_model.item(0, self._n_leading_cols)
        item.setFlags(Qt.NoItemFlags)
        item = self.table_model.item(self._n_leading_rows, 0)
        item.setFlags(Qt.NoItemFlags)
        for i, j in product(range(self._n_leading_rows),
                            range(self._n_leading_cols)):
            item = QStandardItem()
            item.setFlags(Qt.NoItemFlags)
            self.table_model.setItem(i, j, item)

    def __set_horizontal_headers(self, table):
        """Fill row 1 with the domain names after the first, followed by the total label."""
        labels = [a.name for a in table.domain[1:]] + [self.TOTAL_STRING]
        if not self.add_agg_column:
            # With a single aggregation the first label is replaced by the
            # value in the first row, second column of the table.
            labels[0] = str(table[0, 1])
        for i, label in enumerate(labels, self._n_leading_cols - 1):
            self.table_model.setItem(1, i, self._create_header_item(label))

    def __set_vertical_headers(self, table):
        """
        Fill the row headers, including those of the totals rows.

        Column 1 gets the first value of each row, shown only on the first
        row of every aggregation block; column 2 gets the second value when
        there are several aggregations.
        """
        labels = [(str(row[0]), str(row[1])) for row in table]
        # Keeps ``i`` defined below even when ``labels`` is empty.
        i = self._n_leading_rows - 1
        for i, (l1, l2) in enumerate(labels, self._n_leading_rows):
            l1 = "" if (i - self._n_leading_rows) % self._n_agg_func else l1
            self.table_model.setItem(i, 1, self._create_header_item(l1))
            if self.add_agg_column:
                self.table_model.setItem(i, 2, self._create_header_item(l2))

        if self.add_agg_column:
            # One totals row per aggregation; only the first carries the
            # total label.
            labels = [str(row[1]) for row in table[:self._n_agg_func]]
            start = self._n_leading_rows + self._n_agg_func * self._n_classesv
            for j, l2 in enumerate(labels, i + 1):
                l1 = self.TOTAL_STRING if j == start else ""
                self.table_model.setItem(j, 1, self._create_header_item(l1))
                self.table_model.setItem(j, 2, self._create_header_item(l2))
        else:
            item = self._create_header_item(self.TOTAL_STRING)
            self.table_model.setItem(i + 1, 1, item)

    def _set_values(self, table):
        """Write every cell of ``table`` as text, offset past the leading rows and columns."""
        for i, j in product(range(len(table)), range(len(table[0]))):
            value = table[i, j]
            item = self._create_value_item(str(value))
            self.table_model.setItem(i + self._n_leading_rows,
                                     j + self._n_leading_cols, item)

    def _set_totals(self, table_total_h, table_total_v, table_total):
        """
        Write the totals as header-styled items.

        ``table_total_v`` goes into the last column, ``table_total_h`` into
        the rows after the last value row, and ``table_total`` where the two
        meet.
        """
        def set_total_item(table, get_row, get_col):
            for i, j in product(range(len(table)), range(len(table[0]))):
                item = self._create_header_item(str(table[i, j]))
                self.table_model.setItem(get_row(i), get_col(j), item)

        last_row = self._n_leading_rows + self._n_classesv * self._n_agg_func
        last_col = self._n_leading_cols + self._n_classesh
        set_total_item(table_total_v, lambda x: x + self._n_leading_rows,
                       lambda x: last_col)
        set_total_item(table_total_h, lambda x: x + last_row,
                       lambda x: x + self._n_leading_cols)
        set_total_item(table_total, lambda x: x + last_row, lambda x: last_col)

    def _create_header_item(self, text):
        """Return a bold, right-aligned, enabled but not selectable item."""
        bold_font = self.table_model.invisibleRootItem().font()
        bold_font.setBold(True)
        item = QStandardItem()
        item.setData(text, Qt.DisplayRole)
        item.setFont(bold_font)
        item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
        item.setFlags(Qt.ItemIsEnabled)
        return item

    @staticmethod
    def _create_value_item(text):
        """Return a right-aligned, enabled and selectable item."""
        item = QStandardItem()
        item.setData(text, Qt.DisplayRole)
        item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
        item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
        return item

    def _draw_lines(self):
        """
        Give a top border to the first totals row and to block starts.

        With a single aggregation only the first value row is marked in
        addition to the totals row; otherwise the first row of every block.
        """
        end_col = self._n_leading_cols + self._n_classesh + 1
        total_row = self._n_leading_rows + self._n_classesv * self._n_agg_func
        indices = [(total_row, j) for j in range(1, end_col)]
        for i in range(self._n_classesv):
            inner_row = self._n_agg_func * i + self._n_leading_rows
            inner_indices = [(inner_row, j) for j in range(1, end_col)]
            indices = indices + inner_indices
            if not self.add_agg_column:
                break
        for i, j in indices:
            item = self.table_model.item(i, j)
            item.setData("t", BorderRole)
            item.setData(QColor(160, 160, 160), BorderColorRole)

    def _resize(self, table):
        """Size columns to contents when the joined labels are under 120 characters, else use 60."""
        labels = [a.name for a in table.domain[1:]] + [self.TOTAL_STRING]
        if len(' '.join(labels)) < 120:
            self.horizontalHeader().setSectionResizeMode(
                QHeaderView.ResizeToContents)
        else:
            self.horizontalHeader().setDefaultSectionSize(60)

    def get_selection(self) -> Set:
        """Return selected cells as (row, column) pairs relative to the first value cell."""
        m, n = self._n_leading_rows, self._n_leading_cols
        return {(ind.row() - m, ind.column() - n)
                for ind in self.selectedIndexes()}

    def set_selection(self, indexes: Set):
        """Select the cells given as (row, column) pairs relative to the first value cell."""
        selection = QItemSelection()
        index = self.model().index
        for row, col in indexes:
            sel = index(row + self._n_leading_rows, col + self._n_leading_cols)
            selection.select(sel, sel)
        self.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def clear(self):
        """Remove all items from the underlying model."""
        self.table_model.clear()
class OWTestLearners(OWWidget): name = "Test & Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 inputs = [("Learner", Learner, "set_learner", widget.Multiple), ("Data", Table, "set_train_data", widget.Default), ("Test Data", Table, "set_test_data"), ("Preprocessor", Preprocess, "set_preprocessor")] outputs = [("Predictions", Table), ("Evaluation Results", Results)] settingsHandler = settings.ClassValuesContextHandler() #: Resampling/testing types KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(3) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) class Error(OWWidget.Error): train_data_empty = Msg("Train data set is empty.") test_data_empty = Msg("Test data set is empty.") class_required = Msg("Train data input requires a target variable.") too_many_classes = Msg("Too many target variables.") class_required_test = Msg("Test data input requires a target variable.") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train data sets " "have different target variables.") class Warning(OWWidget.Warning): missing_data = \ Msg("Instances with unknown target values were removed from{}data.") test_data_missing = Msg("Missing separate test data input.") scores_not_computed 
= Msg("Some scores could not be computed.") test_data_unused = Msg("Test data is present but unused. " "Select 'Test on test data' to use it.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False #: An Ordered dictionary with current inputs and their testing results. self.learners = OrderedDict() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons( sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox( ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.kfold_changed) gui.checkBox( ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox( ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.comboBox( ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], maximumContentsLength=5, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.checkBox( ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, valueType=str, callback=self._on_target_class_changed, 
contentsLength=8) gui.rubber(self.controlArea) self.view = gui.TableView( wordWrap=True, ) header = self.view.horizontalHeader() header.setSectionResizeMode(QHeaderView.ResizeToContents) header.setDefaultAlignment(Qt.AlignCenter) header.setStretchLastSection(False) self.result_model = QStandardItemModel(self) self.result_model.setHorizontalHeaderLabels(["Method"]) self.view.setModel(self.result_model) self.view.setItemDelegate(ItemDelegate()) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.view) def sizeHint(self): return QSize(780, 1) def set_learner(self, learner, key): """ Set the input `learner` for `key`. """ if key in self.learners and learner is None: # Removed del self.learners[key] else: self.learners[key] = Input(learner, None, None) self._invalidate([key]) def set_train_data(self, data): """ Set the input training dataset. """ self.Information.data_sampled.clear() self.Error.train_data_empty.clear() if data is not None and not len(data): self.Error.train_data_empty() data = None if data and not data.domain.class_vars: self.Error.class_required() data = None elif data and len(data.domain.class_vars) > 1: self.Error.too_many_classes() data = None else: self.Error.class_required.clear() self.Error.too_many_classes.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.train_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = RemoveNaNClasses(data) else: self.Warning.missing_data.clear() self.data = data self.closeContext() if data is not None: self._update_class_selection() self.openContext(data.domain.class_var) self._invalidate() def set_test_data(self, data): """ Set 
the input separate testing dataset. """ self.Information.test_data_sampled.clear() self.Error.test_data_empty.clear() if data is not None and not len(data): self.Error.test_data_empty() data = None if data and not data.domain.class_var: self.Error.class_required() data = None else: self.Error.class_required_test.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.test_data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.test_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = RemoveNaNClasses(data) else: self.Warning.missing_data.clear() self.test_data = data if self.resampling == OWTestLearners.TestOnTest: self._invalidate() def _which_missing_data(self): return {(True, True): " ", # both, don't specify (True, False): " train ", (False, True): " test "}[(self.train_data_missing_vals, self.test_data_missing_vals)] def set_preprocessor(self, preproc): """ Set the input preprocessor to apply on the training data. """ self.preprocessor = preproc self._invalidate() def handleNewSignals(self): """Reimplemented from OWWidget.handleNewSignals.""" self._update_class_selection() self.commit() def kfold_changed(self): self.resampling = OWTestLearners.KFold self._param_changed() def shuffle_split_changed(self): self.resampling = OWTestLearners.ShuffleSplit self._param_changed() def _param_changed(self): self._invalidate() def _update_results(self): """ Run/evaluate the learners. 
""" self.Warning.test_data_unused.clear() self.Warning.test_data_missing.clear() self.warning() self.Error.class_inconsistent.clear() self.Error.too_many_folds.clear() self.error() if self.data is None: return class_var = self.data.domain.class_var if self.resampling == OWTestLearners.TestOnTest: if self.test_data is None: if not self.Error.test_data_empty.is_shown(): self.Warning.test_data_missing() return elif self.test_data.domain.class_var != class_var: self.Error.class_inconsistent() return # items in need of an update items = [(key, slot) for key, slot in self.learners.items() if slot.results is None] learners = [slot.learner for _, slot in items] if len(items) == 0: return if self.test_data is not None and \ self.resampling != OWTestLearners.TestOnTest: self.Warning.test_data_unused() rstate = 42 def update_progress(finished): self.progressBarSet(100 * finished) common_args = dict( store_data=True, preprocessor=self.preprocessor, callback=update_progress, n_jobs=-1, ) self.setStatusMessage("Running") with self.progressBar(): try: folds = self.NFolds[self.n_folds] if self.resampling == OWTestLearners.KFold: if len(self.data) < folds: self.Error.too_many_folds() return warnings = [] results = Orange.evaluation.CrossValidation( self.data, learners, k=folds, random_state=rstate, warnings=warnings, **common_args) if warnings: self.warning(warnings[0]) elif self.resampling == OWTestLearners.LeaveOneOut: results = Orange.evaluation.LeaveOneOut( self.data, learners, **common_args) elif self.resampling == OWTestLearners.ShuffleSplit: train_size = self.SampleSizes[self.sample_size] / 100 results = Orange.evaluation.ShuffleSplit( self.data, learners, n_resamples=self.NRepeats[self.n_repeats], train_size=train_size, test_size=None, stratified=self.shuffle_stratified, random_state=rstate, **common_args) elif self.resampling == OWTestLearners.TestOnTrain: results = Orange.evaluation.TestOnTrainingData( self.data, learners, **common_args) elif self.resampling == 
OWTestLearners.TestOnTest: results = Orange.evaluation.TestOnTestData( self.data, self.test_data, learners, **common_args) else: assert False except (RuntimeError, ValueError) as e: self.error(str(e)) self.setStatusMessage("") return else: self.error() learner_key = {slot.learner: key for key, slot in self.learners.items()} for learner, result in zip(learners, results.split_by_model()): stats = None if class_var.is_discrete: scorers = classification_stats.scores elif class_var.is_continuous: scorers = regression_stats.scores else: scorers = None if scorers: ex = result.failed[0] if ex: stats = [Try.Fail(ex)] * len(scorers) result = Try.Fail(ex) else: stats = [Try(lambda: score(result)) for score in scorers] result = Try.Success(result) key = learner_key[learner] self.learners[key] = \ self.learners[key]._replace(results=result, stats=stats) self.setStatusMessage("") def _update_header(self): # Set the correct horizontal header labels on the results_model. headers = ["Method"] if self.data is not None: if self.data.domain.has_discrete_class: headers.extend(classification_stats.headers) else: headers.extend(regression_stats.headers) # remove possible extra columns from the model. for i in reversed(range(len(headers), self.result_model.columnCount())): self.result_model.takeColumn(i) self.result_model.setHorizontalHeaderLabels(headers) def _update_stats_model(self): # Update the results_model with up to date scores. # Note: The target class specific scores (if requested) are # computed as needed in this method. 
model = self.view.model() # clear the table model, but preserving the header labels for r in reversed(range(model.rowCount())): model.takeRow(r) target_index = None if self.data is not None: class_var = self.data.domain.class_var if self.data.domain.has_discrete_class and \ self.class_selection != self.TARGET_AVERAGE: target_index = class_var.values.index(self.class_selection) else: class_var = None errors = [] has_missing_scores = False for key, slot in self.learners.items(): name = learner_name(slot.learner) head = QStandardItem(name) head.setData(key, Qt.UserRole) if isinstance(slot.results, Try.Fail): head.setToolTip(str(slot.results.exception)) head.setText("{} (error)".format(name)) head.setForeground(QtGui.QBrush(Qt.red)) errors.append("{name} failed with error:\n" "{exc.__class__.__name__}: {exc!s}" .format(name=name, exc=slot.results.exception)) row = [head] if class_var is not None and class_var.is_discrete and \ target_index is not None: if slot.results is not None and slot.results.success: ovr_results = results_one_vs_rest( slot.results.value, target_index) stats = [Try(lambda: score(ovr_results)) for score in classification_stats.scores] else: stats = None else: stats = slot.stats if stats is not None: for stat in stats: item = QStandardItem() if stat.success: item.setText("{:.3f}".format(stat.value[0])) else: item.setToolTip(str(stat.exception)) has_missing_scores = True row.append(item) model.appendRow(row) self.error("\n".join(errors), shown=bool(errors)) self.Warning.scores_not_computed(shown=has_missing_scores) def _update_class_selection(self): self.class_selection_combo.setCurrentIndex(-1) self.class_selection_combo.clear() if not self.data: return if self.data.domain.has_discrete_class: self.cbox.setVisible(True) class_var = self.data.domain.class_var items = [self.TARGET_AVERAGE] + class_var.values self.class_selection_combo.addItems(items) class_index = 0 if self.class_selection in class_var.values: class_index = 
class_var.values.index(self.class_selection) + 1 self.class_selection_combo.setCurrentIndex(class_index) self.class_selection = items[class_index] else: self.cbox.setVisible(False) def _on_target_class_changed(self): self._update_stats_model() def _invalidate(self, which=None): # Invalidate learner results for `which` input keys # (if None then all learner results are invalidated) if which is None: which = self.learners.keys() model = self.view.model() statmodelkeys = [model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount())] for key in which: self.learners[key] = \ self.learners[key]._replace(results=None, stats=None) if key in statmodelkeys: row = statmodelkeys.index(key) for c in range(1, model.columnCount()): item = model.item(row, c) if item is not None: item.setData(None, Qt.DisplayRole) item.setData(None, Qt.ToolTipRole) self.commit() def commit(self): """Recompute and output the results""" self._update_header() # Update the view to display the model names self._update_stats_model() self._update_results() self._update_stats_model() valid = [slot for slot in self.learners.values() if slot.results is not None and slot.results.success] if valid: # Evaluation results combined = results_merge([slot.results.value for slot in valid]) combined.learner_names = [learner_name(slot.learner) for slot in valid] # Predictions & Probabilities predictions = combined.get_augmented_data(combined.learner_names) else: combined = None predictions = None self.send("Evaluation Results", combined) self.send("Predictions", predictions) def send_report(self): """Report on the testing schema and results""" if not self.data or not self.learners: return if self.resampling == self.KFold: stratified = 'Stratified ' if self.cv_stratified else '' items = [("Sampling type", "{}{}-fold Cross validation". 
format(stratified, self.NFolds[self.n_folds]))] elif self.resampling == self.LeaveOneOut: items = [("Sampling type", "Leave one out")] elif self.resampling == self.ShuffleSplit: stratified = 'Stratified ' if self.shuffle_stratified else '' items = [("Sampling type", "{}Shuffle split, {} random samples with {}% data " .format(stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size]))] elif self.resampling == self.TestOnTrain: items = [("Sampling type", "No sampling, test on training data")] elif self.resampling == self.TestOnTest: items = [("Sampling type", "No sampling, test on testing data")] else: items = [] if self.data.domain.has_discrete_class: items += [("Target class", self.class_selection.strip("()"))] if items: self.report_items("Settings", items) self.report_table("Scores", self.view)
def __on_enrichment_finished(self, results):
    """
    Rebuild the annotations view from finished enrichment results.

    Parameters
    ----------
    results : tuple
        A ``(query, reference, results)`` triple.  The inner ``results``
        is iterated several times as ``(gene set, enrichment)`` pairs,
        where the enrichment exposes ``query_mapped``,
        ``reference_mapped``, ``p_value`` and ``enrichment_score`` and
        can also be unpacked into four values.
    """
    assert QThread.currentThread() is self.thread()
    self.__state &= ~OWSetEnrichment.RunningEnrichment

    query, reference, results = results

    if self.annotationsChartView.model():
        self.annotationsChartView.model().clear()

    nquery = len(query)
    nref = len(reference)
    maxcount = max((len(e.query_mapped) for _, e in results), default=1)
    maxrefcount = max((len(e.reference_mapped) for _, e in results),
                      default=1)
    # Pad both count columns to the number of decimal digits of their
    # largest value.  The digit count is taken from the string form:
    # ``ceil(log10(n))`` is one short for exact powers of ten, and the
    # reference column previously used the natural logarithm.
    nspaces = len(str(maxcount))
    refspaces = len(str(maxrefcount))
    query_fmt = "%" + str(nspaces) + "s (%.2f%%)"
    ref_fmt = "%" + str(refspaces) + "s (%.2f%%)"

    def fmt_count(fmt, count, total):
        # ``total or 1`` guards against division by zero.
        return fmt % (count, 100.0 * count / (total or 1))

    fmt_query_count = partial(fmt_count, query_fmt)
    fmt_ref_count = partial(fmt_count, ref_fmt)

    linkFont = QFont(self.annotationsChartView.viewOptions().font)
    linkFont.setUnderline(True)

    def item(value=None, tooltip=None, user=None):
        # Build a QStandardItem; the UserRole (used as the sort role
        # below) falls back to the display value when `user` is None.
        si = QStandardItem()
        if value is not None:
            si.setData(value, Qt.DisplayRole)
        if tooltip is not None:
            si.setData(tooltip, Qt.ToolTipRole)
        if user is not None:
            si.setData(user, Qt.UserRole)
        else:
            si.setData(value, Qt.UserRole)
        return si

    model = QStandardItemModel()
    model.setSortRole(Qt.UserRole)
    model.setHorizontalHeaderLabels([
        "Category", "Term", "Count", "Reference count", "p-value", "FDR",
        "Enrichment"
    ])
    for gset, enrich in results:
        nquery_mapped = len(enrich.query_mapped)
        if nquery_mapped == 0:
            # Sets without any mapped query entries are not listed.
            continue
        nref_mapped = len(enrich.reference_mapped)

        row = [
            item(", ".join(gset.hierarchy)),
            item(gsname(gset), tooltip=gset.link),
            item(fmt_query_count(nquery_mapped, nquery),
                 tooltip=nquery_mapped, user=nquery_mapped),
            item(fmt_ref_count(nref_mapped, nref),
                 tooltip=nref_mapped, user=nref_mapped),
            item(fmtp(enrich.p_value), user=enrich.p_value),
            item(),  # column 5, FDR, is computed in filterAnnotationsChartView
            item(enrich.enrichment_score,
                 tooltip="%.3f" % enrich.enrichment_score,
                 user=enrich.enrichment_score)
        ]
        # Keep the source objects on the first item of the row.
        row[0].geneset = gset
        row[0].enrichment = enrich
        row[1].setData(gset.link, gui.LinkRole)
        row[1].setFont(linkFont)
        row[1].setForeground(QColor(Qt.blue))

        model.appendRow(row)

    self.annotationsChartView.setModel(model)
    self.annotationsChartView.selectionModel().selectionChanged.connect(
        self.commit)

    if not model.rowCount():
        self.warning(0, "No enriched sets found.")
    else:
        self.warning(0)

    # Completer vocabulary: names of the sets whose first enrichment
    # field is truthy, plus the individual words of those names.
    allnames = {gsname(geneset)
                for geneset, (count, _, _, _) in results if count}
    allnames |= {word for name in allnames for word in word_split(name)}

    self.filterCompleter.setModel(None)
    self.completerModel = QStringListModel(sorted(allnames))
    self.filterCompleter.setModel(self.completerModel)

    if results:
        # Scale the bar delegate by the largest finite score.
        max_score = max((e.enrichment_score for _, e in results
                         if np.isfinite(e.enrichment_score)),
                        default=1)

        self.annotationsChartView.setItemDelegateForColumn(
            6, BarItemDelegate(self, scale=(0.0, max_score)))

    self.annotationsChartView.setItemDelegateForColumn(
        1, gui.LinkStyledItemDelegate(self.annotationsChartView))

    header = self.annotationsChartView.header()
    for i in range(model.columnCount()):
        sh = self.annotationsChartView.sizeHintForColumn(i)
        sh = max(sh, header.sectionSizeHint(i))
        # Clamp every column width to the 30..300 range.
        self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))

    self.filterAnnotationsChartView()

    self.progressBarFinished()
    self.setStatusMessage("")
def __on_enrichment_finished(self, results):
    """
    Rebuild the annotations view from finished enrichment results.

    Parameters
    ----------
    results : tuple
        A ``(query, reference, results)`` triple.  The inner ``results``
        is iterated several times as ``(gene set, enrichment)`` pairs,
        where the enrichment exposes ``query_mapped``,
        ``reference_mapped``, ``p_value`` and ``enrichment_score`` and
        can also be unpacked into four values.
    """
    assert QThread.currentThread() is self.thread()
    self.__state &= ~OWSetEnrichment.RunningEnrichment

    query, reference, results = results

    if self.annotationsChartView.model():
        self.annotationsChartView.model().clear()

    nquery = len(query)
    nref = len(reference)
    maxcount = max((len(e.query_mapped) for _, e in results), default=1)
    maxrefcount = max((len(e.reference_mapped) for _, e in results),
                      default=1)
    # Column padding equals the number of decimal digits of the largest
    # count.  Taking it from the string form avoids two problems of the
    # logarithm approach: ``ceil(log10(n))`` is one short for exact
    # powers of ten, and the reference column used the natural log.
    nspaces = len(str(maxcount))
    refspaces = len(str(maxrefcount))
    query_fmt = "%" + str(nspaces) + "s (%.2f%%)"
    ref_fmt = "%" + str(refspaces) + "s (%.2f%%)"

    def fmt_count(fmt, count, total):
        # ``total or 1`` guards against division by zero.
        return fmt % (count, 100.0 * count / (total or 1))

    fmt_query_count = partial(fmt_count, query_fmt)
    fmt_ref_count = partial(fmt_count, ref_fmt)

    linkFont = QFont(self.annotationsChartView.viewOptions().font)
    linkFont.setUnderline(True)

    def item(value=None, tooltip=None, user=None):
        # Build a QStandardItem; the UserRole (used as the sort role
        # below) falls back to the display value when `user` is None.
        si = QStandardItem()
        if value is not None:
            si.setData(value, Qt.DisplayRole)
        if tooltip is not None:
            si.setData(tooltip, Qt.ToolTipRole)
        if user is not None:
            si.setData(user, Qt.UserRole)
        else:
            si.setData(value, Qt.UserRole)
        return si

    model = QStandardItemModel()
    model.setSortRole(Qt.UserRole)
    model.setHorizontalHeaderLabels(
        ["Category", "Term", "Count", "Reference count", "p-value",
         "FDR", "Enrichment"])
    for gset, enrich in results:
        nquery_mapped = len(enrich.query_mapped)
        if nquery_mapped == 0:
            # Sets without any mapped query entries are not listed.
            continue
        nref_mapped = len(enrich.reference_mapped)

        row = [
            item(", ".join(gset.hierarchy)),
            item(gsname(gset), tooltip=gset.link),
            item(fmt_query_count(nquery_mapped, nquery),
                 tooltip=nquery_mapped, user=nquery_mapped),
            item(fmt_ref_count(nref_mapped, nref),
                 tooltip=nref_mapped, user=nref_mapped),
            item(fmtp(enrich.p_value), user=enrich.p_value),
            item(),  # column 5, FDR, is computed in filterAnnotationsChartView
            item(enrich.enrichment_score,
                 tooltip="%.3f" % enrich.enrichment_score,
                 user=enrich.enrichment_score)
        ]
        # Keep the source objects on the first item of the row.
        row[0].geneset = gset
        row[0].enrichment = enrich
        row[1].setData(gset.link, gui.LinkRole)
        row[1].setFont(linkFont)
        row[1].setForeground(QColor(Qt.blue))

        model.appendRow(row)

    self.annotationsChartView.setModel(model)
    self.annotationsChartView.selectionModel().selectionChanged.connect(
        self.commit
    )

    if not model.rowCount():
        self.warning(0, "No enriched sets found.")
    else:
        self.warning(0)

    # Completer vocabulary: names of the sets whose first enrichment
    # field is truthy, plus the individual words of those names.
    allnames = {gsname(geneset)
                for geneset, (count, _, _, _) in results if count}
    allnames |= {word for name in allnames for word in word_split(name)}

    self.filterCompleter.setModel(None)
    self.completerModel = QStringListModel(sorted(allnames))
    self.filterCompleter.setModel(self.completerModel)

    if results:
        # Scale the bar delegate by the largest finite score.
        max_score = max((e.enrichment_score for _, e in results
                         if np.isfinite(e.enrichment_score)),
                        default=1)

        self.annotationsChartView.setItemDelegateForColumn(
            6, BarItemDelegate(self, scale=(0.0, max_score))
        )

    self.annotationsChartView.setItemDelegateForColumn(
        1, gui.LinkStyledItemDelegate(self.annotationsChartView)
    )

    header = self.annotationsChartView.header()
    for i in range(model.columnCount()):
        sh = self.annotationsChartView.sizeHintForColumn(i)
        sh = max(sh, header.sectionSizeHint(i))
        # Clamp every column width to the 30..300 range.
        self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))

    self.filterAnnotationsChartView()

    self.progressBarFinished()
    self.setStatusMessage("")
def find_rules(self):
    """
    Mine association rules from ``self.data`` and show them in the table.

    Calling this while a search is already running only clears the
    running flag, which makes the active search loop stop; the same
    button therefore acts as "Cancel".  Nothing is returned; the result
    is stored in the table model and in the ``nRules`` /
    ``nFilteredRules`` / ``nSelectedRules`` / ``nSelectedExamples``
    counters.
    """
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        # Second press: request cancellation of the running search.
        self._is_running = False
        return

    self.button.button.setText('Cancel')

    self._is_running = True
    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.onehot_mapping = mapping
    self.Error.need_discrete_data.clear()
    if X is None:
        # Nothing could be encoded: report it and restore the idle state
        # instead of continuing into the itemset search with X = None.
        self.Error.need_discrete_data()
        self.button.button.setText('Find Rules')
        self._is_running = False
        return

    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {
        item: ('{}={}' if var is data.domain.class_var
               else ITEM_FMT).format(var.name, val)
        for item, var, val in OneHot.decode(mapping, data, mapping)
    }
    # Items that consequent must include if classifying
    class_items = {
        item
        for item, var, val in OneHot.decode(mapping, data, mapping)
        if var is data.domain.class_var
    } if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)

    # Find itemsets
    nRules = 0
    itemsets = {}
    ARROW_ITEM = StandardItem('→')
    ARROW_ITEM.setTextAlignment(Qt.AlignCenter)
    with self.progressBar(self.maxRules + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            # Every itemset is recorded, even when filtered out below.
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                    (len(itemset) < itemsetMin or
                     itemsetMax < len(itemset) or
                     not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in association_rules(itemsets,
                                          self.minConfidence / 100,
                                          itemset):
                left, right, support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue
                left_str = ', '.join(names[i] for i in sorted(left))
                right_str = ', '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule, ), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData(
                    (itemset - class_items,
                     class_items and (class_items & itemset).pop()),
                    self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([
                    support_item,
                    NumericItem(confidence),
                    NumericItem(coverage),
                    NumericItem(strength),
                    NumericItem(lift),
                    NumericItem(leverage),
                    left_item,
                    ARROW_ITEM.clone(),
                    StandardItem(right_str, len(right))
                ])
                #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                nRules += 1
                progress.advance()

                if not self._is_running or nRules >= self.maxRules:
                    break

            # Let the GUI handle pending events (e.g. a Cancel press).
            qApp.processEvents()

            if not self._is_running or nRules >= self.maxRules:
                break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)

    self.button.button.setText('Find Rules')

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
    self._is_running = False
class OWTestLearners(OWWidget):
    """
    Widget that evaluates the connected learners on the input data and
    shows their scores in a table.

    The resampling scheme is chosen with the "Sampling" radio buttons;
    the scores are recomputed for every learner whose stored result has
    been invalidated.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Table),
               ("Evaluation Results", Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg("Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        """Create the widget state and build the control and main areas."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        # The radio buttons are appended in the order of the
        # KFold .. TestOnTest constants (0 .. 4).
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        A ``None`` learner for a known key removes that input; anything
        else (re)registers the learner with empty results.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.

        Data without exactly one class variable is rejected with an
        error; ``SqlTable`` input is downloaded (sampled when large) and
        rows with unknown target values are removed.
        """
        self.Information.data_sampled.clear()
        if data and not data.domain.class_vars:
            self.Error.class_required()
            data = None
        elif data and len(data.domain.class_vars) > 1:
            self.Error.too_many_classes()
            data = None
        else:
            self.Error.class_required.clear()
            self.Error.too_many_classes.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.

        Test data without a class variable is rejected with the
        test-specific error message.
        """
        self.Information.test_data_sampled.clear()
        if data and not data.domain.class_var:
            # Raise the same message that the `else` branch clears; the
            # train-data message would otherwise be shown and never
            # cleared from here.
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        # Test data only affects the results in "Test on test data" mode.
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """
        Return the text inserted into the ``missing_data`` warning.

        Only called when at least one of the two flags is set; the
        (False, False) combination has no entry.
        """
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        # Touching a cross-validation option also selects that scheme.
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        # Touching a random-sampling option also selects that scheme.
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.

        Only learners whose stored ``results`` is ``None`` are
        evaluated; their slots in ``self.learners`` are replaced with
        the new results and scores.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress,
            n_jobs=-1,
        )
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                folds = self.NFolds[self.n_folds]
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < folds:
                        self.Error.too_many_folds()
                        # Do not leave the "Running" status behind.
                        self.setStatusMessage("")
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=folds,
                        random_state=rstate, warnings=warnings, **common_args)
                    if warnings:
                        self.warning(warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.SampleSizes[self.sample_size] / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners,
                        n_resamples=self.NRepeats[self.n_repeats],
                        train_size=train_size, test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except (RuntimeError, ValueError) as e:
                self.error(str(e))
                self.setStatusMessage("")
                return
            else:
                self.error()

        # The scorer set depends only on the class variable type.
        if class_var.is_discrete:
            scorers = classification_stats.scores
        elif class_var.is_continuous:
            scorers = regression_stats.scores
        else:
            scorers = None

        learner_key = {slot.learner: key
                       for key, slot in self.learners.items()}
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    # A failed learner gets a failure entry per score.
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            # The key stored here is what `_invalidate` looks rows up by.
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # A specific target class is selected: score it
                # one-vs-rest instead of using the stored averages.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        """Refill the target class combo box from the current data."""
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            # list() keeps the concatenation valid when `values` is not
            # a list (e.g. a tuple).
            items = [self.TARGET_AVERAGE] + list(class_var.values)
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 skips the leading "average" entry.
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                # Blank the score cells of the row showing this learner.
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
class OWTestLearners(OWWidget):
    """
    Widget that tests the input learners on the input data.

    The learners are evaluated with the selected resampling scheme in a
    background executor. The per-learner scores are shown in a table and
    the merged evaluation results and the augmented predictions are sent
    to the widget's outputs.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation (index into NFolds)
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling (index into NRepeats)
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size (index into SampleSizes)
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    #: Names of the score columns that are shown in the results table
    shown_scores = \
        settings.Setting({"AUC", "CA", "F1", "Precision", "Recall",
                          "MSE", "RMSE", "MAE", "R2"})

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        only_one_class_var_value = Msg("Target variable has only one value.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        """Initialize the widget state and build the GUI."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, InputLearner]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        # Right-clicking the header opens the score column chooser.
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the preferred size of the widget."""
        return QSize(780, 1)

    def _update_controls(self):
        """
        Refresh the fold feature model from the current training data.

        The "Cross validation by feature" option is enabled only when the
        feature model is non-empty; if it was selected but is no longer
        available, the resampling falls back to K-fold.
        """
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        A `None` learner removes the entry stored under `key`; a `None`
        learner for a `key` that is not stored is ignored.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        elif learner is not None:
            # Do not store a `None` learner under a new key: every stored
            # slot is later named, deep-copied and evaluated.
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        The data is rejected (treated as `None`) with an error message when
        it is empty, has no target, has more than one target or has a
        discrete target with a single value.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data:
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                data.domain.has_discrete_class
                and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required,
                self.Error.too_many_classes,
                self.Error.only_one_class_var_value
            ]
            # Report only the first failed precondition.
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                # Same call form as in `set_test_data`.
                data = RemoveNaNClasses()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Learner results are invalidated only when the current resampling
        is "Test on test data".

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None

        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """
        Return the text passed to the `missing_data` warning.

        Must be called only when at least one of the missing-values flags
        is set; there is no entry for (False, False).
        """
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self._update_header()
        self._update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        """Select K-fold resampling and re-run the evaluation."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        """Select cross validation by feature and re-run the evaluation."""
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Select random sampling and re-run the evaluation."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """Invalidate all learner results and start a new evaluation."""
        self._invalidate()
        self.__update()

    def _update_header(self):
        """Set the horizontal header labels on the results model."""
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)
        self._update_shown_columns()

    def _update_shown_columns(self):
        """Hide the score columns whose name is not in `shown_scores`."""
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            header.setSectionHidden(section, col_name not in self.shown_scores)

    def _update_stats_model(self):
        """
        Rebuild the rows of the results model from the learner slots.

        The target class specific scores (if requested) are computed as
        needed in this method. Evaluation failures are collected into the
        widget's error message and a warning is shown when any individual
        score could not be computed.
        """
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            # The key is stored on the first column so that `_invalidate`
            # can find the row that belongs to a learner.
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)
                    stats = [Try(lambda: score(ovr_results, target=1))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        """
        Repopulate the target class combo box from the training data.

        The box is shown only for data with a discrete class. The previous
        selection is kept if it is one of the class values, otherwise the
        selection is reset to `TARGET_AVERAGE`.
        """
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            # list(...) so that the concatenation works for any sequence
            # type of `values`, not only for lists.
            items = [self.TARGET_AVERAGE] + list(class_var.values)
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 accounts for the leading TARGET_AVERAGE item
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Refresh the displayed scores for the newly selected class."""
        self._update_stats_model()

    def _invalidate(self, which=None):
        """
        Invalidate learner results for `which` input keys.

        Clears the stored results and statistics and blanks the matching
        score cells in the view. The evaluation itself is not started here;
        it is only flagged as needed.

        Parameters
        ----------
        which : Optional[Iterable[Any]]
            Keys to invalidate; if None then all learner results are
            invalidated.
        """
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        if which is None:
            # Snapshot the keys: entries are reassigned while iterating.
            which = list(self.learners.keys())

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def show_column_chooser(self, pos):
        """
        Show a menu with a checkable action for every score column.

        Parameters
        ----------
        pos : QPoint
            Position in the coordinates of the horizontal header.
        """
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test

        def update(col_name, checked):
            if checked:
                self.shown_scores.add(col_name)
            else:
                # discard: unchecking a column that is not in the set
                # must not raise
                self.shown_scores.discard(col_name)
            self._update_shown_columns()

        menu = QMenu()
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            action = menu.addAction(col_name)
            action.setCheckable(True)
            action.setChecked(col_name in self.shown_scores)
            action.triggered.connect(partial(update, col_name))
        menu.exec(header.mapToGlobal(pos))

    def commit(self):
        """
        Commit the results to output.

        Only the results of successfully evaluated learners are merged and
        sent; `None` is sent on both outputs when there are none. A
        `MemoryError` raised while building the predictions table is
        reported as a widget error and `None` is sent for predictions.
        """
        self.Error.memory_error.clear()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.FeatureFold:
            items = [("Sampling type", "Cross validation by feature")]
            if self.fold_feature is not None:
                # report the name when the stored value exposes one,
                # otherwise its str()
                items.append(("Feature",
                              getattr(self.fold_feature, "name",
                                      str(self.fold_feature))))
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """
        Migrate stored settings from an older `settings_version` in place.

        Parameters
        ----------
        settings_ : dict
        version : int
            Version of the stored settings.
        """
        if version < 2:
            # Every resampling index above 0 is shifted up by one.
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')]

    @Slot(float)
    def setProgressValue(self, value):
        """Set the progress bar to `value` without processing events."""
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        """
        Check the preconditions and submit an evaluation of the learners
        that have no results yet.

        A running evaluation is cancelled first. When a precondition is
        not met, the state is set to `Waiting`, the current (possibly
        empty) results are committed and nothing is submitted.
        """
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
        )
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners below)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.KFold:
            folds = self.NFolds[self.n_folds]
            # Pass the "Stratified" setting on; otherwise the checkbox and
            # the report text would not match the evaluation performed.
            test_f = partial(
                Orange.evaluation.CrossValidation,
                self.data, learners_c, k=folds,
                stratified=self.cv_stratified,
                random_state=rstate, **common_args)
        elif self.resampling == OWTestLearners.FeatureFold:
            test_f = partial(
                Orange.evaluation.CrossValidationFeature,
                self.data, learners_c, self.fold_feature,
                **common_args
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            test_f = partial(
                Orange.evaluation.LeaveOneOut,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.ShuffleSplit:
            train_size = self.SampleSizes[self.sample_size] / 100
            test_f = partial(
                Orange.evaluation.ShuffleSplit,
                self.data, learners_c,
                n_resamples=self.NRepeats[self.n_repeats],
                train_size=train_size, test_size=None,
                stratified=self.shuffle_stratified,
                random_state=rstate, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            test_f = partial(
                Orange.evaluation.TestOnTrainingData,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData,
                self.data, self.test_data, learners_c, **common_args
            )
        else:
            assert False, "self.resampling %s" % self.resampling

        def replace_learners(evalfunc, *args, **kwargs):
            # Run the evaluation and put the original learners back in
            # place of the deep copies that were actually tested.
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[float]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            # Raising here is how a cancelled task stops the evaluation.
            if task.cancelled:
                raise UserInterrupt()
            QMetaObject.invokeMethod(
                self, "setProgressValue", Qt.QueuedConnection,
                Q_ARG(float, 100 * finished)
            )

        def ondone(_):
            QMetaObject.invokeMethod(
                self, "__task_complete", Qt.QueuedConnection,
                Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        """
        Handle a completed task.

        A task that is not the current one must have been cancelled and is
        ignored. Otherwise the results are split by learner, the scores
        are computed and stored in the learner slots, the view is updated
        and the results are committed. If the evaluation raised, the error
        is shown and nothing is committed.
        """
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()    # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            log.exception("testing error (in __task_complete):",
                          exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er), er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                scorers = classification_stats.scores \
                    if class_var.is_discrete else regression_stats.scores
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self._update_header()
        self._update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()

    def onDeleteWidget(self):
        """Cancel any running evaluation before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()