Пример #1
0
    def test_methodinvoke(self):
        executor = ThreadExecutor()
        state = [None, None]

        class StateSetter(QObject):
            @pyqtSlot(object)
            def set_state(self, value):
                state[0] = value
                state[1] = QThread.currentThread()

        def func(callback):
            callback(QThread.currentThread())

        obj = StateSetter()
        f1 = executor.submit(func, methodinvoke(obj, "set_state", (object,)))
        f1.result()
        # So invoked method can be called from the event loop
        self.app.processEvents()

        self.assertIs(state[1], QThread.currentThread(),
                      "set_state was called from the wrong thread")

        self.assertIsNot(state[0], QThread.currentThread(),
                         "set_state was invoked in the main thread")

        executor.shutdown(wait=True)
Пример #2
0
    def test_executor(self):
        executor = ThreadExecutor()
        f1 = executor.submit(pow, 100, 100)

        f2 = executor.submit(lambda: 1 / 0)

        f3 = executor.submit(QThread.currentThread)

        self.assertTrue(f1.result(), pow(100, 100))

        with self.assertRaises(ZeroDivisionError):
            f2.result()

        self.assertIsInstance(f2.exception(), ZeroDivisionError)

        self.assertIsNot(f3.result(), QThread.currentThread())
Пример #3
0
    def setup_layout(self):
        super().setup_layout()

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # just a test cancel button
        gui.button(self.apply_button, self, "Cancel", callback=self.cancel)
Пример #4
0
    def __init__(self, parent=None):
        super().__init__(parent)

        self.taxids = taxonomy.common_taxids()
        self.current_taxid_index = self.taxids.index(self.taxid)

        self.data = None
        self.geneinfo = None
        self.nettask = None
        self._invalidated = False

        box = gui.widgetBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, "No data on input\n")

        box = gui.widgetBox(self.controlArea, "Organism")
        self.organism_cb = gui.comboBox(
            box, self, "current_taxid_index",
            items=map(taxonomy.name, self.taxids),
            callback=self._update_organism
        )
        box = gui.widgetBox(self.controlArea, "Genes")
        self.genes_cb = gui.comboBox(
            box, self, "gene_var_index", callback=self._update_query_genes
        )
        self.varmodel = itemmodels.VariableListModel()
        self.genes_cb.setModel(self.varmodel)

        gui.checkBox(
            box, self, "use_attr_names",
            "Use attribute names",
            callback=self._update_query_genes
        )

        box = gui.widgetBox(self.controlArea, "Network")
        gui.comboBox(
            box, self, "network_source",
            items=[s.name for s in SOURCES],
            callback=self._on_source_db_changed
        )
        gui.checkBox(
            box, self, "include_neighborhood",
            "Include immediate gene neighbors",
            callback=self.invalidate
        )
        self.score_spin = gui.doubleSpin(
            box, self, "min_score", 0.0, 1.0, step=0.001,
            label="Minimal edge score",
            callback=self.invalidate
        )
        self.score_spin.setEnabled(SOURCES[self.network_source].score_filter)

        box = gui.widgetBox(self.controlArea, "Commit")
        gui.button(box, self, "Retrieve", callback=self.commit, default=True)

        self.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)
        self.layout().setSizeConstraint(QtGui.QLayout.SetFixedSize)

        self.executor = ThreadExecutor()
Пример #5
0
    def test_executor(self):
        executor = ThreadExecutor()

        f = executor.submit(QThread.currentThread)

        self.assertIsNot(f.result(3), QThread.currentThread())

        f = executor.submit(lambda: 1 / 0)

        with self.assertRaises(ZeroDivisionError):
            f.result()

        results = []
        task = Task(function=QThread.currentThread)
        task.resultReady.connect(results.append, Qt.DirectConnection)

        f = executor.submit_task(task)

        self.assertIsNot(f.result(3), QThread.currentThread())

        executor.shutdown()
Пример #6
0
class OWSignificantGroups(widget.OWWidget):
    name = 'Significant Groups'
    description = "Test whether instances grouped by nominal values are " \
                  "significantly different from random samples or the "\
                  "dataset in whole."
    icon = 'icons/SignificantGroups.svg'
    priority = 200

    class Inputs(widget.OWWidget.Inputs):
        data = widget.Input('Data', Table)

    class Outputs(widget.OWWidget.Outputs):
        selected_data = widget.Output('Selected Data', Table, default=True)
        data = widget.Output('Data', Table)
        results = widget.Output('Test Results', Table)

    want_main_area = True
    want_control_area = True

    class Information(widget.OWWidget.Information):
        nothing_significant = widget.Msg(
            'Chosen parameters reveal no significant groups')

    class Error(widget.OWWidget.Error):
        no_vars_selected = widget.Msg('No independent variables selected')
        no_class_selected = widget.Msg('No dependent variable selected')

    TEST_STATISTICS = OrderedDict((
        ('mean', np.nanmean),
        ('variance', np.nanvar),
        ('median', np.nanmedian),
        ('minimum', np.nanmin),
        ('maximum', np.nanmax),
    ))

    settingsHandler = settings.DomainContextHandler()

    chosen_X = settings.ContextSetting([])
    chosen_y = settings.ContextSetting(0)
    is_permutation = settings.Setting(False)
    test_statistic = settings.Setting(next(iter(TEST_STATISTICS)))
    min_count = settings.Setting(20)

    def __init__(self):
        self._task = None  # type: Optional[self.Task]
        self._executor = ThreadExecutor(self)

        self.data = None
        self.test_type = ''

        self.discrete_model = DomainModel(separators=False,
                                          valid_types=(DiscreteVariable, ),
                                          parent=self)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE,
                                        parent=self)

        box = gui.vBox(self.controlArea, 'Hypotheses Testing')
        gui.listView(
            box,
            self,
            'chosen_X',
            model=self.discrete_model,
            box='Grouping Variables',
            selectionMode=QListView.ExtendedSelection,
            callback=self.Error.no_vars_selected.clear,
            toolTip='Select multiple variables with Ctrl+ or Shift+Click.')
        target = gui.comboBox(
            box,
            self,
            'chosen_y',
            sendSelectedValue=True,
            label='Test Variable',
            callback=[self.set_test_type, self.Error.no_class_selected.clear])
        target.setModel(self.domain_model)

        gui.checkBox(box,
                     self,
                     'is_permutation',
                     label='Permutation test',
                     callback=self.set_test_type)
        gui.comboBox(box,
                     self,
                     'test_statistic',
                     label='Statistic:',
                     items=tuple(self.TEST_STATISTICS),
                     orientation=Qt.Horizontal,
                     sendSelectedValue=True,
                     callback=self.set_test_type)
        gui.label(box, self, 'Test: %(test_type)s')

        box = gui.vBox(self.controlArea, 'Filter')
        gui.spin(box,
                 self,
                 'min_count',
                 5,
                 1000,
                 5,
                 label='Minimum group size (count):')

        self.btn_compute = gui.button(self.controlArea,
                                      self,
                                      '&Compute',
                                      callback=self.compute)
        gui.rubber(self.controlArea)

        class Model(PyTableModel):
            _n_vars = 0
            _BACKGROUND = [QBrush(QColor('#eee')), QBrush(QColor('#ddd'))]

            def setHorizontalHeaderLabels(self, labels, n_vars):
                self._n_vars = n_vars
                super().setHorizontalHeaderLabels(labels)

            def data(self, index, role=Qt.DisplayRole):
                if role == Qt.BackgroundRole and index.column() < self._n_vars:
                    return self._BACKGROUND[index.row() % 2]
                if role == Qt.DisplayRole or role == Qt.ToolTipRole:
                    colname = self.headerData(index.column(), Qt.Horizontal)
                    if colname.lower() in ('count', 'count | class'):
                        row = self.mapToSourceRows(index.row())
                        return int(self[row][index.column()])
                return super().data(index, role)

        owwidget = self

        class View(gui.TableView):
            _vars = None

            def set_vars(self, vars):
                self._vars = vars

            def selectionChanged(self, *args):
                super().selectionChanged(*args)

                rows = list({
                    index.row()
                    for index in self.selectionModel().selectedRows(0)
                })

                if not rows:
                    owwidget.Outputs.data.send(None)
                    return

                model = self.model().tolist()
                filters = [
                    Values([
                        FilterDiscrete(self._vars[col], {model[row][col]})
                        for col in range(len(self._vars))
                    ]) for row in self.model().mapToSourceRows(rows)
                ]
                data = Values(filters, conjunction=False)(owwidget.data)

                annotated = create_annotated_table(owwidget.data, data.ids)

                owwidget.Outputs.selected_data.send(data)
                owwidget.Outputs.data.send(annotated)

        self.view = view = View(self)
        self.model = Model(parent=self)
        view.setModel(self.model)
        view.horizontalHeader().setStretchLastSection(False)
        self.mainArea.layout().addWidget(view)

        self.set_test_type()

    @Inputs.data
    def set_data(self, data):
        self.data = data
        domain = None if data is None else data.domain

        self.closeContext()

        self.domain_model.set_domain(domain)
        self.discrete_model.set_domain(domain)
        if domain is not None:
            if domain.class_var:
                self.chosen_y = domain.class_var.name

        self.openContext(domain)

        self.set_test_type()

    def set_test_type(self):
        if self.data is None:
            return

        yvar = self.data.domain[self.chosen_y]

        self.controls.test_statistic.setEnabled(yvar.is_continuous)

        if self.is_permutation:
            test = 'Permutation '
            if yvar.is_discrete:
                test += 'χ² '
            else:
                test += str(self.test_statistic) + ' '
        else:
            test = ''
            if yvar.is_discrete:
                test += 'χ² ' if len(yvar.values) > 2 else 'Hypergeometric '
            else:
                if self.test_statistic == 'mean':
                    test += "Student's t-"
                elif self.test_statistic == 'variance':
                    test += "Fligner–Killeen "
                elif self.test_statistic == 'median':
                    test += "Mann–Whitney U "
                elif self.test_statistic in ('minimum', 'maximum'):
                    test += "Gumbel distribution "
                else:
                    assert False, self.test_statistic
        test += 'test'
        self.test_type = test

    def compute(self):
        if not self.chosen_X:
            self.Error.no_vars_selected()
            return

        if not self.chosen_y:
            self.Error.no_class_selected()
            return

        self.btn_compute.setEnabled(False)
        yvar = self.data.domain[self.chosen_y]

        def get_col(var, col):
            values = np.array(list(var.values) + [np.nan], dtype=object)
            pd.Categorical(col, list(var.values))
            col = pd.Series(col).fillna(-1).astype(int)
            return values[col]

        X = np.column_stack([
            get_col(var,
                    self.data.get_column_view(var)[0])
            for var in (self.data.domain[i] for i in self.chosen_X)
        ])
        X = pd.DataFrame(X, columns=self.chosen_X)
        y = pd.Series(self.data.get_column_view(yvar)[0])

        test, args, kwargs = None, (X, y), dict(min_count=self.min_count)
        if self.is_permutation:
            statistic = 'chi2' if yvar.is_discrete else self.TEST_STATISTICS[
                self.test_statistic]
            test = perm_test
            kwargs.update(statistic=statistic,
                          n_jobs=-2,
                          callback=methodinvoke(self, "setProgressValue",
                                                (int, int)))
        else:
            if yvar.is_discrete:
                if len(yvar.values) > 2:
                    test = chi2_test
                else:
                    test = hyper_test
                    args = (X, y.astype(bool))
            else:
                test = {
                    'mean': t_test,
                    'variance': fligner_killeen_test,
                    'median': mannwhitneyu_test,
                    'minimum': gumbel_min_test,
                    'maximum': gumbel_max_test,
                }[self.test_statistic]

        self._task = task = self.Task()
        self.progressBarInit()
        task.future = self._executor.submit(test, *args, **kwargs)
        task.watcher = FutureWatcher(task.future)
        task.watcher.done.connect(self.on_computed)

    @Slot(int, int)
    def setProgressValue(self, n, N):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(n / (N + 1) * 100)

    class Task:
        future = ...  # type: concurrent.futures.Future
        watcher = ...  # type: FutureWatcher
        cancelled = False  # type: bool

        def cancel(self):
            self.cancelled = True
            # Cancel the future. Note this succeeds only if the execution has
            # not yet started (see `concurrent.futures.Future.cancel`) ..
            self.future.cancel()
            # ... and wait until computation finishes
            concurrent.futures.wait([self.future])

    @Slot(concurrent.futures.Future)
    def on_computed(self, future):
        assert self.thread() is QThread.currentThread()
        assert future.done()

        self._task = None
        self.progressBarFinished()

        df = future.result()
        # Only retain "significant" p-values
        df = df[df[CORRECTED_LABEL] < .2]

        columns = [var.name for var in df.index.name] + list(df.columns)
        lst = [list(i) + list(j) for i, j in zip(df.index, df.values)]

        results_table = table_from_frame(pd.DataFrame(lst, columns=columns),
                                         force_nominal=True)
        results_table.name = 'Significant Groups'
        self.Outputs.results.send(results_table)

        self.view.set_vars(list(df.index.name))
        self.model.setHorizontalHeaderLabels(columns, len(df.index.name))
        self.model.wrap(lst)
        self.view.sortByColumn(len(columns) - 1, Qt.AscendingOrder)

        self.Information.nothing_significant(shown=not lst)
        self.btn_compute.setEnabled(True)

    def send_report(self):
        self.report_items([
            ('Test Variable', self.chosen_y),
            ('Test', self.test_type),
            ('Min. group size', self.min_count),
        ])
        self.report_table('Significant Groups', self.view)
Пример #7
0
class OWLearningCurveC(widget.OWWidget):
    name = "Learning Curve (C)"
    description = ("Takes a dataset and a set of learners and shows a "
                   "learning curve in a table")
    icon = "icons/LearningCurve.svg"
    priority = 1010

    inputs = [("Data", Orange.data.Table, "set_dataset", widget.Default),
              ("Test Data", Orange.data.Table, "set_testdataset"),
              ("Learner", Orange.classification.Learner, "set_learner",
               widget.Multiple + widget.Default)]

    #: cross validation folds
    folds = settings.Setting(5)
    #: points in the learning curve
    steps = settings.Setting(10)
    #: index of the selected scoring function
    scoringF = settings.Setting(0)
    #: compute curve on any change of parameters
    commitOnChange = settings.Setting(True)

    def __init__(self):
        super().__init__()

        # sets self.curvePoints, self.steps equidistant points from
        # 1/self.steps to 1
        self.updateCurvePoints()

        self.scoring = [("Classification Accuracy",
                         Orange.evaluation.scoring.CA),
                        ("AUC", Orange.evaluation.scoring.AUC),
                        ("Precision", Orange.evaluation.scoring.Precision),
                        ("Recall", Orange.evaluation.scoring.Recall)]
        #: input data on which to construct the learning curve
        self.data = None
        #: optional test data
        self.testdata = None
        #: A {input_id: Learner} mapping of current learners from input channel
        self.learners = OrderedDict()
        #: A {input_id: List[Results]} mapping of input id to evaluation
        #: results list, one for each curve point
        self.results = OrderedDict()
        #: A {input_id: List[float]} mapping of input id to learning curve
        #: point scores
        self.curves = OrderedDict()

        # [start-snippet-3]
        #: The current evaluating task (if any)
        self._task = None  # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()
        # [end-snippet-3]

        # GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(box, 'No data on input.')
        self.infob = gui.widgetLabel(box, 'No learners.')

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Evaluation Scores")
        gui.comboBox(box,
                     self,
                     "scoringF",
                     items=[x[0] for x in self.scoring],
                     callback=self._invalidate_curves)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Options")
        gui.spin(box,
                 self,
                 'folds',
                 2,
                 100,
                 step=1,
                 label='Cross validation folds:  ',
                 keyboardTracking=False,
                 callback=lambda: self._invalidate_results()
                 if self.commitOnChange else None)
        gui.spin(box,
                 self,
                 'steps',
                 2,
                 100,
                 step=1,
                 label='Learning curve points:  ',
                 keyboardTracking=False,
                 callback=[
                     self.updateCurvePoints,
                     lambda: self._invalidate_results()
                     if self.commitOnChange else None
                 ])
        gui.checkBox(box, self, 'commitOnChange',
                     'Apply setting on any change')
        self.commitBtn = gui.button(box,
                                    self,
                                    "Apply Setting",
                                    callback=self._invalidate_results,
                                    disabled=True)

        gui.rubber(self.controlArea)

        # table widget
        self.table = gui.table(self.mainArea,
                               selectionMode=QTableWidget.NoSelection)

    ##########################################################################
    # slots: handle input signals

    def set_dataset(self, data):
        """Set the input train dataset."""
        # Clear all results/scores
        for id in list(self.results):
            self.results[id] = None
        for id in list(self.curves):
            self.curves[id] = None

        self.data = data

        if data is not None:
            self.infoa.setText('%d instances in input dataset' % len(data))
        else:
            self.infoa.setText('No data on input.')

        self.commitBtn.setEnabled(self.data is not None)

    def set_testdataset(self, testdata):
        """Set a separate test dataset."""
        # Clear all results/scores
        for id in list(self.results):
            self.results[id] = None
        for id in list(self.curves):
            self.curves[id] = None

        self.testdata = testdata

    def set_learner(self, learner, id):
        """Set the input learner for channel id."""
        if id in self.learners:
            if learner is None:
                # remove a learner and corresponding results
                del self.learners[id]
                del self.results[id]
                del self.curves[id]
            else:
                # update/replace a learner on a previously connected link
                self.learners[id] = learner
                # invalidate the cross-validation results and curve scores
                # (will be computed/updated in `_update`)
                self.results[id] = None
                self.curves[id] = None
        else:
            if learner is not None:
                self.learners[id] = learner
                # initialize the cross-validation results and curve scores
                # (will be computed/updated in `_update`)
                self.results[id] = None
                self.curves[id] = None

        if len(self.learners):
            self.infob.setText("%d learners on input." % len(self.learners))
        else:
            self.infob.setText("No learners.")

        self.commitBtn.setEnabled(len(self.learners))

# [start-snippet-4]

    def handleNewSignals(self):
        self._update()
# [end-snippet-4]

    def _invalidate_curves(self):
        if self.data is not None:
            self._update_curve_points()
        self._update_table()

    def _invalidate_results(self):
        for id in self.learners:
            self.curves[id] = None
            self.results[id] = None
        self._update()

# [start-snippet-5]

    def _update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        if self.data is None:
            return
        # collect all learners for which results have not yet been computed
        need_update = [(id, learner) for id, learner in self.learners.items()
                       if self.results[id] is None]
        if not need_update:
            return
# [end-snippet-5]
# [start-snippet-6]
        learners = [learner for _, learner in need_update]
        # setup the learner evaluations as partial function capturing
        # the necessary arguments.
        if self.testdata is None:
            learning_curve_func = partial(
                learning_curve,
                learners,
                self.data,
                folds=self.folds,
                proportions=self.curvePoints,
            )
        else:
            learning_curve_func = partial(
                learning_curve_with_test_data,
                learners,
                self.data,
                self.testdata,
                times=self.folds,
                proportions=self.curvePoints,
            )
# [end-snippet-6]
# [start-snippet-7]
# setup the task state
        self._task = task = Task()
        # The learning_curve[_with_test_data] also takes a callback function
        # to report the progress. We instrument this callback to both invoke
        # the appropriate slots on this widget for reporting the progress
        # (in a thread safe manner) and to implement cooperative cancellation.
        set_progress = methodinvoke(self, "setProgressValue", (float, ))

        def callback(finished):
            # check if the task has been cancelled and raise an exception
            # from within. This 'strategy' can only be used with code that
            # properly cleans up after itself in the case of an exception
            # (does not leave any global locks, opened file descriptors, ...)
            if task.cancelled:
                raise KeyboardInterrupt()
            set_progress(finished * 100)

        # capture the callback in the partial function
        learning_curve_func = partial(learning_curve_func, callback=callback)
        # [end-snippet-7]
        # [start-snippet-8]
        self.progressBarInit()
        # Submit the evaluation function to the executor and fill in the
        # task with the resultant Future.
        task.future = self._executor.submit(learning_curve_func)
        # Setup the FutureWatcher to notify us of completion
        task.watcher = FutureWatcher(task.future)
        # by using FutureWatcher we ensure `_task_finished` slot will be
        # called from the main GUI thread by the Qt's event loop
        task.watcher.done.connect(self._task_finished)
# [end-snippet-8]

    @pyqtSlot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

# [start-snippet-9]

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the result of learner evaluation.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        try:
            results = f.result()  # type: List[Results]
        except Exception as ex:
            # Log the exception with a traceback
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occurred during evaluation: {!r}".format(ex))
            # clear all results
            for key in self.results.keys():
                self.results[key] = None
        else:
            # split the combined result into per learner/model results ...
            results = [
                list(Results.split_by_model(p_results))
                for p_results in results
            ]  # type: List[List[Results]]
            assert all(len(r.learners) == 1 for r1 in results for r in r1)
            assert len(results) == len(self.curvePoints)

            learners = [r.learners[0] for r in results[0]]
            learner_id = {
                learner: id_
                for id_, learner in self.learners.items()
            }

            # ... and update self.results
            for i, learner in enumerate(learners):
                id_ = learner_id[learner]
                self.results[id_] = [p_results[i] for p_results in results]
# [end-snippet-9]
# update the display
        self._update_curve_points()
        self._update_table()
# [end-snippet-9]

# [start-snippet-10]

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self._task = None
# [end-snippet-10]

# [start-snippet-11]

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()


# [end-snippet-11]

    def _update_curve_points(self):
        for id in self.learners:
            curve = [
                self.scoring[self.scoringF][1](x)[0] for x in self.results[id]
            ]
            self.curves[id] = curve

    def _update_table(self):
        self.table.setRowCount(0)
        self.table.setRowCount(len(self.curvePoints))
        self.table.setColumnCount(len(self.learners))

        self.table.setHorizontalHeaderLabels(
            [learner.name for _, learner in self.learners.items()])
        self.table.setVerticalHeaderLabels(
            ["{:.2f}".format(p) for p in self.curvePoints])

        if self.data is None:
            return

        for column, curve in enumerate(self.curves.values()):
            for row, point in enumerate(curve):
                self.table.setItem(row, column,
                                   QTableWidgetItem("{:.5f}".format(point)))

        for i in range(len(self.learners)):
            sh = self.table.sizeHintForColumn(i)
            cwidth = self.table.columnWidth(i)
            self.table.setColumnWidth(i, max(sh, cwidth))

    def updateCurvePoints(self):
        self.curvePoints = [(x + 1.) / self.steps for x in range(self.steps)]
Пример #8
0
class OWSetEnrichment(widget.OWWidget):
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QtGui.QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QtGui.QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QtGui.QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QtGui.QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QtGui.QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QtGui.QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QtGui.QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QtCore.QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        return QtCore.QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the the widget's initialization (preferably after
        # ensuring all required databases have been downloaded.

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        self._clearView()

        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        self.__invalidated = True

    def genesFromTable(self, table):
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QtGui.QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        raise NotImplementedError

        from .OWGOEnrichmentAnalysis import GeneMatcherDialog
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True)
        if dialog.exec_():
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            self._invalidateGeneMatcher()
            if self.data is not None:
                self.updateAnnotations()

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.error(0, "Input data contains no columns with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes == None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QtGui.QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QtGui.QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QtGui.QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QtGui.QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QtGui.QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        state = self.state
        if state is None:
            self.error(0,)
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat)
                         for cat, _ in self.selectedCategories())

#         filterString = str(self.filterLineEdit.text()).lower()

        model = self.annotationsChartView.model()

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [model.item(i, 4).data(Qt.UserRole)
                 for i, hidden in enumerate(hidemask) if not hidden]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(
                index, QModelIndex(), hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        rows = self.annotationsChartView.selectionModel().selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (set(item.enrichment.query_mapped)
                         for item in selected),
                        set())
        assert self.state.namematcher.done()
        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None

        self._task = None
        self._executor = ThreadExecutor()

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        plot_properties_box = gui.vBox(self.controlArea, "Display features")
        self.num_atr_spin = gui.spin(plot_properties_box,
                                     self,
                                     "gui_num_atr",
                                     1,
                                     100,
                                     step=1,
                                     label="Show attributes",
                                     callback=self._update_num_atr_spin,
                                     controlWidth=80,
                                     keyboardTracking=False)

        self.sort_combo = gui.comboBox(plot_properties_box,
                                       self,
                                       "sort_index",
                                       label="Rank by",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._update_combo)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea,
                                        self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.setMinimumWidth(700)
        self.resize(700, 400)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 viewportUpdateMode=QGraphicsView.BoundingRectViewportUpdate,
                                 renderHints=(QPainter.Antialiasing |
                                              QPainter.TextAntialiasing |
                                              QPainter.SmoothPixmapTransform),
                                 alignment=(Qt.AlignTop |
                                            Qt.AlignLeft),
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)
                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                 styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(500)
                self._is_resizing = False

            w = self

            def resizeEvent(self, resizeEvent):
                self._is_resizing = True
                self.w.draw()
                self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(600, 300)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(600, 30)

        """all will share the same scene, but will show different parts of it"""
        self.box_scene = QGraphicsScene(self)

        self.box_view = GraphicsView(self.box_scene, self)
        self.header_view = FixedSizeGraphicsView(self.box_scene, self)
        self.footer_view = FixedSizeGraphicsView(self.box_scene, self)

        self.mainArea.layout().addWidget(self.header_view)
        self.mainArea.layout().addWidget(self.box_view)
        self.mainArea.layout().addWidget(self.footer_view)

        self.painter = None
    def __init__(self, parent=None):
        super().__init__(self, parent)

        self.clusterDataset = None
        self.referenceDataset = None
        self.ontology = None
        self.annotations = None
        self.loadedAnnotationCode = "---"
        self.treeStructRootKey = None
        self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = OWGOEnrichmentAnalysis.Initializing

        self.annotationCodes = []

        #############
        ## GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        ## Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        box = gui.widgetBox(self.inputTab, "Organism")
        self.annotationComboBox = gui.comboBox(
            box, self, "annotationIndex", items=self.annotationCodes,
            callback=self._updateEnrichment, tooltip="Select organism")

        genebox = gui.widgetBox(self.inputTab, "Gene Names")
        self.geneAttrIndexCombo = gui.comboBox(
            genebox, self, "geneAttrIndex", callback=self._updateEnrichment,
            tooltip="Use this attribute to extract gene names from input data")
        self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

        cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names",
                          tooltip="Use column names for gene names",
                          callback=self._updateEnrichment)
        cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

        gui.button(genebox, self, "Gene matcher settings",
                   callback=self.UpdateGeneMatcher,
                   tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self._updateEnrichment)

        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self._updateEnrichment)

        ## Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph, 
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        #use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self._updateEnrichment)
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        ## Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms", 
                      "Outputs genes common to all selected GO terms"],
            callback=[self.ExampleSelection,
                      self.UpdateAddClassButton])

        self.addClassCB = gui.checkBox(
            box, self, "selectionAddTermAsClass", "Add GO Term as class",
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        self.setBlocking(True)
        self._executor = ThreadExecutor()
        self._init = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             ("GO", "taxonomy.pickle")]
        )
        self._init.finished.connect(self.__initialize_finish)
        self._executor.submit(self._init)
Пример #11
0
class OWSetEnrichment(widget.OWWidget):
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QtGui.QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QtGui.QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QtGui.QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QtGui.QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QtGui.QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QtGui.QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QtGui.QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [("Taxonomy", "ncbi_taxonomy.tar.gz"),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QtCore.QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        return QtCore.QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the the widget's initialization (preferably after
        # ensuring all required databases have been downloaded.

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        self._clearView()

        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        self.__invalidated = True

    def genesFromTable(self, table):
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QtGui.QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        raise NotImplementedError

        from .OWGOEnrichmentAnalysis import GeneMatcherDialog
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True)
        if dialog.exec_():
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            self._invalidateGeneMatcher()
            if self.data is not None:
                self.updateAnnotations()

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.error(0, "Input data contains no attributes with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes == None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QtGui.QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QtGui.QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QtGui.QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QtGui.QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QtGui.QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        state = self.state
        if state is None:
            self.error(0,)
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat)
                         for cat, _ in self.selectedCategories())

#         filterString = str(self.filterLineEdit.text()).lower()

        model = self.annotationsChartView.model()

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [model.item(i, 4).data(Qt.UserRole)
                 for i, hidden in enumerate(hidemask) if not hidden]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(
                index, QModelIndex(), hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        rows = self.annotationsChartView.selectionModel().selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (item.enrichment.query_mapped for item in selected),
                        set())
        assert self.state.namematcher.done()
        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
Пример #12
0
class CNNM(OWWidget):
    name = "M CNN"
    description = ""
    # icon = "icons/robot.svg"

    want_main_area = True

    class Inputs:
        data = Input('Data', ImageDataBunch, default=True)

    def __init__(self):
        super().__init__()
        self.learn = None

        # train_button = gui.button(self.controlArea, self, "开始训练", callback=self.train)
        self.label = gui.label(self.mainArea, self, "模型结构")

        #: The current evaluating task (if any)
        self._task = None  # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()

        self.model = nn.Sequential(
            self.conv(1, 8),  # 14
            nn.BatchNorm2d(8),
            nn.ReLU(),
            self.conv(8, 16),  # 7
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, 32),  # 4
            nn.BatchNorm2d(32),
            nn.ReLU(),
            self.conv(32, 16),  # 2
            nn.BatchNorm2d(16),
            nn.ReLU(),
            self.conv(16, 10),  # 1
            nn.BatchNorm2d(10),
            Flatten()  # remove (1,1) grid
        )

    def handleNewSignals(self):
        self._update()

    def _update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        if self.data is None:
            return
        # collect all learners for which results have not yet been computed
        if not self.learn:
            return

        # setup the task state
        self._task = task = Task()
        # The learning_curve[_with_test_data] also takes a callback function
        # to report the progress. We instrument this callback to both invoke
        # the appropriate slots on this widget for reporting the progress
        # (in a thread safe manner) and to implement cooperative cancellation.
        set_progress = methodinvoke(self, "setProgressValue", (float,))

        def callback(finished):
            # check if the task has been cancelled and raise an exception
            # from within. This 'strategy' can only be used with code that
            # properly cleans up after itself in the case of an exception
            # (does not leave any global locks, opened file descriptors, ...)
            if task.cancelled:
                raise KeyboardInterrupt()
            set_progress(finished * 100)

        self.progressBarInit()
        # Submit the evaluation function to the executor and fill in the
        # task with the resultant Future.
        # task.future = self._executor.submit(self.learn.fit_one_cycle(1))

        with progress_disabled_ctx(self.learn) as learn:
            fit_model = partial(my_fit, learn, 1, callback=callback)
            task.future = self._executor.submit(fit_model)
            # Setup the FutureWatcher to notify us of completion
            task.watcher = FutureWatcher(task.future)
            # by using FutureWatcher we ensure `_task_finished` slot will be
            # called from the main GUI thread by the Qt's event loop
            task.watcher.done.connect(self._task_finished)

    @pyqtSlot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the result of learner evaluation.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        # try:
        #     result = f.result()  # type: List[Results]
        # except Exception as ex:
        #     # Log the exception with a traceback
        #     log = logging.getLogger()
        #     log.exception(__name__, exc_info=True)
        #     self.error("Exception occurred during evaluation: {!r}".format(ex))
        #     # clear all results
        #     self.result= None
        # else:
        print(self.learn.validate())
            # ... and update self.results

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self._task = None

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()

    def conv(self, ni, nf):
        return nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def train(self):
        if self.learn is None:
            return
        self.learn.fit_one_cycle(3)

    @Inputs.data
    def set_data(self, data):
        if data is not None:
            self.data = data
            self.learn = Learner(self.data, self.model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy,
                                 add_time=False, bn_wd=False, silent=True)
            self.label.setText(self.learn.summary())
        else:
            self.data = None
class OWImportImages(widget.OWWidget):
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self._imageMeta = []
        self._imageCategories = {}

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
        )
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images",
            self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images")
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction("Reload",
                               self,
                               icon=icons.reload_icon,
                               toolTip="Reload current image set")
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(browseaction.iconText(),
                                   icon=browseaction.icon(),
                                   toolTip=browseaction.toolTip(),
                                   clicked=browseaction.trigger)
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled()))
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(text="No image set selected", wordWrap=True)
        self.progress_widget = QProgressBar(minimum=0, maximum=0)
        self.cancel_button = QPushButton(
            "Cancel",
            icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = []
        for item in self.recent_paths:
            if os.path.isdir(item.abspath):
                recent_paths.append(item)
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath, self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = self.recent_paths[0].abspath

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self,
                "Select Top Level Directory",
                startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()

            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir)
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = sum(imeta.isvalid for imeta in self._imageMeta)
            ncategories = len(self._imageCategories)
            if ncategories < 2:
                text = "{} images".format(nvalid)
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exists or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        if not os.path.exists(path):
            warnings.warn("'{}' does not exist".format(path), UserWarning)
            return False
        elif not os.path.isdir(path):
            warnings.warn("'{}' is not a directory".format(path), UserWarning)
            return False

        newindex = self.addRecentPath(path)
        self.recent_cb.setCurrentIndex(newindex)
        if newindex >= 0:
            self.currentPath = path
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        if self.__state == State.Processing:
            self.cancel()

        return True

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str
        """
        existing = None
        for pathitem in self.recent_paths:
            if os.path.samefile(pathitem.abspath, path):
                existing = pathitem
                break

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [
                State.Done, State.NoState, State.Error, State.Cancelled
            ]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self._imageMeta = []
        self._imageCategories = {}
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')".format(
                         self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(self, "__onReportProgress", (object, ))

        task = ImageScan(startdir, report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            image_meta = task.future.result()
        except Exception as err:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            image_meta = []
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        categories = {}

        for imeta in image_meta:
            # derive categories from the path relative to the starting dir
            dirname = os.path.dirname(imeta.path)
            relpath = os.path.relpath(dirname, task.startdir)
            categories[dirname] = relpath

        self._imageMeta = image_meta
        self._imageCategories = categories

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Create and commit a Table from the collected image meta data.
        """
        if self._imageMeta:
            categories = self._imageCategories
            if len(categories) > 1:
                cat_var = Orange.data.DiscreteVariable(
                    "category", values=list(sorted(categories.values())))
            else:
                cat_var = None
            # Image name (file basename without the extension)
            imagename_var = Orange.data.StringVariable("image name")
            # Full fs path
            image_var = Orange.data.StringVariable("image")
            image_var.attributes["type"] = "image"
            # file size/width/height
            size_var = Orange.data.ContinuousVariable("size",
                                                      number_of_decimals=0)
            width_var = Orange.data.ContinuousVariable("width",
                                                       number_of_decimals=0)
            height_var = Orange.data.ContinuousVariable("height",
                                                        number_of_decimals=0)
            domain = Orange.data.Domain(
                [], [cat_var] if cat_var is not None else [],
                [imagename_var, image_var, size_var, width_var, height_var])
            cat_data = []
            meta_data = []

            for imgmeta in self._imageMeta:
                if imgmeta.isvalid:
                    if cat_var is not None:
                        category = categories.get(os.path.dirname(
                            imgmeta.path))
                        cat_data.append([cat_var.to_val(category)])
                    else:
                        cat_data.append([])
                    basename = os.path.basename(imgmeta.path)
                    imgname, _ = os.path.splitext(basename)

                    meta_data.append([
                        imgname, imgmeta.path, imgmeta.size, imgmeta.width,
                        imgmeta.height
                    ])

            cat_data = numpy.array(cat_data, dtype=float)
            meta_data = numpy.array(meta_data, dtype=object)
            table = Orange.data.Table.from_numpy(
                domain, numpy.empty((len(cat_data), 0), dtype=float), cat_data,
                meta_data)
        else:
            table = None

        self.send("Data", table)

    def onDeleteWidget(self):
        self.cancel()
        self.__executor.shutdown(wait=True)
Пример #14
0
class OWImportImages(widget.OWWidget):
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.data = None
        self._n_image_categories = 0
        self._n_image_data = 0
        self._n_skipped = 0

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True
        )
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No image set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=0
        )
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        self._relocate_recent_files()
        recent_paths = []
        for item in self.recent_paths:
            recent_paths.append(item)
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        recent_model.clear()

        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.recent_paths and os.path.isdir(self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
            self.__actions.reload.setEnabled(True)
        else:
            self.recent_cb.setCurrentIndex(-1)
            self.__actions.reload.setEnabled(False)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self._n_image_data
            ncategories = self._n_image_categories
            n_skipped = self._n_skipped
            if ncategories < 2:
                text = "{} image{}".format(nvalid, "s" if nvalid != 1 else "")
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exists or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        if self.recent_paths and path is not None and \
                os.path.isdir(self.recent_paths[0].abspath) and os.path.isdir(path) \
                and os.path.samefile(os.path.isdir(self.recent_paths[0].abspath), path):
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a directory".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)

        self.__actions.reload.setEnabled(len(self.recent_paths) > 0)

        if self.__state == State.Processing:
            self.cancel()

        return success

    def _search_paths(self):
        basedir = self.workflowEnv().get("basedir", None)
        if basedir is None:
            return []
        return [("basedir", basedir)]

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath.create(path, self._search_paths())
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.data = None
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if not self.recent_paths:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        startdir = self.recent_paths[0].abspath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImportImages(report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task(startdir)
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            data, n_skipped = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            data = None
            n_skipped = 0
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if data:
            self._n_image_data = len(data)
            self._n_image_categories = len(data.domain.class_var.values)\
                if data.domain.class_var else 0
        else:
            self._n_image_data, self._n_image_categories = 0, 0

        self.data = data
        self._n_skipped = n_skipped

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Commit a Table from the collected image meta data.
        """
        self.send("Data", self.data)

    def onDeleteWidget(self):
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if path.endswith("/"):
                    path = path[:-1]  # remove last /
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            return True

        return super().eventFilter(receiver, event)

    def _relocate_recent_files(self):
        search_paths = self._search_paths()
        rec = []
        for recent in self.recent_paths:
            kwargs = dict(
                title=recent.title, sheet=recent.sheet,
                file_format=recent.file_format)
            resolved = recent.resolve(search_paths)
            if resolved is not None:
                rec.append(
                    RecentPath.create(resolved.abspath, search_paths, **kwargs))
            else:
                rec.append(recent)
        # change the list in-place for the case the widgets wraps this list
        self.recent_paths[:] = rec

    def workflowEnvChanged(self, key, value, oldvalue):
        """
        Function called when environment changes (e.g. while saving the scheme)
        It make sure that all environment connected values are modified
        (e.g. relative file paths are changed)
        """
        self.__initRecentItemsModel()
Пример #15
0
class AgentTrainMixin():
    train_results = None
    initial_train_results = None

    trained_episodes = 0
    initial_trained_episodes = 0

    memory = None
    initial_memory = None

    _progress = 0

    def train(self, episodes, seconds, ow_widget, ow_widget_on_finish):
        self.ow_widget = ow_widget
        self.ow_widget_on_finish = ow_widget_on_finish

        self._executor = ThreadExecutor()

        self.ow_widget.progressBarInit()

        def on_progress(self, progress):
            self.on_progress(progress)

        def on_finish(self):
            self.on_finish()

        self._executor.submit(
            partial(self.train_task, episodes, seconds, on_progress,
                    on_finish))

    def train_episode(self):
        done = False

        steps_to_finish = 0
        total_reward = 0

        state = self.environment.reset()

        while not done:
            # pylint: disable=assignment-from-no-return
            action, action_info = self.train_action(state)

            new_state, reward, done, _info = self.environment.step(action)

            self.process_reward(state, action, reward, new_state)

            state = new_state

            steps_to_finish += 1
            total_reward += reward

        return {
            'steps_to_finish': steps_to_finish,
            'total_reward': total_reward,
            'last_action_info': action_info
        }

    def process_reward(self, state, action, reward, new_state):
        pass

    def train_action(self, state):
        pass

    def on_progress(self, progress):
        progress = int(progress)
        # Performance reasons: only update the
        # progress when is realy necessary.
        if progress != self._progress:
            self._progress = progress
            self.ow_widget.progressBarSet(progress)

    def on_finish(self):
        self.ow_widget.progressBarFinished()
        self.ow_widget_on_finish()

    @staticmethod
    def spend_seconds(started_time):
        return time.time() - started_time

    def has_available_time(self, started_time, seconds):
        return self.spend_seconds(started_time) < seconds

    def current_progress(self, started_time, seconds, episodes, interations):
        progress = self._progress

        estimated_seconds = seconds

        spend_seconds = self.spend_seconds(started_time)

        if episodes > 0 and spend_seconds > 0:
            interation_mean_seconds = spend_seconds / interations
            estimated_seconds += episodes * interation_mean_seconds

        if estimated_seconds > 0.0:
            progress = (spend_seconds / estimated_seconds) * 100

            if progress >= 100.0:
                progress = 99.999

        return progress

    def train_task(self, episodes, seconds, on_progress, on_finish):
        episode = 1
        interations = 0

        started_time = time.time()

        self.trained_episodes = self.initial_trained_episodes
        self.train_results = deepcopy(self.initial_train_results)
        self.memory = deepcopy(self.initial_memory)

        while episode <= episodes or self.has_available_time(
                started_time, seconds):
            interations += 1

            on_progress(
                self,
                self.current_progress(started_time, seconds, episodes,
                                      interations))

            self.trained_episodes += 1

            # pylint: disable=assignment-from-no-return
            result = self.train_episode()

            self.train_results = np.append(self.train_results, result)

            if not self.has_available_time(started_time, seconds):
                episode += 1

        on_finish(self)
    def __init__(
        self,
        parent=None,
    ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.map_input_to_ensembl = None
        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n")

        self.organisms = None
        self.organismBox = gui.widgetBox(self.controlArea,
                                         "Organism",
                                         addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox,
            self,
            "organism_index",
            callback=self._onSelectedOrganismChanged)

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(box,
                                             self,
                                             "gene_attr",
                                             "Gene attribute",
                                             callback=self.updateInfoItems)
        self.geneAttrComboBox.setEnabled(not self.useAttr)

        self.geneAttrCheckbox = gui.checkBox(box,
                                             self,
                                             "useAttr",
                                             "Use column names",
                                             callback=self.updateInfoItems)
        self.geneAttrCheckbox.toggled[bool].connect(
            self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea,
                     self,
                     "search_string",
                     "Filter",
                     callbackOnType=True,
                     callback=self.searchUpdate)

        self.treeWidget = QTreeView(self.mainArea)

        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setSelectionMode(QTreeView.ExtendedSelection)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setRootIsDecorated(False)

        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['NCBI ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['Ensembl ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))

        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box,
                   self,
                   "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(taxonomy.ensure_downloaded,
                             callback=methodinvoke(self, "advance", ())))

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)
class OWGeneInfo(widget.OWWidget):
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 5

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_genes = Output("Selected Genes", Orange.data.Table)
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(
        self,
        parent=None,
    ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.map_input_to_ensembl = None
        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n")

        self.organisms = None
        self.organismBox = gui.widgetBox(self.controlArea,
                                         "Organism",
                                         addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox,
            self,
            "organism_index",
            callback=self._onSelectedOrganismChanged)

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(box,
                                             self,
                                             "gene_attr",
                                             "Gene attribute",
                                             callback=self.updateInfoItems)
        self.geneAttrComboBox.setEnabled(not self.useAttr)

        self.geneAttrCheckbox = gui.checkBox(box,
                                             self,
                                             "useAttr",
                                             "Use column names",
                                             callback=self.updateInfoItems)
        self.geneAttrCheckbox.toggled[bool].connect(
            self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea,
                     self,
                     "search_string",
                     "Filter",
                     callbackOnType=True,
                     callback=self.searchUpdate)

        self.treeWidget = QTreeView(self.mainArea)

        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setSelectionMode(QTreeView.ExtendedSelection)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setRootIsDecorated(False)

        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['NCBI ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.setItemDelegateForColumn(
            HEADER_SCHEMA['Ensembl ID'],
            gui.LinkStyledItemDelegate(self.treeWidget))

        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box,
                   self,
                   "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(taxonomy.ensure_downloaded,
                             callback=methodinvoke(self, "advance", ())))

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1, processEvents=None)

    def _get_available_organisms(self):
        available_organism = sorted([(tax_id, taxonomy.name(tax_id))
                                     for tax_id in taxonomy.common_taxids()],
                                    key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]

        self.organismComboBox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def initialize(self):
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self._get_available_organisms()
        self.organism_index = self.organisms.index(taxonomy.DEFAULT_ORGANISM)
        self.taxid = self.organisms[self.organism_index]

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        sys.excepthook(type(exc), exc, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        assert 0 <= self.organism_index <= len(self.organisms)
        self.taxid = self.organisms[self.organism_index]

        if self.data is not None:
            self.updateInfoItems()

    @Inputs.data
    def setData(self, data=None):
        if not self.__initialized:
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = [
                attr for attr in data.domain.variables + data.domain.metas
                if isinstance(attr, (Orange.data.StringVariable,
                                     Orange.data.DiscreteVariable))
            ]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = str(self.data.attributes.get(TAX_ID, ''))
            self.useAttr = self.data.attributes.get(GENE_AS_ATTRIBUTE_NAME,
                                                    self.useAttr)

            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)

            self.updateInfoItems()
        else:
            self.clear()

    def updateInfoItems(self):
        self.warning(0)
        if self.data is None:
            return

        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [
                str(ex[attr]) for ex in self.data if not math.isnan(ex[attr])
            ]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        source_name, info_getter = ("NCBI Info", ncbi_info)

        self.error(0)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(function=partial(
            info_getter, org, genes, advance=methodinvoke(self, "advance", (
            ))))
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            self.map_input_to_ensembl, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo
        self.cells = cells = []
        self.row2geneinfo = {}

        for i, (input_name, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for item in gi:
                    row.append(item)

                # parse synonyms
                row[HEADER_SCHEMA['Synonyms']] = ','.join(
                    row[HEADER_SCHEMA['Synonyms']])
                cells.append(row)
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, list(HEADER_SCHEMA.keys()), None)

        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(len(HEADER_SCHEMA)):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200))

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

        if self.useAttr:
            new_data = self.data.from_table(self.data.domain, self.data)

            for gene_var in new_data.domain.attributes:
                gene_var.attributes['Ensembl ID'] = str(
                    self.map_input_to_ensembl[gene_var.name])

            self.Outputs.data.send(new_data)

        elif self.attributes:
            ensembl_ids = []
            for gene_name in self.data.get_column_view(
                    self.attributes[self.gene_attr])[0]:
                if gene_name and gene_name in self.map_input_to_ensembl:
                    ensembl_ids.append(self.map_input_to_ensembl[gene_name])
                else:
                    ensembl_ids.append('')

            data_with_ensembl = append_columns(
                self.data,
                metas=[(Orange.data.StringVariable('Ensembl ID'), ensembl_ids)
                       ])
            self.Outputs.data.send(data_with_ensembl)

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], [
                "NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description",
                "Synonyms", "Nomenclature"
            ], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.Outputs.selected_genes.send(None)

    def commit(self):
        if self.data is None:
            self.Outputs.selected_genes.send(None)
            self.Outputs.data.send(None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(),
                          r.bottom() + 1) for r in selection)))
        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)
        gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row)
                        for row in selectedRows)

        isselected = selectedIds.__contains__

        if selectedIds:

            if self.useAttr:
                attrs = [
                    attr for attr in self.data.domain.attributes
                    if isselected(attr.name)
                ]
                domain = Orange.data.Domain(attrs, self.data.domain.class_vars,
                                            self.data.domain.metas)
                newdata = self.data.from_table(domain, self.data)

                self.Outputs.selected_genes.send(newdata)

            elif self.attributes:
                attr = self.attributes[self.gene_attr]
                gene_col = [
                    attr.str_val(v) for v in self.data.get_column_view(attr)[0]
                ]
                gene_col = [(i, name) for i, name in enumerate(gene_col)
                            if isselected(name)]
                indices = [i for i, _ in gene_col]

                # SELECTED GENES OUTPUT
                selected_genes_metas = [
                    Orange.data.StringVariable(name)
                    for name in gene.GENE_INFO_HEADER_LABELS
                ]
                selected_genes_domain = Orange.data.Domain(
                    self.data.domain.attributes, self.data.domain.class_vars,
                    self.data.domain.metas + tuple(selected_genes_metas))

                selected_genes_data = self.data.from_table(
                    selected_genes_domain, self.data)[indices]

                model_rows = [gene2row[gene_name] for _, gene_name in gene_col]
                for col, meta in zip(range(model.columnCount()),
                                     selected_genes_metas):
                    col_data = [
                        str(model.index(row, col).data(Qt.DisplayRole))
                        for row in model_rows
                    ]
                    col_data = np.array(col_data, dtype=object, ndmin=2).T
                    selected_genes_data[:, meta] = col_data

                if not len(selected_genes_data):
                    selected_genes_data = None

                self.Outputs.selected_genes.send(selected_genes_data)
        else:
            self.Outputs.selected_genes.send(None)

    def rowFiltered(self, row):
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all([s in row for s in searchStrings])

    def searchUpdate(self):
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(), QModelIndex(),
                not all([s in row for s in searchStrings]))

    def selectFiltered(self):
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def onAltSourceChange(self):
        self.updateInfoItems()

    def onDeleteWidget(self):
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
Пример #18
0
class OWNNLearner(OWBaseLearner):
    name = "Neural Network"
    description = "A multi-layer perceptron (MLP) algorithm with " \
                  "backpropagation."
    icon = "icons/NN.svg"
    priority = 90
    keywords = ["mlp"]

    LEARNER = NNLearner

    activation = ["identity", "logistic", "tanh", "relu"]
    act_lbl = ["Identity", "Logistic", "tanh", "ReLu"]
    solver = ["lbfgs", "sgd", "adam"]
    solv_lbl = ["L-BFGS-B", "SGD", "Adam"]

    learner_name = Setting("Neural Network")
    hidden_layers_input = Setting("100,")
    activation_index = Setting(3)
    solver_index = Setting(2)
    max_iterations = Setting(200)
    alpha_index = Setting(0)
    settings_version = 1

    alphas = list(chain([x / 10000 for x in range(1, 10)],
                        [x / 1000 for x in range(1, 10)],
                        [x / 100 for x in range(1, 10)],
                        [x / 10 for x in range(1, 10)],
                        range(1, 10),
                        range(10, 100, 5),
                        range(100, 200, 10),
                        range(100, 1001, 50)))

    def add_main_layout(self):
        form = QFormLayout()
        form.setFieldGrowthPolicy(form.AllNonFixedFieldsGrow)
        form.setVerticalSpacing(25)
        gui.widgetBox(self.controlArea, True, orientation=form)
        form.addRow(
            "Neurons in hidden layers:",
            gui.lineEdit(
                None, self, "hidden_layers_input",
                orientation=Qt.Horizontal, callback=self.settings_changed,
                tooltip="A list of integers defining neurons. Length of list "
                        "defines the number of layers. E.g. 4, 2, 2, 3.",
                placeholderText="e.g. 100,"))
        form.addRow(
            "Activation:",
            gui.comboBox(
                None, self, "activation_index", orientation=Qt.Horizontal,
                label="Activation:", items=[i for i in self.act_lbl],
                callback=self.settings_changed))

        form.addRow(" ", gui.separator(None, 16))
        form.addRow(
            "Solver:",
            gui.comboBox(
                None, self, "solver_index", orientation=Qt.Horizontal,
                label="Solver:", items=[i for i in self.solv_lbl],
                callback=self.settings_changed))
        self.reg_label = QLabel()
        slider = gui.hSlider(
            None, self, "alpha_index",
            minValue=0, maxValue=len(self.alphas) - 1,
            callback=lambda: (self.set_alpha(), self.settings_changed()),
            createLabel=False)
        form.addRow(self.reg_label, slider)
        self.set_alpha()

        form.addRow(
            "Maximal number of iterations:",
            gui.spin(
                None, self, "max_iterations", 10, 10000, step=10,
                label="Max iterations:", orientation=Qt.Horizontal,
                alignment=Qt.AlignRight, callback=self.settings_changed))

    def set_alpha(self):
        self.strength_C = self.alphas[self.alpha_index]
        self.reg_label.setText("Regularization, α={}:".format(self.strength_C))

    @property
    def alpha(self):
        return self.alphas[self.alpha_index]

    def setup_layout(self):
        super().setup_layout()

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # just a test cancel button
        gui.button(self.apply_button, self, "Cancel", callback=self.cancel)

    def create_learner(self):
        return self.LEARNER(
            hidden_layer_sizes=self.get_hidden_layers(),
            activation=self.activation[self.activation_index],
            solver=self.solver[self.solver_index],
            alpha=self.alpha,
            max_iter=self.max_iterations,
            preprocessors=self.preprocessors)

    def get_learner_parameters(self):
        return (("Hidden layers", ', '.join(map(str, self.get_hidden_layers()))),
                ("Activation", self.act_lbl[self.activation_index]),
                ("Solver", self.solv_lbl[self.solver_index]),
                ("Alpha", self.alpha),
                ("Max iterations", self.max_iterations))

    def get_hidden_layers(self):
        layers = tuple(map(int, re.findall(r'\d+', self.hidden_layers_input)))
        if not layers:
            layers = (100,)
            self.hidden_layers_input = "100,"
        return layers

    def update_model(self):
        self.show_fitting_failed(None)
        self.model = None
        if self.check_data():
            self.__update()
        else:
            self.Outputs.model.send(self.model)

    @Slot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    def __update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        max_iter = self.learner.kwargs["max_iter"]

        # Setup the task state
        task = Task()
        lastemitted = 0.

        def callback(iteration):
            nonlocal task  # type: Task
            nonlocal lastemitted
            if task.isInterruptionRequested():
                raise CancelTaskException()
            progress = round(iteration / max_iter * 100)
            if progress != lastemitted:
                task.emitProgressUpdate(progress)
                lastemitted = progress

        # copy to set the callback so that the learner output is not modified
        # (currently we can not pass callbacks to learners __call__)
        learner = copy.copy(self.learner)
        learner.callback = callback

        def build_model(data, learner):
            try:
                return learner(data)
            except CancelTaskException:
                return None

        build_model_func = partial(build_model, self.data, learner)

        task.setFuture(self._executor.submit(build_model_func))
        task.done.connect(self._task_finished)
        task.progressChanged.connect(self.setProgressValue)

        self._task = task
        self.progressBarInit()
        self.setBlocking(True)

    @Slot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the built model
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()
        self._task.deleteLater()
        self._task = None
        self.setBlocking(False)
        self.progressBarFinished()

        try:
            self.model = f.result()
        except Exception as ex:  # pylint: disable=broad-except
            # Log the exception with a traceback
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.model = None
            self.show_fitting_failed(ex)
        else:
            self.model.name = self.learner_name
            self.model.instances = self.data
            self.model.skl_model.orange_callback = None  # remove unpicklable callback
            self.Outputs.model.send(self.model)

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect from the task
            self._task.done.disconnect(self._task_finished)
            self._task.progressChanged.disconnect(self.setProgressValue)
            self._task.deleteLater()
            self._task = None

        self.progressBarFinished()
        self.setBlocking(False)

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            alpha = settings.pop("alpha", None)
            if alpha is not None:
                settings["alpha_index"] = \
                    np.argmin(np.abs(np.array(cls.alphas) - alpha))
Пример #19
0
    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.clusterings = {}

        self.__executor = ThreadExecutor(parent=self)
        self.__task = None  # type: Optional[Task]

        layout = QGridLayout()
        bg = gui.radioButtonsInBox(
            self.controlArea, self, "optimize_k", orientation=layout,
            box="Number of Clusters", callback=self.update_method,
        )

        layout.addWidget(
            gui.appendRadioButton(bg, "Fixed:", addToLayout=False), 1, 1)
        sb = gui.hBox(None, margin=0)
        gui.spin(
            sb, self, "k", minv=2, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight, callback=self.update_k)
        gui.rubber(sb)
        layout.addWidget(sb, 1, 2)

        layout.addWidget(
            gui.appendRadioButton(bg, "From", addToLayout=False), 2, 1)
        ftobox = gui.hBox(None)
        ftobox.layout().setContentsMargins(0, 0, 0, 0)
        layout.addWidget(ftobox, 2, 2)
        gui.spin(
            ftobox, self, "k_from", minv=2, maxv=29,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_from)
        gui.widgetLabel(ftobox, "to")
        gui.spin(
            ftobox, self, "k_to", minv=3, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_to)
        gui.rubber(ftobox)

        box = gui.vBox(self.controlArea, "Initialization")
        gui.comboBox(
            box, self, "smart_init", items=[m[0] for m in self.INIT_METHODS],
            callback=self.invalidate)

        layout = QGridLayout()
        gui.widgetBox(box, orientation=layout)
        layout.addWidget(gui.widgetLabel(None, "Re-runs: "), 0, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 0, 1)
        gui.lineEdit(
            sb, self, "n_init", controlWidth=60,
            valueType=int, validator=QIntValidator(), callback=self.invalidate)
        layout.addWidget(
            gui.widgetLabel(None, "Maximum iterations: "), 1, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 1, 1)
        gui.lineEdit(
            sb, self, "max_iterations", controlWidth=60, valueType=int,
            validator=QIntValidator(), callback=self.invalidate)

        self.apply_button = gui.auto_commit(
            self.buttonsArea, self, "auto_commit", "Apply", box=None,
            commit=self.commit)
        gui.rubber(self.controlArea)

        box = gui.vBox(self.mainArea, box="Silhouette Scores")
        self.mainArea.setVisible(self.optimize_k)
        self.table_model = ClusterTableModel(self)
        table = self.table_view = QTableView(self.mainArea)
        table.setModel(self.table_model)
        table.setSelectionMode(QTableView.SingleSelection)
        table.setSelectionBehavior(QTableView.SelectRows)
        table.setItemDelegate(gui.ColoredBarItemDelegate(self, color=Qt.cyan))
        table.selectionModel().selectionChanged.connect(self.select_row)
        table.setMaximumWidth(200)
        table.horizontalHeader().setStretchLastSection(True)
        table.horizontalHeader().hide()
        table.setShowGrid(False)
        box.layout().addWidget(table)
Пример #20
0
    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.clusterings = {}

        self.__executor = ThreadExecutor(parent=self)
        self.__task = None  # type: Optional[Task]

        layout = QGridLayout()
        bg = gui.radioButtonsInBox(
            self.controlArea, self, "optimize_k", orientation=layout,
            box="Number of Clusters", callback=self.update_method,
        )

        layout.addWidget(
            gui.appendRadioButton(bg, "Fixed:", addToLayout=False), 1, 1)
        sb = gui.hBox(None, margin=0)
        gui.spin(
            sb, self, "k", minv=2, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight, callback=self.update_k)
        gui.rubber(sb)
        layout.addWidget(sb, 1, 2)

        layout.addWidget(
            gui.appendRadioButton(bg, "From", addToLayout=False), 2, 1)
        ftobox = gui.hBox(None)
        ftobox.layout().setContentsMargins(0, 0, 0, 0)
        layout.addWidget(ftobox, 2, 2)
        gui.spin(
            ftobox, self, "k_from", minv=2, maxv=29,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_from)
        gui.widgetLabel(ftobox, "to")
        gui.spin(
            ftobox, self, "k_to", minv=3, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_to)
        gui.rubber(ftobox)

        box = gui.vBox(self.controlArea, "Initialization")
        gui.comboBox(
            box, self, "smart_init", items=[m[0] for m in self.INIT_METHODS],
            callback=self.invalidate)

        layout = QGridLayout()
        gui.widgetBox(box, orientation=layout)
        layout.addWidget(gui.widgetLabel(None, "Re-runs: "), 0, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 0, 1)
        gui.lineEdit(
            sb, self, "n_init", controlWidth=60,
            valueType=int, validator=QIntValidator(), callback=self.invalidate)
        layout.addWidget(
            gui.widgetLabel(None, "Maximum iterations: "), 1, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 1, 1)
        gui.lineEdit(
            sb, self, "max_iterations", controlWidth=60, valueType=int,
            validator=QIntValidator(), callback=self.invalidate)

        self.apply_button = gui.auto_commit(
            self.buttonsArea, self, "auto_commit", "Apply", box=None,
            commit=self.commit)
        gui.rubber(self.controlArea)

        box = gui.vBox(self.mainArea, box="Silhouette Scores")
        self.mainArea.setVisible(self.optimize_k)
        self.table_model = ClusterTableModel(self)
        table = self.table_view = QTableView(self.mainArea)
        table.setModel(self.table_model)
        table.setSelectionMode(QTableView.SingleSelection)
        table.setSelectionBehavior(QTableView.SelectRows)
        table.setItemDelegate(gui.ColoredBarItemDelegate(self, color=Qt.cyan))
        table.selectionModel().selectionChanged.connect(self.select_row)
        table.setMaximumWidth(200)
        table.horizontalHeader().setStretchLastSection(True)
        table.horizontalHeader().hide()
        table.setShowGrid(False)
        box.layout().addWidget(table)
Пример #21
0
class OWTestLearners(OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message(
            "Click on the table header to select shown columns",
            "click_header")]

    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    BUILTIN_ORDER = {
        DiscreteVariable: ("AUC", "CA", "F1", "Precision", "Recall"),
        ContinuousVariable: ("MSE", "RMSE", "MAE", "R2")}

    shown_scores = \
        settings.Setting(set(chain(*BUILTIN_ORDER.values())))

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg("Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        only_one_class_var_value = Msg("Target variable has only one value.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        else:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data:
            conds = [not data.domain.class_vars,
                     len(data.domain.class_vars) > 1,
                     data.domain.has_discrete_class and len(data.domain.class_var.values) == 1]
            errors = [self.Error.class_required,
                      self.Error.too_many_classes,
                      self.Error.only_one_class_var_value]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        if self.data is None or self.data.domain.class_var is None:
            self.scorers = []
            return
        class_var = self.data and self.data.domain.class_var
        order = {name: i
                 for i, name in enumerate(self.BUILTIN_ORDER[type(class_var)])}
        # 'abstract' is retrieved from __dict__ to avoid inheriting
        usable = (cls for cls in scoring.Score.registry.values()
                  if cls.is_scalar and not cls.__dict__.get("abstract")
                  and isinstance(class_var, cls.class_types))
        self.scorers = sorted(usable, key=lambda cls: order.get(cls.name, 99))

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self._update_header()
        self._update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()
        self.__update()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        model = self.result_model
        model.setColumnCount(1 + len(self.scorers))
        for col, score in enumerate(self.scorers):
            item = QStandardItem(score.name)
            item.setToolTip(score.long_name)
            model.setHorizontalHeaderItem(col + 1, item)
        self._update_shown_columns()

    def _update_shown_columns(self):
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            header.setSectionHidden(section, col_name not in self.shown_scores)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [Try(scorer_caller(scorer, ovr_results))
                             for scorer in self.scorers]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def show_column_chooser(self, pos):
        # pylint doesn't know that self.shown_scores is a set, not a Setting
        # pylint: disable=unsupported-membership-test
        def update(col_name, checked):
            if checked:
                self.shown_scores.add(col_name)
            else:
                self.shown_scores.remove(col_name)
            self._update_shown_columns()

        menu = QMenu()
        model = self.result_model
        header = self.view.horizontalHeader()
        for section in range(1, model.columnCount()):
            col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole)
            action = menu.addAction(col_name)
            action.setCheckable(True)
            action.setChecked(col_name in self.shown_scores)
            action.triggered.connect(partial(update, col_name))
        menu.exec(header.mapToGlobal(pos))

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')]

    @Slot(float)
    def setProgressValue(self, value):
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
        )
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.KFold:
            folds = self.NFolds[self.n_folds]
            test_f = partial(
                Orange.evaluation.CrossValidation,
                self.data, learners_c, k=folds,
                random_state=rstate, **common_args)
        elif self.resampling == OWTestLearners.FeatureFold:
            test_f = partial(
                Orange.evaluation.CrossValidationFeature,
                self.data, learners_c, self.fold_feature,
                **common_args
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            test_f = partial(
                Orange.evaluation.LeaveOneOut,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.ShuffleSplit:
            train_size = self.SampleSizes[self.sample_size] / 100
            test_f = partial(
                Orange.evaluation.ShuffleSplit,
                self.data, learners_c,
                n_resamples=self.NRepeats[self.n_repeats],
                train_size=train_size, test_size=None,
                stratified=self.shuffle_stratified,
                random_state=rstate, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            test_f = partial(
                Orange.evaluation.TestOnTrainingData,
                self.data, learners_c, **common_args
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData,
                self.data, self.test_data, learners_c, **common_args
            )
        else:
            assert False, "self.resampling %s" % self.resampling

        def replace_learners(evalfunc, *args, **kwargs):
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[float]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            if task.cancelled:
                raise UserInterrupt()
            QMetaObject.invokeMethod(
                self, "setProgressValue", Qt.QueuedConnection,
                Q_ARG(float, 100 * finished)
            )

        def ondone(_):
            QMetaObject.invokeMethod(
                self, "__task_complete", Qt.QueuedConnection,
                Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()    # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            log.exception("testing error (in __task_complete):",
                          exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er), er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(scorer_caller(scorer, result))
                             for scorer in self.scorers]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self._update_header()
        self._update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #22
0
class OWKMeans(widget.OWWidget):
    name = "k-Means"
    description = "k-Means clustering algorithm with silhouette-based " \
                  "quality estimation."
    icon = "icons/KMeans.svg"
    priority = 2100

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(
            ANNOTATED_DATA_SIGNAL_NAME, Table, default=True,
            replaces=["Annotated Data"]
        )
        centroids = Output("Centroids", Table)

    class Error(widget.OWWidget.Error):
        failed = widget.Msg("Clustering failed\nError: {}")
        not_enough_data = widget.Msg(
            "Too few ({}) unique data instances for {} clusters"
        )

    class Warning(widget.OWWidget.Warning):
        no_silhouettes = widget.Msg(
            "Silhouette scores are not computed for >{} samples".format(
                SILHOUETTE_MAX_SAMPLES)
        )
        not_enough_data = widget.Msg(
            "Too few ({}) unique data instances for {} clusters"
        )

    INIT_METHODS = (("Initialize with KMeans++", "k-means++"),
                    ("Random initialization", "random"))

    resizing_enabled = False
    buttons_area_orientation = Qt.Vertical

    k = Setting(3)
    k_from = Setting(2)
    k_to = Setting(8)
    optimize_k = Setting(False)
    max_iterations = Setting(300)
    n_init = Setting(10)
    smart_init = Setting(0)  # KMeans++
    auto_commit = Setting(True)

    settings_version = 2

    @classmethod
    def migrate_settings(cls, settings, version):
        # type: (Dict, int) -> None
        if version < 2:
            if 'auto_apply' in settings:
                settings['auto_commit'] = settings.get('auto_apply', True)
                settings.pop('auto_apply', None)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.clusterings = {}

        self.__executor = ThreadExecutor(parent=self)
        self.__task = None  # type: Optional[Task]

        layout = QGridLayout()
        bg = gui.radioButtonsInBox(
            self.controlArea, self, "optimize_k", orientation=layout,
            box="Number of Clusters", callback=self.update_method,
        )

        layout.addWidget(
            gui.appendRadioButton(bg, "Fixed:", addToLayout=False), 1, 1)
        sb = gui.hBox(None, margin=0)
        gui.spin(
            sb, self, "k", minv=2, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight, callback=self.update_k)
        gui.rubber(sb)
        layout.addWidget(sb, 1, 2)

        layout.addWidget(
            gui.appendRadioButton(bg, "From", addToLayout=False), 2, 1)
        ftobox = gui.hBox(None)
        ftobox.layout().setContentsMargins(0, 0, 0, 0)
        layout.addWidget(ftobox, 2, 2)
        gui.spin(
            ftobox, self, "k_from", minv=2, maxv=29,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_from)
        gui.widgetLabel(ftobox, "to")
        gui.spin(
            ftobox, self, "k_to", minv=3, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_to)
        gui.rubber(ftobox)

        box = gui.vBox(self.controlArea, "Initialization")
        gui.comboBox(
            box, self, "smart_init", items=[m[0] for m in self.INIT_METHODS],
            callback=self.invalidate)

        layout = QGridLayout()
        gui.widgetBox(box, orientation=layout)
        layout.addWidget(gui.widgetLabel(None, "Re-runs: "), 0, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 0, 1)
        gui.lineEdit(
            sb, self, "n_init", controlWidth=60,
            valueType=int, validator=QIntValidator(), callback=self.invalidate)
        layout.addWidget(
            gui.widgetLabel(None, "Maximum iterations: "), 1, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 1, 1)
        gui.lineEdit(
            sb, self, "max_iterations", controlWidth=60, valueType=int,
            validator=QIntValidator(), callback=self.invalidate)

        self.apply_button = gui.auto_commit(
            self.buttonsArea, self, "auto_commit", "Apply", box=None,
            commit=self.commit)
        gui.rubber(self.controlArea)

        box = gui.vBox(self.mainArea, box="Silhouette Scores")
        self.mainArea.setVisible(self.optimize_k)
        self.table_model = ClusterTableModel(self)
        table = self.table_view = QTableView(self.mainArea)
        table.setModel(self.table_model)
        table.setSelectionMode(QTableView.SingleSelection)
        table.setSelectionBehavior(QTableView.SelectRows)
        table.setItemDelegate(gui.ColoredBarItemDelegate(self, color=Qt.cyan))
        table.selectionModel().selectionChanged.connect(self.select_row)
        table.setMaximumWidth(200)
        table.horizontalHeader().setStretchLastSection(True)
        table.horizontalHeader().hide()
        table.setShowGrid(False)
        box.layout().addWidget(table)

    def adjustSize(self):
        self.ensurePolished()
        s = self.sizeHint()
        self.resize(s)

    def update_method(self):
        self.table_model.clear_scores()
        self.commit()

    def update_k(self):
        self.optimize_k = False
        self.table_model.clear_scores()
        self.commit()

    def update_from(self):
        self.k_to = max(self.k_from + 1, self.k_to)
        self.optimize_k = True
        self.table_model.clear_scores()
        self.commit()

    def update_to(self):
        self.k_from = min(self.k_from, self.k_to - 1)
        self.optimize_k = True
        self.table_model.clear_scores()
        self.commit()

    def enough_data_instances(self, k):
        """k cannot be larger than the number of data instances."""
        return len(self.data) >= k

    @staticmethod
    def _compute_clustering(data, k, init, n_init, max_iter, silhouette):
        # type: (Table, int, str, int, int, bool) -> KMeansModel
        if k > len(data):
            raise NotEnoughData()

        return KMeans(
            n_clusters=k, init=init, n_init=n_init, max_iter=max_iter,
            compute_silhouette_score=silhouette,
        )(data)

    @Slot(int, int)
    def __progress_changed(self, n, d):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None
        self.progressBarSet(100 * n / d)

    @Slot(int, Exception)
    def __on_exception(self, idx, ex):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None

        if isinstance(ex, NotEnoughData):
            self.Error.not_enough_data(len(self.data), self.k_from + idx)

        # Only show failed message if there is only 1 k to compute
        elif not self.optimize_k:
            self.Error.failed(str(ex))

        self.clusterings[self.k_from + idx] = str(ex)

    @Slot(int, object)
    def __clustering_complete(self, _, result):
        # type: (int, KMeansModel) -> None
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None

        self.clusterings[result.k] = result

    @Slot()
    def __commit_finished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None
        assert self.data is not None

        self.__task = None
        self.setBlocking(False)
        self.progressBarFinished()

        if self.optimize_k:
            self.update_results()

        if self.optimize_k and all(isinstance(self.clusterings[i], str)
                                   for i in range(self.k_from, self.k_to + 1)):
            # Show the error of the last clustering
            self.Error.failed(self.clusterings[self.k_to])

        self.send_data()

    def __launch_tasks(self, ks):
        # type: (List[int]) -> None
        """Execute clustering in separate threads for all given ks."""
        futures = [self.__executor.submit(
            self._compute_clustering,
            data=self.data,
            k=k,
            init=self.INIT_METHODS[self.smart_init][1],
            n_init=self.n_init,
            max_iter=self.max_iterations,
            silhouette=True,
        ) for k in ks]
        watcher = FutureSetWatcher(futures)
        watcher.resultReadyAt.connect(self.__clustering_complete)
        watcher.progressChanged.connect(self.__progress_changed)
        watcher.exceptionReadyAt.connect(self.__on_exception)
        watcher.doneAll.connect(self.__commit_finished)

        self.__task = Task(futures, watcher)
        self.progressBarInit(processEvents=False)
        self.setBlocking(True)

    def cancel(self):
        if self.__task is not None:
            task, self.__task = self.__task, None
            task.cancel()

            task.watcher.resultReadyAt.disconnect(self.__clustering_complete)
            task.watcher.progressChanged.disconnect(self.__progress_changed)
            task.watcher.exceptionReadyAt.disconnect(self.__on_exception)
            task.watcher.doneAll.disconnect(self.__commit_finished)

            self.progressBarFinished()
            self.setBlocking(False)

    def run_optimization(self):
        if not self.enough_data_instances(self.k_from):
            self.Error.not_enough_data(len(self.data), self.k_from)
            return

        if not self.enough_data_instances(self.k_to):
            self.Warning.not_enough_data(len(self.data), self.k_to)
            return

        needed_ks = [k for k in range(self.k_from, self.k_to + 1)
                     if k not in self.clusterings]

        if needed_ks:
            self.__launch_tasks(needed_ks)
        else:
            # If we don't need to recompute anything, just set the results to
            # what they were before
            self.update_results()

    def cluster(self):
        # Check if the k already has a computed clustering
        if self.k in self.clusterings:
            self.send_data()
            return

        # Check if there is enough data
        if not self.enough_data_instances(self.k):
            self.Error.not_enough_data(len(self.data), self.k)
            return

        self.__launch_tasks([self.k])

    def commit(self):
        self.cancel()
        self.clear_messages()

        # Some time may pass before the new scores are computed, so clear the
        # old scores to avoid potential confusion. Hiding the mainArea could
        # cause flickering when the clusters are computed quickly, so this is
        # the better alternative
        self.table_model.clear_scores()
        self.mainArea.setVisible(self.optimize_k and self.data is not None)

        if self.data is None:
            self.send_data()
            return

        if self.optimize_k:
            self.run_optimization()
        else:
            self.cluster()

        QTimer.singleShot(100, self.adjustSize)

    def invalidate(self):
        self.cancel()
        self.Error.clear()
        self.Warning.clear()
        self.clusterings = {}
        self.table_model.clear_scores()

        self.commit()

    def update_results(self):
        scores = [
            mk if isinstance(mk, str) else mk.silhouette for mk in (
                self.clusterings[k] for k in range(self.k_from, self.k_to + 1))
        ]
        best_row = max(
            range(len(scores)), default=0,
            key=lambda x: 0 if isinstance(scores[x], str) else scores[x]
        )
        self.table_model.set_scores(scores, self.k_from)
        self.table_view.selectRow(best_row)
        self.table_view.setFocus(Qt.OtherFocusReason)
        self.table_view.resizeRowsToContents()

    def selected_row(self):
        indices = self.table_view.selectedIndexes()
        if indices:
            return indices[0].row()

    def select_row(self):
        self.send_data()

    def send_data(self):
        if self.optimize_k:
            row = self.selected_row()
            k = self.k_from + row if row is not None else None
        else:
            k = self.k

        km = self.clusterings.get(k)
        if self.data is None or km is None or isinstance(km, str):
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        cluster_var = DiscreteVariable(
            get_next_name(domain, "Cluster"),
            values=["C%d" % (x + 1) for x in range(km.k)]
        )
        clust_ids = km(self.data)
        silhouette_var = ContinuousVariable(get_next_name(domain, "Silhouette"))
        if km.silhouette_samples is not None:
            self.Warning.no_silhouettes.clear()
            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        else:
            self.Warning.no_silhouettes()
            scores = np.nan

        new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = clust_ids.X.ravel()
        new_table.get_column_view(silhouette_var)[0][:] = scores

        centroids = Table(Domain(km.pre_domain.attributes), km.centroids)

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.data, old_data = data, self.data

        # Do not needlessly recluster the data if X hasn't changed
        if old_data and self.data and np.array_equal(self.data.X, old_data.X):
            if self.auto_commit:
                self.send_data()
        else:
            self.invalidate()

    def send_report(self):
        # False positives (Setting is not recognized as int)
        # pylint: disable=invalid-sequence-index
        if self.optimize_k and self.selected_row() is not None:
            k_clusters = self.k_from + self.selected_row()
        else:
            k_clusters = self.k
        init_method = self.INIT_METHODS[self.smart_init][0]
        init_method = init_method[0].lower() + init_method[1:]
        self.report_items((
            ("Number of clusters", k_clusters),
            ("Optimization", "{}, {} re-runs limited to {} steps".format(
                init_method, self.n_init, self.max_iterations))))
        if self.data is not None:
            self.report_data("Data", self.data)
            if self.optimize_k:
                self.report_table(
                    "Silhouette scores for different numbers of clusters",
                    self.table_view)

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #23
0
class OWGeneInfo(widget.OWWidget):
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene attribute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        sys.excepthook(type(exc), exc.args, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        assert 0 <= self.organism_index <= len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        if not self.__initialized:
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = \
                [attr for attr in data.domain.variables + data.domain.metas
                 if isinstance(attr, (Orange.data.StringVariable,
                                      Orange.data.DiscreteVariable))]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
            self.useAttr = data_hints.get_hint(
                self.data, "genesinrows", self.useAttr)

            self.openContext(data)
            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)
            else:
                self.organism_index = 0
                self.taxid = self.organisms[self.organism_index]

            self.updateInfoItems()
        else:
            self.clear()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        self.warning(0)
        if self.data is None:
            return

        genes = self.inputGenes()
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        source_name, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for _, item in zip(schema, gi):
                    if isinstance(item, Link):
                        # TODO: This should be handled by delegates
                        row.append(item.text)
                        links.append(item.link)
                    else:
                        row.append(item)
                cells.append(row)
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection))
        )

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)
        gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row)
                        for row in selectedRows)

        isselected = selectedIds.__contains__

        if self.useAttr:
            def is_selected(attr):
                return attr.name in selectedIds
            attrs = [attr for attr in self.data.domain.attributes
                     if isselected(attr.name)]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if isselected(name)]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                col_data = np.array(col_data, dtype=object, ndmin=2).T
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all([s in row for s in searchStrings])

    def searchUpdate(self):
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all([s in row for s in searchStrings]))

    def selectFiltered(self):
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def updateDictyExpressLink(self, genes, show=False):
        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None
        if show:
            genes = [fix(gene) for gene in genes if fix(gene)]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        model = self.treeWidget.model()
        mapToSource = model.mapToSource
        selectedRows = self.treeWidget.selectedIndexes()
        selectedRows = [mapToSource(index).row() for index in selectedRows]
        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)

        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None

        genes = [fix(gene) for gene in selectedIds if fix(gene)]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        self.updateInfoItems()

    def onDeleteWidget(self):
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
Пример #24
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(wordWrap=True, )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)
Пример #25
0
class OWLearningCurveC(widget.OWWidget):
    name = "Learning Curve (C)"
    description = ("Takes a dataset and a set of learners and shows a "
                   "learning curve in a table")
    icon = "icons/LearningCurve.svg"
    priority = 1010

    inputs = [("Data", Orange.data.Table, "set_dataset", widget.Default),
              ("Test Data", Orange.data.Table, "set_testdataset"),
              ("Learner", Orange.classification.Learner, "set_learner",
               widget.Multiple + widget.Default)]

    #: cross validation folds
    folds = settings.Setting(5)
    #: points in the learning curve
    steps = settings.Setting(10)
    #: index of the selected scoring function
    scoringF = settings.Setting(0)
    #: compute curve on any change of parameters
    commitOnChange = settings.Setting(True)

    def __init__(self):
        super().__init__()

        # sets self.curvePoints, self.steps equidistant points from
        # 1/self.steps to 1
        self.updateCurvePoints()

        self.scoring = [
            ("Classification Accuracy", Orange.evaluation.scoring.CA),
            ("AUC", Orange.evaluation.scoring.AUC),
            ("Precision", Orange.evaluation.scoring.Precision),
            ("Recall", Orange.evaluation.scoring.Recall)
        ]
        #: input data on which to construct the learning curve
        self.data = None
        #: optional test data
        self.testdata = None
        #: A {input_id: Learner} mapping of current learners from input channel
        self.learners = OrderedDict()
        #: A {input_id: List[Results]} mapping of input id to evaluation
        #: results list, one for each curve point
        self.results = OrderedDict()
        #: A {input_id: List[float]} mapping of input id to learning curve
        #: point scores
        self.curves = OrderedDict()

        # [start-snippet-3]
        #: The current evaluating task (if any)
        self._task = None   # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()
        # [end-snippet-3]

        # GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(box, 'No data on input.')
        self.infob = gui.widgetLabel(box, 'No learners.')

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Evaluation Scores")
        gui.comboBox(box, self, "scoringF",
                     items=[x[0] for x in self.scoring],
                     callback=self._invalidate_curves)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Options")
        gui.spin(box, self, 'folds', 2, 100, step=1,
                 label='Cross validation folds:  ', keyboardTracking=False,
                 callback=lambda:
                    self._invalidate_results() if self.commitOnChange else None
        )
        gui.spin(box, self, 'steps', 2, 100, step=1,
                 label='Learning curve points:  ', keyboardTracking=False,
                 callback=[self.updateCurvePoints,
                           lambda: self._invalidate_results() if self.commitOnChange else None])
        gui.checkBox(box, self, 'commitOnChange', 'Apply setting on any change')
        self.commitBtn = gui.button(box, self, "Apply Setting",
                                    callback=self._invalidate_results,
                                    disabled=True)

        gui.rubber(self.controlArea)

        # table widget
        self.table = gui.table(self.mainArea,
                               selectionMode=QTableWidget.NoSelection)

    ##########################################################################
    # slots: handle input signals

    def set_dataset(self, data):
        """Set the input train dataset."""
        # Clear all results/scores
        for id in list(self.results):
            self.results[id] = None
        for id in list(self.curves):
            self.curves[id] = None

        self.data = data

        if data is not None:
            self.infoa.setText('%d instances in input dataset' % len(data))
        else:
            self.infoa.setText('No data on input.')

        self.commitBtn.setEnabled(self.data is not None)

    def set_testdataset(self, testdata):
        """Set a separate test dataset."""
        # Clear all results/scores
        for id in list(self.results):
            self.results[id] = None
        for id in list(self.curves):
            self.curves[id] = None

        self.testdata = testdata

    def set_learner(self, learner, id):
        """Set the input learner for channel id."""
        if id in self.learners:
            if learner is None:
                # remove a learner and corresponding results
                del self.learners[id]
                del self.results[id]
                del self.curves[id]
            else:
                # update/replace a learner on a previously connected link
                self.learners[id] = learner
                # invalidate the cross-validation results and curve scores
                # (will be computed/updated in `_update`)
                self.results[id] = None
                self.curves[id] = None
        else:
            if learner is not None:
                self.learners[id] = learner
                # initialize the cross-validation results and curve scores
                # (will be computed/updated in `_update`)
                self.results[id] = None
                self.curves[id] = None

        if len(self.learners):
            self.infob.setText("%d learners on input." % len(self.learners))
        else:
            self.infob.setText("No learners.")

        self.commitBtn.setEnabled(len(self.learners))

# [start-snippet-4]
    def handleNewSignals(self):
        self._update()
# [end-snippet-4]

    def _invalidate_curves(self):
        if self.data is not None:
            self._update_curve_points()
        self._update_table()

    def _invalidate_results(self):
        for id in self.learners:
            self.curves[id] = None
            self.results[id] = None
        self._update()

# [start-snippet-5]
    def _update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        if self.data is None:
            return
        # collect all learners for which results have not yet been computed
        need_update = [(id, learner) for id, learner in self.learners.items()
                       if self.results[id] is None]
        if not need_update:
            return
# [end-snippet-5]
# [start-snippet-6]
        learners = [learner for _, learner in need_update]
        # setup the learner evaluations as partial function capturing
        # the necessary arguments.
        if self.testdata is None:
            learning_curve_func = partial(
                learning_curve,
                learners, self.data, folds=self.folds,
                proportions=self.curvePoints,
            )
        else:
            learning_curve_func = partial(
                learning_curve_with_test_data,
                learners, self.data, self.testdata, times=self.folds,
                proportions=self.curvePoints,
            )
# [end-snippet-6]
# [start-snippet-7]
        # setup the task state
        self._task = task = Task()
        # The learning_curve[_with_test_data] also takes a callback function
        # to report the progress. We instrument this callback to both invoke
        # the appropriate slots on this widget for reporting the progress
        # (in a thread safe manner) and to implement cooperative cancellation.
        set_progress = methodinvoke(self, "setProgressValue", (float,))

        def callback(finished):
            # check if the task has been cancelled and raise an exception
            # from within. This 'strategy' can only be used with code that
            # properly cleans up after itself in the case of an exception
            # (does not leave any global locks, opened file descriptors, ...)
            if task.cancelled:
                raise KeyboardInterrupt()
            set_progress(finished * 100)

        # capture the callback in the partial function
        learning_curve_func = partial(learning_curve_func, callback=callback)
# [end-snippet-7]
# [start-snippet-8]
        self.progressBarInit()
        # Submit the evaluation function to the executor and fill in the
        # task with the resultant Future.
        task.future = self._executor.submit(learning_curve_func)
        # Setup the FutureWatcher to notify us of completion
        task.watcher = FutureWatcher(task.future)
        # by using FutureWatcher we ensure `_task_finished` slot will be
        # called from the main GUI thread by the Qt's event loop
        task.watcher.done.connect(self._task_finished)
# [end-snippet-8]

# [start-snippet-progress]
    @pyqtSlot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)
# [end-snippet-progress]

# [start-snippet-9]
    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the result of learner evaluation.
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progressBarFinished()

        try:
            results = f.result()  # type: List[Results]
        except Exception as ex:
            # Log the exception with a traceback
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occurred during evaluation: {!r}"
                       .format(ex))
            # clear all results
            for key in self.results.keys():
                self.results[key] = None
        else:
            # split the combined result into per learner/model results ...
            results = [list(Results.split_by_model(p_results))
                       for p_results in results]  # type: List[List[Results]]
            assert all(len(r.learners) == 1 for r1 in results for r in r1)
            assert len(results) == len(self.curvePoints)

            learners = [r.learners[0] for r in results[0]]
            learner_id = {learner: id_ for id_, learner in self.learners.items()}

            # ... and update self.results
            for i, learner in enumerate(learners):
                id_ = learner_id[learner]
                self.results[id_] = [p_results[i] for p_results in results]
# [end-snippet-9]
        # update the display
        self._update_curve_points()
        self._update_table()
# [end-snippet-9]

# [start-snippet-10]
    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self._task = None
# [end-snippet-10]

# [start-snippet-11]
    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
# [end-snippet-11]

    def _update_curve_points(self):
        for id in self.learners:
            curve = [self.scoring[self.scoringF][1](x)[0]
                     for x in self.results[id]]
            self.curves[id] = curve

    def _update_table(self):
        self.table.setRowCount(0)
        self.table.setRowCount(len(self.curvePoints))
        self.table.setColumnCount(len(self.learners))

        self.table.setHorizontalHeaderLabels(
            [learner.name for _, learner in self.learners.items()])
        self.table.setVerticalHeaderLabels(
            ["{:.2f}".format(p) for p in self.curvePoints])

        if self.data is None:
            return

        for column, curve in enumerate(self.curves.values()):
            for row, point in enumerate(curve):
                self.table.setItem(
                    row, column, QTableWidgetItem("{:.5f}".format(point)))

        for i in range(len(self.learners)):
            sh = self.table.sizeHintForColumn(i)
            cwidth = self.table.columnWidth(i)
            self.table.setColumnWidth(i, max(sh, cwidth))

    def updateCurvePoints(self):
        self.curvePoints = [(x + 1.)/self.steps for x in range(self.steps)]
Пример #26
0
class OWTestAndScore(OWWidget):
    name = "Test and Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100
    keywords = ['Cross Validation', 'CV']
    replaces = ["Orange.widgets.evaluate.owtestlearners.OWTestLearners"]

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message(
            "Click on the table header to select shown columns",
            "click_header")]

    settingsHandler = settings.PerfectDomainContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(2)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    use_rope = settings.Setting(False)
    rope = settings.Setting(0.1)
    comparison_criterion = settings.Setting(0, schema_only=True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        test_data_empty = Msg("Test dataset is empty.")
        class_required_test = Msg("Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        test_data_incompatible = Msg(
            "Test data may be incompatible with train data.")
        train_data_error = Msg("{}")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")
        test_data_transformed = Msg(
            "Test data has been transformed to match the train data.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []
        self.__pending_comparison_criterion = self.comparison_criterion

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[TaskState]
        self.__executor = ThreadExecutor()

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds],
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, searchable=True,
            callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed
        )
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed
        )
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, contentsLength=8, searchable=True,
            callback=self._on_target_class_changed
        )

        self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison")
        gui.comboBox(
            box, self, "comparison_criterion",
            callback=self.update_comparison_table)

        hbox = gui.hBox(box)
        gui.checkBox(hbox, self, "use_rope",
                     "Negligible difference: ",
                     callback=self._on_use_rope_changed)
        gui.lineEdit(hbox, self, "rope", validator=QDoubleValidator(),
                     controlWidth=70, callback=self.update_comparison_table,
                     alignment=Qt.AlignRight)
        self.controls.rope.setEnabled(self.use_rope)

        gui.rubber(self.controlArea)
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)
        view = self.score_table.view
        view.setSizeAdjustPolicy(view.AdjustToContents)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)

        self.compbox = box = gui.vBox(self.mainArea, box="Model comparison")
        table = self.comparison_table = QTableWidget(
            wordWrap=False, editTriggers=QTableWidget.NoEditTriggers,
            selectionMode=QTableWidget.NoSelection)
        table.setSizeAdjustPolicy(table.AdjustToContents)
        header = table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Fixed)
        header.setSectionsClickable(False)

        header = table.horizontalHeader()
        header.setTextElideMode(Qt.ElideRight)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setSectionsClickable(False)
        header.setStretchLastSection(False)
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        avg_width = self.fontMetrics().averageCharWidth()
        header.setMinimumSectionSize(8 * avg_width)
        header.setMaximumSectionSize(15 * avg_width)
        header.setDefaultSectionSize(15 * avg_width)
        box.layout().addWidget(table)
        box.layout().addWidget(QLabel(
            "<small>Table shows probabilities that the score for the model in "
            "the row is higher than that of the model in the column. "
            "Small numbers show the probability that the difference is "
            "negligible.</small>", wordWrap=True))

    @staticmethod
    def sizeHint():
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestAndScore.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestAndScore.FeatureFold and not enabled:
            self.resampling = OWTestAndScore.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        elif learner is not None:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.cancel()
        self.Information.data_sampled.clear()
        self.Error.train_data_error.clear()

        if data is not None:
            data_errors = [
                ("Train dataset is empty.", len(data) == 0),
                (
                    "Train data input requires a target variable.",
                    not data.domain.class_vars
                ),
                ("Too many target variables.", len(data.domain.class_vars) > 1),
                ("Target variable has no values.", np.isnan(data.Y).all()),
                (
                    "Target variable has only one value.",
                    data.domain.has_discrete_class and len(unique(data.Y)) < 2
                ),
                ("Data has no features to learn from.", data.X.shape[1] == 0),
            ]

            for error_msg, cond in data_errors:
                if cond:
                    self.Error.train_data_error(error_msg)
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestAndScore.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not data:
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestAndScore.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {(True, True): " ",  # both, don't specify
                (True, False): " train ",
                (False, True): " test "}[(self.train_data_missing_vals,
                                          self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        if self.data and self.data.domain.class_var:
            new_scorers = usable_scorers(self.data.domain.class_var)
        else:
            new_scorers = []
        # Don't unnecessarily reset the combo because this would always reset
        # comparison_criterion; we also set it explicitly, though, for clarity
        if new_scorers != self.scorers:
            self.scorers = new_scorers
            combo = self.controls.comparison_criterion
            combo.clear()
            combo.addItems([scorer.long_name or scorer.name
                            for scorer in self.scorers])
            if self.scorers:
                self.comparison_criterion = 0
        if self.__pending_comparison_criterion is not None:
            # Check for the unlikely case that some scorers have been removed
            # from modules
            if self.__pending_comparison_criterion < len(self.scorers):
                self.comparison_criterion = self.__pending_comparison_criterion
            self.__pending_comparison_criterion = None
        self._update_compbox_title()

    def _update_compbox_title(self):
        criterion = self.comparison_criterion
        if criterion < len(self.scorers):
            scorer = self.scorers[criterion]()
            self.compbox.setTitle(f"Model Comparison by {scorer.name}")
        else:
            self.compbox.setTitle(f"Model Comparison")

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.score_table.update_header(self.scorers)
        self._update_view_enabled()
        self.update_stats_model()
        self.set_input_summary()
        if self.__needupdate:
            self.__update()

    def set_input_summary(self):
        summary, details, kwargs = self.info.NoInput, "", {}
        if self.data and self.test_data:
            summary = f"{self.info.format_number(len(self.data))}," \
                      f" {self.info.format_number(len(self.test_data))}"
            details = format_multiple_summaries([
                ("Data", self.data), ("Test data", self.test_data)
            ])
            kwargs = {"format": Qt.RichText}
        elif self.data and not self.test_data:
            summary, details = len(self.data), format_summary_details(self.data)
        elif self.test_data and not self.data:
            summary = len(self.test_data)
            details = format_summary_details(self.test_data)
        self.info.set_input_summary(summary, details, **kwargs)

    def kfold_changed(self):
        self.resampling = OWTestAndScore.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestAndScore.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestAndScore.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self.modcompbox.setEnabled(self.resampling == OWTestAndScore.KFold)
        self._update_view_enabled()
        self._invalidate()
        self.__update()

    def _update_view_enabled(self):
        self.comparison_table.setEnabled(
            self.resampling == OWTestAndScore.KFold
            and len(self.learners) > 1
            and self.data is not None)
        self.score_table.view.setEnabled(
            self.data is not None)

    def update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.score_table.model
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        names = []
        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            names.append(name)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            results = slot.results
            if results is not None and results.success:
                train = QStandardItem("{:.3f}".format(results.value.train_time))
                train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                train.setData(key, Qt.UserRole)
                test = QStandardItem("{:.3f}".format(results.value.test_time))
                test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                test.setData(key, Qt.UserRole)
                row = [head, train, test]
            else:
                row = [head]
            if isinstance(results, Try.Fail):
                head.setToolTip(str(results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                if isinstance(results.exception, DomainTransformationError) \
                        and self.resampling == self.TestOnTest:
                    self.Error.test_data_incompatible()
                    self.Information.test_data_transformed.clear()
                else:
                    errors.append("{name} failed with error:\n"
                                  "{exc.__class__.__name__}: {exc!s}"
                                  .format(name=name, exc=slot.results.exception)
                                  )

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [Try(scorer_caller(scorer, ovr_results, target=1))
                             for scorer in self.scorers]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat, scorer in zip(stats, self.scorers):
                    item = QStandardItem()
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    if stat.success:
                        item.setData(float(stat.value[0]), Qt.DisplayRole)
                    else:
                        item.setToolTip(str(stat.exception))
                        if scorer.name in self.score_table.shown_scores:
                            has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        # Resort rows based on current sorting
        header = self.score_table.view.horizontalHeader()
        model.sort(
            header.sortIndicatorSection(),
            header.sortIndicatorOrder()
        )
        self._set_comparison_headers(names)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _on_use_rope_changed(self):
        self.controls.rope.setEnabled(self.use_rope)
        self.update_comparison_table()

    def update_comparison_table(self):
        self.comparison_table.clearContents()
        slots = self._successful_slots()
        if not (slots and self.scorers):
            return
        names = [learner_name(slot.learner) for slot in slots]
        self._set_comparison_headers(names)
        if self.resampling == OWTestAndScore.KFold:
            scores = self._scores_by_folds(slots)
            self._fill_table(names, scores)

    def _successful_slots(self):
        model = self.score_table.model
        proxy = self.score_table.sorted_model

        keys = (model.data(proxy.mapToSource(proxy.index(row, 0)), Qt.UserRole)
                for row in range(proxy.rowCount()))
        slots = [slot for slot in (self.learners[key] for key in keys)
                 if slot.results is not None and slot.results.success]
        return slots

    def _set_comparison_headers(self, names):
        table = self.comparison_table
        try:
            # Prevent glitching during update
            table.setUpdatesEnabled(False)
            header = table.horizontalHeader()
            if len(names) > 2:
                header.setSectionResizeMode(QHeaderView.Stretch)
            else:
                header.setSectionResizeMode(QHeaderView.Fixed)
            table.setRowCount(len(names))
            table.setColumnCount(len(names))
            table.setVerticalHeaderLabels(names)
            table.setHorizontalHeaderLabels(names)
        finally:
            table.setUpdatesEnabled(True)

    def _scores_by_folds(self, slots):
        scorer = self.scorers[self.comparison_criterion]()
        self._update_compbox_title()
        if scorer.is_binary:
            if self.class_selection != self.TARGET_AVERAGE:
                class_var = self.data.domain.class_var
                target_index = class_var.values.index(self.class_selection)
                kw = dict(target=target_index)
            else:
                kw = dict(average='weighted')
        else:
            kw = {}

        def call_scorer(results):
            def thunked():
                return scorer.scores_by_folds(results.value, **kw).flatten()

            return thunked

        scores = [Try(call_scorer(slot.results)) for slot in slots]
        scores = [score.value if score.success else None for score in scores]
        # `None in scores doesn't work -- these are np.arrays)
        if any(score is None for score in scores):
            self.Warning.scores_not_computed()
        return scores

    def _fill_table(self, names, scores):
        table = self.comparison_table
        for row, row_name, row_scores in zip(count(), names, scores):
            for col, col_name, col_scores in zip(range(row), names, scores):
                if row_scores is None or col_scores is None:
                    continue
                if self.use_rope and self.rope:
                    p0, rope, p1 = baycomp.two_on_single(
                        row_scores, col_scores, self.rope)
                    if np.isnan(p0) or np.isnan(rope) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(table, row, col,
                                   f"{p0:.3f}<br/><small>{rope:.3f}</small>",
                                   f"p({row_name} > {col_name}) = {p0:.3f}\n"
                                   f"p({row_name} = {col_name}) = {rope:.3f}")
                    self._set_cell(table, col, row,
                                   f"{p1:.3f}<br/><small>{rope:.3f}</small>",
                                   f"p({col_name} > {row_name}) = {p1:.3f}\n"
                                   f"p({col_name} = {row_name}) = {rope:.3f}")
                else:
                    p0, p1 = baycomp.two_on_single(row_scores, col_scores)
                    if np.isnan(p0) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(table, row, col,
                                   f"{p0:.3f}",
                                   f"p({row_name} > {col_name}) = {p0:.3f}")
                    self._set_cell(table, col, row,
                                   f"{p1:.3f}",
                                   f"p({col_name} > {row_name}) = {p1:.3f}")

    @classmethod
    def _set_cells_na(cls, table, row, col):
        cls._set_cell(table, row, col, "NA", "comparison cannot be computed")
        cls._set_cell(table, col, row, "NA", "comparison cannot be computed")

    @staticmethod
    def _set_cell(table, row, col, label, tooltip):
        item = QLabel(label)
        item.setToolTip(tooltip)
        item.setAlignment(Qt.AlignCenter)
        table.setCellWidget(row, col, item)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = (self.TARGET_AVERAGE, ) + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self.update_stats_model()
        self.update_comparison_table()

    def _invalidate(self, which=None):
        self.cancel()
        self.fold_feature_selected = \
            self.resampling == OWTestAndScore.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.score_table.model
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.comparison_table.clearContents()

        self.__needupdate = True

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        summary = len(predictions) if predictions else self.info.NoOutput
        details = format_summary_details(predictions) if predictions else ""
        self.info.set_output_summary(summary, details)

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.score_table.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')]

    @Slot(float)
    def setProgressValue(self, value):
        self.progressBarSet(value)

    def __update(self):
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Error.test_data_incompatible.clear()
        self.Warning.test_data_missing.clear()
        self.Information.test_data_transformed(
            shown=self.resampling == self.TestOnTest
            and self.data is not None
            and self.test_data is not None
            and self.data.domain.attributes != self.test_data.domain.attributes)
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestAndScore.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestAndScore.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestAndScore.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData(
                    store_data=True, store_models=True),
                self.data, self.test_data, learners_c, self.preprocessor
            )
        else:
            if self.resampling == OWTestAndScore.KFold:
                sampler = Orange.evaluation.CrossValidation(
                    k=self.NFolds[self.n_folds],
                    random_state=rstate)
            elif self.resampling == OWTestAndScore.FeatureFold:
                sampler = Orange.evaluation.CrossValidationFeature(
                    feature=self.fold_feature)
            elif self.resampling == OWTestAndScore.LeaveOneOut:
                sampler = Orange.evaluation.LeaveOneOut()
            elif self.resampling == OWTestAndScore.ShuffleSplit:
                sampler = Orange.evaluation.ShuffleSplit(
                    n_resamples=self.NRepeats[self.n_repeats],
                    train_size=self.SampleSizes[self.sample_size] / 100,
                    test_size=None,
                    stratified=self.shuffle_stratified,
                    random_state=rstate)
            elif self.resampling == OWTestAndScore.TestOnTrain:
                sampler = Orange.evaluation.TestOnTrainingData(
                    store_models=True)
            else:
                assert False, "self.resampling %s" % self.resampling

            sampler.store_data = True
            test_f = partial(
                sampler, self.data, learners_c, self.preprocessor)

        def replace_learners(evalfunc, *args, **kwargs):
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = TaskState()

        def progress_callback(finished):
            if task.is_interruption_requested():
                raise UserInterrupt()
            task.set_progress_value(100 * finished)

        testfunc = partial(testfunc, callback=progress_callback)
        task.start(self.__executor, testfunc)

        task.progress_changed.connect(self.setProgressValue)
        task.watcher.finished.connect(self.__task_complete)

        self.Outputs.evaluations_results.invalidate()
        self.Outputs.predictions.invalidate()
        self.progressBarInit()
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, f: 'Future[Results]'):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        assert self.__task is not None and self.__task.future is f
        self.progressBarFinished()
        self.setStatusMessage("")
        assert f.done()
        self.__task = None
        self.__state = State.Done
        try:
            results = f.result()    # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:  # pylint: disable=broad-except
            log.exception("testing error (in __task_complete):",
                          exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er), er)))
            return

        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(scorer_caller(scorer, result))
                             for scorer in self.scorers]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.score_table.update_header(self.scorers)
        self.update_stats_model()
        self.update_comparison_table()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            task.progress_changed.disconnect(self.setProgressValue)
            task.watcher.finished.disconnect(self.__task_complete)

            self.progressBarFinished()
            self.setStatusMessage("")

    def onDeleteWidget(self):
        self.cancel()
        self.__executor.shutdown(wait=False)
        super().onDeleteWidget()

    def copy_to_clipboard(self):
        self.score_table.copy_selection_to_clipboard()
Пример #27
0
class OWGeneInfo(widget.OWWidget):
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene atttibute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        sys.excepthook(type(exc), exc.args, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        assert 0 <= self.organism_index <= len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        if not self.__initialized:
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = \
                [attr for attr in data.domain.variables + data.domain.metas
                 if isinstance(attr, (Orange.data.StringVariable,
                                      Orange.data.DiscreteVariable))]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
            self.useAttr = data_hints.get_hint(
                self.data, "genesinrows", self.useAttr)

            self.openContext(data)
            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)
            else:
                self.organism_index = 0
                self.taxid = self.organisms[self.organism_index]

            self.updateInfoItems()
        else:
            self.clear()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        self.warning(0)
        if self.data is None:
            return

        genes = self.inputGenes()
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        source_name, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for _, item in zip(schema, gi):
                    if isinstance(item, Link):
                        # TODO: This should be handled by delegates
                        row.append(item.text)
                        links.append(item.link)
                    else:
                        row.append(item)
                cells.append(row)
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection))
        )

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)
        gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row)
                        for row in selectedRows)

        isselected = selectedIds.__contains__

        if self.useAttr:
            def is_selected(attr):
                return attr.name in selectedIds
            attrs = [attr for attr in self.data.domain.attributes
                     if isselected(attr.name)]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if isselected(name)]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all([s in row for s in searchStrings])

    def searchUpdate(self):
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all([s in row for s in searchStrings]))

    def selectFiltered(self):
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def updateDictyExpressLink(self, genes, show=False):
        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None
        if show:
            genes = [fix(gene) for gene in genes if fix(gene)]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        model = self.treeWidget.model()
        mapToSource = model.mapToSource
        selectedRows = self.treeWidget.selectedIndexes()
        selectedRows = [mapToSource(index).row() for index in selectedRows]
        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)

        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None

        genes = [fix(gene) for gene in selectedIds if fix(gene)]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        self.updateInfoItems()

    def onDeleteWidget(self):
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
Пример #28
0
class OWExplainPred(OWWidget):

    name = "Explain Predictions"
    description = "Computes attribute contributions to the final prediction with an approximation algorithm for shapely value"
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, Data and Sample domains mismatch.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None

        self._task = None
        self._executor = ThreadExecutor()

        self.dataview = QTableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                   sortingEnabled=True,
                                   selectionMode=QTableView.NoSelection,
                                   focusPolicy=Qt.StrongFocus)

        self.dataview.sortByColumn(2, Qt.DescendingOrder)
        self.dataview.horizontalHeader().setResizeMode(QHeaderView.Stretch)

        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"), StringVariable(name="Value")])
        self.placeholder_table_model = TableModel(
            Table.from_domain(domain), parent=None)

        self.dataview.setModel(self.placeholder_table_model)

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea,
                                        self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.layout().addWidget(self.dataview)

        self.resize(640, 480)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data'"""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        self.e = None
        if data is not None:
            model = TableModel(data, parent=None)
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model'"""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        self.e = None
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample', checks if size is appropriate"""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
                if self.e is not None:
                    self.e.saved = False

    def handleNewSignals(self):
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.dataview.setModel(self.placeholder_table_model)
        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()


    def commit_calc(self):
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()
        if self.model is not None:
            # calculate contributions
            if self.e is None:
                self.e = ExplainPredictions(self.data,
                                       self.model,
                                       batch_size=min(
                                           len(self.data.X), 500),
                                       p_val=self.gui_p_val,
                                       error=self.gui_error)
            self._task = task = Task()


            def callback(progress):
                nonlocal task
                # update progress bar
                QMetaObject.invokeMethod(
                    self, "set_progress_value", Qt.QueuedConnection, Q_ARG(int, progress))
                if task.canceled:
                    return True
                return False

            def callback_update(table):
                QMetaObject.invokeMethod(
                    self, "update_view", Qt.QueuedConnection, Q_ARG(Orange.data.Table, table))

            def callback_prediction(class_value):
                QMetaObject.invokeMethod(
                    self, "update_model_prediction", Qt.QueuedConnection, Q_ARG(float, class_value))

            self.was_canceled = False
            explain_func = partial(
                self.e.anytime_explain, self.to_explain[0], callback=callback, update_func=callback_update, update_prediction=callback_prediction)

            self.progressBarInit(processEvents=None)
            task.future = self._executor.submit(explain_func)
            task.watcher = FutureWatcher(task.future)
            task.watcher.done.connect(self._task_finished)
            self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        self.explanations = table
        model = TableModel(table, parent=None)
        header = self.dataview.horizontalHeader()
        model.sort(
            header.sortIndicatorSection(),
            header.sortIndicatorOrder())
        self.dataview.setModel(model)
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters:
        ----------
        f: conncurent.futures.Future
            future instance holding the result of learner evaluation
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None

        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occured during evaluation: {!r}".format(ex))

            for key in self.results.keys():
                self.results[key] = None
        else:
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)


    def commit_output(self):
        """
        Sends best-so-far results forward
        """
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        if self.stop :
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """
        Parameters
        ----------
        class_value: float 
            Number representing either index of predicted class value, looked up in domain, or predicted value (regression)
        """
        name = self.data.domain.class_vars[0].name
        if isinstance(self.data.domain.class_vars[0], ContinuousVariable):
            self.predict_info.setText(name + ":      " + str(class_value))
        else:
            self.predict_info.setText(
                name + ":      " + self.data.domain.class_vars[0].values[int(class_value)])

    def _update_error_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #29
0
class OWNNLearner(OWBaseLearner):
    name = "神经网络(Neural Network)"
    description = "一种具有反向传播的多层感知器(MLP)算法。"
    icon = "icons/NN.svg"
    priority = 90
    keywords = ["mlp"]

    LEARNER = NNLearner

    activation = ["identity", "logistic", "tanh", "relu"]
    act_lbl = ["Identity", "Logistic", "tanh", "ReLu"]
    chinese_act_lbl = ["相等", "Logistic", "tanh", "ReLu"]
    solver = ["lbfgs", "sgd", "adam"]
    solv_lbl = ["L-BFGS-B", "SGD", "Adam"]

    learner_name = Setting("Neural Network")
    hidden_layers_input = Setting("100,")
    activation_index = Setting(3)
    solver_index = Setting(2)
    max_iterations = Setting(200)
    alpha_index = Setting(0)
    replicable = Setting(True)
    settings_version = 1

    alphas = list(
        chain([x / 10000 for x in range(1, 10)],
              [x / 1000 for x in range(1, 10)],
              [x / 100 for x in range(1, 10)], [x / 10 for x in range(1, 10)],
              range(1, 10), range(10, 100, 5), range(100, 200, 10),
              range(100, 1001, 50)))

    def add_main_layout(self):
        # this is part of init, pylint: disable=attribute-defined-outside-init
        form = QFormLayout()
        form.setFieldGrowthPolicy(form.AllNonFixedFieldsGrow)
        form.setVerticalSpacing(25)
        form.setLabelAlignment(Qt.AlignLeft)
        gui.widgetBox(self.controlArea, True, orientation=form)
        form.addRow(
            "隐藏层中的神经元:",
            gui.lineEdit(None,
                         self,
                         "hidden_layers_input",
                         orientation=Qt.Horizontal,
                         callback=self.settings_changed,
                         tooltip="定义神经元的整数列表。列表长度定义层数。例如4、2、2、3。",
                         placeholderText="e.g. 10,"))
        form.addRow(
            "激活:",
            gui.comboBox(None,
                         self,
                         "activation_index",
                         orientation=Qt.Horizontal,
                         label="Activation:",
                         items=[i for i in self.chinese_act_lbl],
                         callback=self.settings_changed))

        form.addRow(" ", gui.separator(None, 16))
        form.addRow(
            "求解器(Solver):",
            gui.comboBox(None,
                         self,
                         "solver_index",
                         orientation=Qt.Horizontal,
                         label="Solver:",
                         items=[i for i in self.solv_lbl],
                         callback=self.settings_changed))
        self.reg_label = QLabel()
        slider = gui.hSlider(None,
                             self,
                             "alpha_index",
                             minValue=0,
                             maxValue=len(self.alphas) - 1,
                             callback=lambda:
                             (self.set_alpha(), self.settings_changed()),
                             createLabel=False)
        form.addRow(self.reg_label, slider)
        self.set_alpha()

        form.addRow(
            "最大迭代次数:",
            gui.spin(None,
                     self,
                     "max_iterations",
                     10,
                     1000000,
                     step=10,
                     label="Max iterations:",
                     orientation=Qt.Horizontal,
                     alignment=Qt.AlignRight,
                     callback=self.settings_changed))

        form.addRow(gui.separator(None))
        form.addRow(
            gui.checkBox(None,
                         self,
                         "replicable",
                         label="可重复的训练",
                         callback=self.settings_changed), )

    def set_alpha(self):
        # called from init, pylint: disable=attribute-defined-outside-init
        self.strength_C = self.alphas[self.alpha_index]
        self.reg_label.setText("正则化, α={}:".format(self.strength_C))

    @property
    def alpha(self):
        return self.alphas[self.alpha_index]

    def setup_layout(self):
        # this is part of init, pylint: disable=attribute-defined-outside-init
        super().setup_layout()

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # just a test cancel button
        gui.button(self.apply_button, self, "取消", callback=self.cancel)

    def create_learner(self):
        return self.LEARNER(hidden_layer_sizes=self.get_hidden_layers(),
                            activation=self.activation[self.activation_index],
                            solver=self.solver[self.solver_index],
                            alpha=self.alpha,
                            random_state=1 if self.replicable else None,
                            max_iter=self.max_iterations,
                            preprocessors=self.preprocessors)

    def get_learner_parameters(self):
        return (("Hidden layers", ', '.join(map(str,
                                                self.get_hidden_layers()))),
                ("Activation", self.act_lbl[self.activation_index]),
                ("Solver", self.solv_lbl[self.solver_index]),
                ("Alpha", self.alpha), ("Max iterations", self.max_iterations),
                ("Replicable training", self.replicable))

    def get_hidden_layers(self):
        layers = tuple(map(int, re.findall(r'\d+', self.hidden_layers_input)))
        if not layers:
            layers = (10, )
            self.hidden_layers_input = "10,"
        return layers

    def update_model(self):
        self.show_fitting_failed(None)
        self.model = None
        if self.check_data():
            self.__update()
        else:
            self.Outputs.model.send(self.model)

    @Slot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    def __update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        max_iter = self.learner.kwargs["max_iter"]

        # Setup the task state
        task = Task()
        lastemitted = 0.

        def callback(iteration):
            nonlocal task
            nonlocal lastemitted
            if task.isInterruptionRequested():
                raise CancelTaskException()
            progress = round(iteration / max_iter * 100)
            if progress != lastemitted:
                task.emitProgressUpdate(progress)
                lastemitted = progress

        # copy to set the callback so that the learner output is not modified
        # (currently we can not pass callbacks to learners __call__)
        learner = copy.copy(self.learner)
        learner.callback = callback

        def build_model(data, learner):
            try:
                return learner(data)
            except CancelTaskException:
                return None

        build_model_func = partial(build_model, self.data, learner)

        task.setFuture(self._executor.submit(build_model_func))
        task.done.connect(self._task_finished)
        task.progressChanged.connect(self.setProgressValue)

        # set in setup_layout; pylint: disable=attribute-defined-outside-init
        self._task = task

        self.progressBarInit()
        self.setBlocking(True)

    @Slot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the built model
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()
        self._task.deleteLater()
        self._task = None  # pylint: disable=attribute-defined-outside-init
        self.setBlocking(False)
        self.progressBarFinished()

        try:
            self.model = f.result()
        except Exception as ex:  # pylint: disable=broad-except
            # Log the exception with a traceback
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.model = None
            self.show_fitting_failed(ex)
        else:
            self.model.name = self.learner_name
            self.model.instances = self.data
            self.model.skl_model.orange_callback = None  # remove unpicklable callback
            self.Outputs.model.send(self.model)

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect from the task
            self._task.done.disconnect(self._task_finished)
            self._task.progressChanged.disconnect(self.setProgressValue)
            self._task.deleteLater()
            self._task = None  # pylint: disable=attribute-defined-outside-init

        self.progressBarFinished()
        self.setBlocking(False)

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()

    @classmethod
    def migrate_settings(cls, settings, version):
        if not version:
            alpha = settings.pop("alpha", None)
            if alpha is not None:
                settings["alpha_index"] = \
                    np.argmin(np.abs(np.array(cls.alphas) - alpha))
Пример #30
0
    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None

        self._task = None
        self._executor = ThreadExecutor()

        self.dataview = QTableView(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                   sortingEnabled=True,
                                   selectionMode=QTableView.NoSelection,
                                   focusPolicy=Qt.StrongFocus)

        self.dataview.sortByColumn(2, Qt.DescendingOrder)
        self.dataview.horizontalHeader().setResizeMode(QHeaderView.Stretch)

        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"), StringVariable(name="Value")])
        self.placeholder_table_model = TableModel(
            Table.from_domain(domain), parent=None)

        self.dataview.setModel(self.placeholder_table_model)

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea,
                                        self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.layout().addWidget(self.dataview)

        self.resize(640, 480)
Пример #31
0
    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QtGui.QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QtGui.QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QtGui.QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QtGui.QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QtGui.QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QtGui.QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QtGui.QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QtCore.QThreadPool(self))
        self._executor.submit(task)
class ClusterModel(QAbstractListModel):
    def __init__(self, parent=None):
        QAbstractListModel.__init__(self)
        self.__items = []
        self.parent = parent

        self._task = None  # type: Union[Task, None]
        self._executor = ThreadExecutor()

    def add_rows(self, rows):
        self.__items = rows

    def get_rows(self):
        return self.__items

    def rowCount(self, *args, **kwargs):
        return len(self.__items)

    def data(self, model_index, role=None):
        # check if data is set
        if not self.__items:
            return QVariant()

        # return empty QVariant if model index is unknown
        if not model_index.isValid() or not (0 <= model_index.row() < len(self.__items)):
            return QVariant()

        row_obj = self.__items[model_index.row()]

        if role == Qt.DisplayRole:
            return row_obj

    @Slot(concurrent.futures.Future)
    def _end_task(self, f):
        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.parent.progressBarFinished()
        self.parent.filter_genes()

        try:
            f.result()
        except Exception as ex:
            raise ex

    def _score_genes(self, callback, **kwargs):
        for item in self.get_rows():
            item.cluster_scores(**kwargs)
            callback()

    @Slot(bool)
    def progress_advance(self, finish):
        # GUI should be updated in main thread. That's why wex are calling advance method here
        if self.parent.progress_bar:
            if finish:
                self.parent.progressBarFinished()
            else:
                self.parent.progress_bar.advance()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._end_task)
            self._task = None

    def score_genes(self, **kwargs):
        """ Run gene enrichment.

        :param design:
        :param data_x:
        :param rows_by_cluster:
        :param method:


        Note:
            We do not apply filter nor notify view that data is changed. This is done after filters
        """
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        progress_advance = methodinvoke(self, "progress_advance", (bool,))

        def callback():
            if self._task.cancelled:
                raise KeyboardInterrupt()
            progress_advance(self._task.cancelled)

        self.parent.progress_bar = ProgressBar(self.parent, iterations=len(self.get_rows()))
        f = partial(self._score_genes, callback=callback, **kwargs)
        self._task = Task()
        self._task.future = self._executor.submit(f)

        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._end_task)

    def gene_sets_enrichment(self, gs_object, gene_sets, reference_genes):
        """ Run gene sets enrichment.

        :param gs_object:
        :param gene_sets:
        :param reference_genes:

        Note:
            We do not apply filter nor notify view that data is changed. This is done after filters

        """

        for item in self.get_rows():
            genes = [gene.ncbi_id for gene in item.filtered_genes]
            item.gene_set_enrichment(gs_object, gene_sets, set(genes), reference_genes)

    def apply_gene_filters(self, p_val=None, fdr=None, count=None):
        [item.filter_enriched_genes(p_val, fdr, max_gene_count=count) for item in self.get_rows()]
        self.dataChanged.emit(self.createIndex(0, 0), self.createIndex(self.rowCount(0), 0))

    def apply_gene_sets_filters(self, p_val=None, fdr=None, count=None):
        [item.filter_gene_sets(p_val, fdr, max_set_count=count) for item in self.get_rows()]
        self.dataChanged.emit(self.createIndex(0, 0), self.createIndex(self.rowCount(0), 0))
Пример #33
0
    def __init__(self):
        super().__init__()

        # sets self.curvePoints, self.steps equidistant points from
        # 1/self.steps to 1
        self.updateCurvePoints()

        self.scoring = [("Classification Accuracy",
                         Orange.evaluation.scoring.CA),
                        ("AUC", Orange.evaluation.scoring.AUC),
                        ("Precision", Orange.evaluation.scoring.Precision),
                        ("Recall", Orange.evaluation.scoring.Recall)]
        #: input data on which to construct the learning curve
        self.data = None
        #: optional test data
        self.testdata = None
        #: A {input_id: Learner} mapping of current learners from input channel
        self.learners = OrderedDict()
        #: A {input_id: List[Results]} mapping of input id to evaluation
        #: results list, one for each curve point
        self.results = OrderedDict()
        #: A {input_id: List[float]} mapping of input id to learning curve
        #: point scores
        self.curves = OrderedDict()

        # [start-snippet-3]
        #: The current evaluating task (if any)
        self._task = None  # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()
        # [end-snippet-3]

        # GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(box, 'No data on input.')
        self.infob = gui.widgetLabel(box, 'No learners.')

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Evaluation Scores")
        gui.comboBox(box,
                     self,
                     "scoringF",
                     items=[x[0] for x in self.scoring],
                     callback=self._invalidate_curves)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Options")
        gui.spin(box,
                 self,
                 'folds',
                 2,
                 100,
                 step=1,
                 label='Cross validation folds:  ',
                 keyboardTracking=False,
                 callback=lambda: self._invalidate_results()
                 if self.commitOnChange else None)
        gui.spin(box,
                 self,
                 'steps',
                 2,
                 100,
                 step=1,
                 label='Learning curve points:  ',
                 keyboardTracking=False,
                 callback=[
                     self.updateCurvePoints,
                     lambda: self._invalidate_results()
                     if self.commitOnChange else None
                 ])
        gui.checkBox(box, self, 'commitOnChange',
                     'Apply setting on any change')
        self.commitBtn = gui.button(box,
                                    self,
                                    "Apply Setting",
                                    callback=self._invalidate_results,
                                    disabled=True)

        gui.rubber(self.controlArea)

        # table widget
        self.table = gui.table(self.mainArea,
                               selectionMode=QTableWidget.NoSelection)
Пример #34
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    inputs = []
    outputs = [("Expression Data", Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box,
                              self,
                              "outputRows",
                              ["Genes in rows", "Samples in rows"],
                              "Rows",
                              callback=self.commitIf)

        gui.checkBox(box,
                     self,
                     "mergeSpots",
                     "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box,
            self,
            "datasetName",
            "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box,
                                           self,
                                           "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea,
                            self,
                            "autoCommit",
                            "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = gui.LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(gds[key] for gds in self.gds
                                for key in self.searchKeys)
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, proxy.data(proxy.index(i, 1), Qt.DisplayRole))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable
                                | Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = self.filterLineEdit.text()
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = GDS(gds_id)
                data = gds.get_data(report_genes=report_genes,
                                    transpose=transpose,
                                    sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(get_data,
                               self.currentGds["dataset_id"],
                               report_genes=self.mergeSpots,
                               transpose=self.outputRows,
                               sample_type=sample_type,
                               title=self.datasetName
                               or self.currentGds["title"])
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:

            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Domain([
                attr for attr in data.domain.attributes
                if samples.issuperset(attr.attributes.items())
            ], data.domain.class_var, data.domain.metas)
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes)
            data = Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, TAX_ID, self.currentGds.get("taxid", ""))
        data_hints.set_hint(data, GENE_NAME, bool(self.outputRows))

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = self.nameEdit.text()
            self.commitIf()
Пример #35
0
    def __init__(self):
        self._task = None  # type: Optional[self.Task]
        self._executor = ThreadExecutor(self)

        self.data = None
        self.test_type = ''

        self.discrete_model = DomainModel(separators=False,
                                          valid_types=(DiscreteVariable, ),
                                          parent=self)
        self.domain_model = DomainModel(valid_types=DomainModel.PRIMITIVE,
                                        parent=self)

        box = gui.vBox(self.controlArea, 'Hypotheses Testing')
        gui.listView(
            box,
            self,
            'chosen_X',
            model=self.discrete_model,
            box='Grouping Variables',
            selectionMode=QListView.ExtendedSelection,
            callback=self.Error.no_vars_selected.clear,
            toolTip='Select multiple variables with Ctrl+ or Shift+Click.')
        target = gui.comboBox(
            box,
            self,
            'chosen_y',
            sendSelectedValue=True,
            label='Test Variable',
            callback=[self.set_test_type, self.Error.no_class_selected.clear])
        target.setModel(self.domain_model)

        gui.checkBox(box,
                     self,
                     'is_permutation',
                     label='Permutation test',
                     callback=self.set_test_type)
        gui.comboBox(box,
                     self,
                     'test_statistic',
                     label='Statistic:',
                     items=tuple(self.TEST_STATISTICS),
                     orientation=Qt.Horizontal,
                     sendSelectedValue=True,
                     callback=self.set_test_type)
        gui.label(box, self, 'Test: %(test_type)s')

        box = gui.vBox(self.controlArea, 'Filter')
        gui.spin(box,
                 self,
                 'min_count',
                 5,
                 1000,
                 5,
                 label='Minimum group size (count):')

        self.btn_compute = gui.button(self.controlArea,
                                      self,
                                      '&Compute',
                                      callback=self.compute)
        gui.rubber(self.controlArea)

        class Model(PyTableModel):
            _n_vars = 0
            _BACKGROUND = [QBrush(QColor('#eee')), QBrush(QColor('#ddd'))]

            def setHorizontalHeaderLabels(self, labels, n_vars):
                self._n_vars = n_vars
                super().setHorizontalHeaderLabels(labels)

            def data(self, index, role=Qt.DisplayRole):
                if role == Qt.BackgroundRole and index.column() < self._n_vars:
                    return self._BACKGROUND[index.row() % 2]
                if role == Qt.DisplayRole or role == Qt.ToolTipRole:
                    colname = self.headerData(index.column(), Qt.Horizontal)
                    if colname.lower() in ('count', 'count | class'):
                        row = self.mapToSourceRows(index.row())
                        return int(self[row][index.column()])
                return super().data(index, role)

        owwidget = self

        class View(gui.TableView):
            _vars = None

            def set_vars(self, vars):
                self._vars = vars

            def selectionChanged(self, *args):
                super().selectionChanged(*args)

                rows = list({
                    index.row()
                    for index in self.selectionModel().selectedRows(0)
                })

                if not rows:
                    owwidget.Outputs.data.send(None)
                    return

                model = self.model().tolist()
                filters = [
                    Values([
                        FilterDiscrete(self._vars[col], {model[row][col]})
                        for col in range(len(self._vars))
                    ]) for row in self.model().mapToSourceRows(rows)
                ]
                data = Values(filters, conjunction=False)(owwidget.data)

                annotated = create_annotated_table(owwidget.data, data.ids)

                owwidget.Outputs.selected_data.send(data)
                owwidget.Outputs.data.send(annotated)

        self.view = view = View(self)
        self.model = Model(parent=self)
        view.setModel(self.model)
        view.horizontalHeader().setStretchLastSection(False)
        self.mainArea.layout().addWidget(view)

        self.set_test_type()
Пример #36
0
class OWNNLearner(OWBaseLearner):
    name = "Neural Network"
    description = "A multi-layer perceptron (MLP) algorithm with " \
                  "backpropagation."
    icon = "icons/NN.svg"
    priority = 90

    LEARNER = NNLearner

    activation = ["identity", "logistic", "tanh", "relu"]
    act_lbl = ["Identity", "Logistic", "tanh", "ReLu"]
    solver = ["lbfgs", "sgd", "adam"]
    solv_lbl = ["L-BFGS-B", "SGD", "Adam"]

    learner_name = Setting("Neural Network")
    hidden_layers_input = Setting("100,")
    activation_index = Setting(3)
    solver_index = Setting(2)
    alpha = Setting(0.0001)
    max_iterations = Setting(200)

    def add_main_layout(self):
        box = gui.vBox(self.controlArea, "Network")
        self.hidden_layers_edit = gui.lineEdit(
            box, self, "hidden_layers_input", label="Neurons per hidden layer:",
            orientation=Qt.Horizontal, callback=self.settings_changed,
            tooltip="A list of integers defining neurons. Length of list "
                    "defines the number of layers. E.g. 4, 2, 2, 3.",
            placeholderText="e.g. 100,")
        self.activation_combo = gui.comboBox(
            box, self, "activation_index", orientation=Qt.Horizontal,
            label="Activation:", items=[i for i in self.act_lbl],
            callback=self.settings_changed)
        self.solver_combo = gui.comboBox(
            box, self, "solver_index", orientation=Qt.Horizontal,
            label="Solver:", items=[i for i in self.solv_lbl],
            callback=self.settings_changed)
        self.alpha_spin = gui.doubleSpin(
            box, self, "alpha", 1e-5, 1.0, 1e-2,
            label="Alpha:", decimals=5, alignment=Qt.AlignRight,
            callback=self.settings_changed, controlWidth=80)
        self.max_iter_spin = gui.spin(
            box, self, "max_iterations", 10, 10000, step=10,
            label="Max iterations:", orientation=Qt.Horizontal,
            alignment=Qt.AlignRight, callback=self.settings_changed,
            controlWidth=80)

    def setup_layout(self):
        super().setup_layout()

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # just a test cancel button
        gui.button(self.controlArea, self, "Cancel", callback=self.cancel)

    def create_learner(self):
        return self.LEARNER(
            hidden_layer_sizes=self.get_hidden_layers(),
            activation=self.activation[self.activation_index],
            solver=self.solver[self.solver_index],
            alpha=self.alpha,
            max_iter=self.max_iterations,
            preprocessors=self.preprocessors)

    def get_learner_parameters(self):
        return (("Hidden layers", ', '.join(map(str, self.get_hidden_layers()))),
                ("Activation", self.act_lbl[self.activation_index]),
                ("Solver", self.solv_lbl[self.solver_index]),
                ("Alpha", self.alpha),
                ("Max iterations", self.max_iterations))

    def get_hidden_layers(self):
        layers = tuple(map(int, re.findall(r'\d+', self.hidden_layers_input)))
        if not layers:
            layers = (100,)
            self.hidden_layers_edit.setText("100,")
        return layers

    def update_model(self):
        self.show_fitting_failed(None)
        self.model = None
        if self.check_data():
            self.__update()
        else:
            self.Outputs.model.send(self.model)

    @Slot(float)
    def setProgressValue(self, value):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(value)

    def __update(self):
        if self._task is not None:
            # First make sure any pending tasks are cancelled.
            self.cancel()
        assert self._task is None

        max_iter = self.learner.kwargs["max_iter"]

        # Setup the task state
        task = Task()
        lastemitted = 0.

        def callback(iteration):
            nonlocal task  # type: Task
            nonlocal lastemitted
            if task.isInterruptionRequested():
                raise CancelTaskException()
            progress = round(iteration / max_iter * 100)
            if progress != lastemitted:
                task.emitProgressUpdate(progress)
                lastemitted = progress

        # copy to set the callback so that the learner output is not modified
        # (currently we can not pass callbacks to learners __call__)
        learner = copy.copy(self.learner)
        learner.callback = callback

        def build_model(data, learner):
            try:
                return learner(data)
            except CancelTaskException:
                return None

        build_model_func = partial(build_model, self.data, learner)

        task.setFuture(self._executor.submit(build_model_func))
        task.done.connect(self._task_finished)
        task.progressChanged.connect(self.setProgressValue)

        self._task = task
        self.progressBarInit()
        self.setBlocking(True)

    @Slot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters
        ----------
        f : Future
            The future instance holding the built model
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()
        self._task.deleteLater()
        self._task = None
        self.setBlocking(False)
        self.progressBarFinished()

        try:
            self.model = f.result()
        except Exception as ex:  # pylint: disable=broad-except
            # Log the exception with a traceback
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.model = None
            self.show_fitting_failed(ex)
        else:
            self.model.name = self.learner_name
            self.model.instances = self.data
            self.Outputs.model.send(self.model)

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect from the task
            self._task.done.disconnect(self._task_finished)
            self._task.progressChanged.disconnect(self.setProgressValue)
            self._task.deleteLater()
            self._task = None

        self.progressBarFinished()
        self.setBlocking(False)

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #37
0
class OWKMeans(widget.OWWidget):
    name = "k-Means"
    description = "k-Means clustering algorithm with silhouette-based " \
                  "quality estimation."
    icon = "icons/KMeans.svg"
    priority = 2100
    keywords = ["kmeans", "clustering"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(
            ANNOTATED_DATA_SIGNAL_NAME, Table, default=True,
            replaces=["Annotated Data"]
        )
        centroids = Output("Centroids", Table)

    class Error(widget.OWWidget.Error):
        failed = widget.Msg("Clustering failed\nError: {}")
        not_enough_data = widget.Msg(
            "Too few ({}) unique data instances for {} clusters"
        )
        no_attributes = widget.Msg("Data is missing features.")

    class Warning(widget.OWWidget.Warning):
        no_silhouettes = widget.Msg(
            "Silhouette scores are not computed for >{} samples".format(
                SILHOUETTE_MAX_SAMPLES)
        )
        not_enough_data = widget.Msg(
            "Too few ({}) unique data instances for {} clusters"
        )

    INIT_METHODS = (("Initialize with KMeans++", "k-means++"),
                    ("Random initialization", "random"))

    resizing_enabled = False
    buttons_area_orientation = Qt.Vertical

    k = Setting(3)
    k_from = Setting(2)
    k_to = Setting(8)
    optimize_k = Setting(False)
    max_iterations = Setting(300)
    n_init = Setting(10)
    smart_init = Setting(0)  # KMeans++
    auto_commit = Setting(True)

    settings_version = 2

    @classmethod
    def migrate_settings(cls, settings, version):
        # type: (Dict, int) -> None
        if version < 2:
            if 'auto_apply' in settings:
                settings['auto_commit'] = settings.get('auto_apply', True)
                settings.pop('auto_apply', None)

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Table]
        self.clusterings = {}

        self.__executor = ThreadExecutor(parent=self)
        self.__task = None  # type: Optional[Task]

        layout = QGridLayout()
        bg = gui.radioButtonsInBox(
            self.controlArea, self, "optimize_k", orientation=layout,
            box="Number of Clusters", callback=self.update_method,
        )

        layout.addWidget(
            gui.appendRadioButton(bg, "Fixed:", addToLayout=False), 1, 1)
        sb = gui.hBox(None, margin=0)
        gui.spin(
            sb, self, "k", minv=2, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight, callback=self.update_k)
        gui.rubber(sb)
        layout.addWidget(sb, 1, 2)

        layout.addWidget(
            gui.appendRadioButton(bg, "From", addToLayout=False), 2, 1)
        ftobox = gui.hBox(None)
        ftobox.layout().setContentsMargins(0, 0, 0, 0)
        layout.addWidget(ftobox, 2, 2)
        gui.spin(
            ftobox, self, "k_from", minv=2, maxv=29,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_from)
        gui.widgetLabel(ftobox, "to")
        gui.spin(
            ftobox, self, "k_to", minv=3, maxv=30,
            controlWidth=60, alignment=Qt.AlignRight,
            callback=self.update_to)
        gui.rubber(ftobox)

        box = gui.vBox(self.controlArea, "Initialization")
        gui.comboBox(
            box, self, "smart_init", items=[m[0] for m in self.INIT_METHODS],
            callback=self.invalidate)

        layout = QGridLayout()
        gui.widgetBox(box, orientation=layout)
        layout.addWidget(gui.widgetLabel(None, "Re-runs: "), 0, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 0, 1)
        gui.lineEdit(
            sb, self, "n_init", controlWidth=60,
            valueType=int, validator=QIntValidator(), callback=self.invalidate)
        layout.addWidget(
            gui.widgetLabel(None, "Maximum iterations: "), 1, 0, Qt.AlignLeft)
        sb = gui.hBox(None, margin=0)
        layout.addWidget(sb, 1, 1)
        gui.lineEdit(
            sb, self, "max_iterations", controlWidth=60, valueType=int,
            validator=QIntValidator(), callback=self.invalidate)

        self.apply_button = gui.auto_commit(
            self.buttonsArea, self, "auto_commit", "Apply", box=None,
            commit=self.commit)
        gui.rubber(self.controlArea)

        box = gui.vBox(self.mainArea, box="Silhouette Scores")
        self.mainArea.setVisible(self.optimize_k)
        self.table_model = ClusterTableModel(self)
        table = self.table_view = QTableView(self.mainArea)
        table.setModel(self.table_model)
        table.setSelectionMode(QTableView.SingleSelection)
        table.setSelectionBehavior(QTableView.SelectRows)
        table.setItemDelegate(gui.ColoredBarItemDelegate(self, color=Qt.cyan))
        table.selectionModel().selectionChanged.connect(self.select_row)
        table.setMaximumWidth(200)
        table.horizontalHeader().setStretchLastSection(True)
        table.horizontalHeader().hide()
        table.setShowGrid(False)
        box.layout().addWidget(table)

    def adjustSize(self):
        self.ensurePolished()
        s = self.sizeHint()
        self.resize(s)

    def update_method(self):
        self.table_model.clear_scores()
        self.commit()

    def update_k(self):
        self.optimize_k = False
        self.table_model.clear_scores()
        self.commit()

    def update_from(self):
        self.k_to = max(self.k_from + 1, self.k_to)
        self.optimize_k = True
        self.table_model.clear_scores()
        self.commit()

    def update_to(self):
        self.k_from = min(self.k_from, self.k_to - 1)
        self.optimize_k = True
        self.table_model.clear_scores()
        self.commit()

    def enough_data_instances(self, k):
        """k cannot be larger than the number of data instances."""
        return len(self.data) >= k

    @property
    def has_attributes(self):
        return len(self.data.domain.attributes)

    @staticmethod
    def _compute_clustering(data, k, init, n_init, max_iter, silhouette, random_state):
        # type: (Table, int, str, int, int, bool) -> KMeansModel
        if k > len(data):
            raise NotEnoughData()

        return KMeans(
            n_clusters=k, init=init, n_init=n_init, max_iter=max_iter,
            compute_silhouette_score=silhouette, random_state=random_state,
        )(data)

    @Slot(int, int)
    def __progress_changed(self, n, d):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None
        self.progressBarSet(100 * n / d)

    @Slot(int, Exception)
    def __on_exception(self, idx, ex):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None

        if isinstance(ex, NotEnoughData):
            self.Error.not_enough_data(len(self.data), self.k_from + idx)

        # Only show failed message if there is only 1 k to compute
        elif not self.optimize_k:
            self.Error.failed(str(ex))

        self.clusterings[self.k_from + idx] = str(ex)

    @Slot(int, object)
    def __clustering_complete(self, _, result):
        # type: (int, KMeansModel) -> None
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None

        self.clusterings[result.k] = result

    @Slot()
    def __commit_finished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__task is not None
        assert self.data is not None

        self.__task = None
        self.setBlocking(False)
        self.progressBarFinished()

        if self.optimize_k:
            self.update_results()

        if self.optimize_k and all(isinstance(self.clusterings[i], str)
                                   for i in range(self.k_from, self.k_to + 1)):
            # Show the error of the last clustering
            self.Error.failed(self.clusterings[self.k_to])

        self.send_data()

    def __launch_tasks(self, ks):
        # type: (List[int]) -> None
        """Execute clustering in separate threads for all given ks."""
        futures = [self.__executor.submit(
            self._compute_clustering,
            data=self.data,
            k=k,
            init=self.INIT_METHODS[self.smart_init][1],
            n_init=self.n_init,
            max_iter=self.max_iterations,
            silhouette=True,
            random_state=RANDOM_STATE,
        ) for k in ks]
        watcher = FutureSetWatcher(futures)
        watcher.resultReadyAt.connect(self.__clustering_complete)
        watcher.progressChanged.connect(self.__progress_changed)
        watcher.exceptionReadyAt.connect(self.__on_exception)
        watcher.doneAll.connect(self.__commit_finished)

        self.__task = Task(futures, watcher)
        self.progressBarInit(processEvents=False)
        self.setBlocking(True)

    def cancel(self):
        if self.__task is not None:
            task, self.__task = self.__task, None
            task.cancel()

            task.watcher.resultReadyAt.disconnect(self.__clustering_complete)
            task.watcher.progressChanged.disconnect(self.__progress_changed)
            task.watcher.exceptionReadyAt.disconnect(self.__on_exception)
            task.watcher.doneAll.disconnect(self.__commit_finished)

            self.progressBarFinished()
            self.setBlocking(False)

    def run_optimization(self):
        if not self.enough_data_instances(self.k_from):
            self.Error.not_enough_data(len(self.data), self.k_from)
            return

        if not self.enough_data_instances(self.k_to):
            self.Warning.not_enough_data(len(self.data), self.k_to)
            return

        needed_ks = [k for k in range(self.k_from, self.k_to + 1)
                     if k not in self.clusterings]

        if needed_ks:
            self.__launch_tasks(needed_ks)
        else:
            # If we don't need to recompute anything, just set the results to
            # what they were before
            self.update_results()

    def cluster(self):
        # Check if the k already has a computed clustering
        if self.k in self.clusterings:
            self.send_data()
            return

        # Check if there is enough data
        if not self.enough_data_instances(self.k):
            self.Error.not_enough_data(len(self.data), self.k)
            return

        self.__launch_tasks([self.k])

    def commit(self):
        self.cancel()
        self.clear_messages()

        # Some time may pass before the new scores are computed, so clear the
        # old scores to avoid potential confusion. Hiding the mainArea could
        # cause flickering when the clusters are computed quickly, so this is
        # the better alternative
        self.table_model.clear_scores()
        self.mainArea.setVisible(self.optimize_k and self.data is not None and
                                 self.has_attributes)

        if self.data is None:
            self.send_data()
            return

        if not self.has_attributes:
            self.Error.no_attributes()
            self.send_data()
            return

        if self.optimize_k:
            self.run_optimization()
        else:
            self.cluster()

        QTimer.singleShot(100, self.adjustSize)

    def invalidate(self):
        self.cancel()
        self.Error.clear()
        self.Warning.clear()
        self.clusterings = {}
        self.table_model.clear_scores()

        self.commit()

    def update_results(self):
        scores = [
            mk if isinstance(mk, str) else mk.silhouette for mk in (
                self.clusterings[k] for k in range(self.k_from, self.k_to + 1))
        ]
        best_row = max(
            range(len(scores)), default=0,
            key=lambda x: 0 if isinstance(scores[x], str) else scores[x]
        )
        self.table_model.set_scores(scores, self.k_from)
        self.table_view.selectRow(best_row)
        self.table_view.setFocus(Qt.OtherFocusReason)
        self.table_view.resizeRowsToContents()

    def selected_row(self):
        indices = self.table_view.selectedIndexes()
        if indices:
            return indices[0].row()

    def select_row(self):
        self.send_data()

    def send_data(self):
        if self.optimize_k:
            row = self.selected_row()
            k = self.k_from + row if row is not None else None
        else:
            k = self.k

        km = self.clusterings.get(k)
        if self.data is None or km is None or isinstance(km, str):
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=["C%d" % (x + 1) for x in range(km.k)]
        )
        clust_ids = km(self.data)
        silhouette_var = ContinuousVariable(
            get_unique_names(domain, "Silhouette"))
        if km.silhouette_samples is not None:
            self.Warning.no_silhouettes.clear()
            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        else:
            self.Warning.no_silhouettes()
            scores = np.nan

        new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = clust_ids.X.ravel()
        new_table.get_column_view(silhouette_var)[0][:] = scores

        centroids = Table(Domain(km.pre_domain.attributes), km.centroids)

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        self.data, old_data = data, self.data

        # Do not needlessly recluster the data if X hasn't changed
        if old_data and self.data and np.array_equal(self.data.X, old_data.X):
            if self.auto_commit:
                self.send_data()
        else:
            self.invalidate()

    def send_report(self):
        # False positives (Setting is not recognized as int)
        # pylint: disable=invalid-sequence-index
        if self.optimize_k and self.selected_row() is not None:
            k_clusters = self.k_from + self.selected_row()
        else:
            k_clusters = self.k
        init_method = self.INIT_METHODS[self.smart_init][0]
        init_method = init_method[0].lower() + init_method[1:]
        self.report_items((
            ("Number of clusters", k_clusters),
            ("Optimization", "{}, {} re-runs limited to {} steps".format(
                init_method, self.n_init, self.max_iterations))))
        if self.data is not None:
            self.report_data("Data", self.data)
            if self.optimize_k:
                self.report_table(
                    "Silhouette scores for different numbers of clusters",
                    self.table_view)

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #38
0
class OWGeneNetwork(widget.OWWidget):
    name = "Gene Network"
    description = "Extract a gene network for a set of genes."
    icon = "../widgets/icons/GeneNetwork.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Network", network.Graph)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.Setting("9606")
    gene_var_index = settings.ContextSetting(-1)
    use_attr_names = settings.ContextSetting(False)

    network_source = settings.Setting(1)
    include_neighborhood = settings.Setting(True)
    min_score = settings.Setting(0.9)

    want_main_area = False

    def __init__(self, parent=None):
        super().__init__(parent)

        self.taxids = taxonomy.common_taxids()
        self.current_taxid_index = self.taxids.index(self.taxid)

        self.data = None
        self.geneinfo = None
        self.nettask = None
        self._invalidated = False

        box = gui.widgetBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(box, "No data on input\n")

        box = gui.widgetBox(self.controlArea, "Organism")
        self.organism_cb = gui.comboBox(
            box, self, "current_taxid_index",
            items=map(taxonomy.name, self.taxids),
            callback=self._update_organism
        )
        box = gui.widgetBox(self.controlArea, "Genes")
        self.genes_cb = gui.comboBox(
            box, self, "gene_var_index", callback=self._update_query_genes
        )
        self.varmodel = itemmodels.VariableListModel()
        self.genes_cb.setModel(self.varmodel)

        gui.checkBox(
            box, self, "use_attr_names",
            "Use attribute names",
            callback=self._update_query_genes
        )

        box = gui.widgetBox(self.controlArea, "Network")
        gui.comboBox(
            box, self, "network_source",
            items=[s.name for s in SOURCES],
            callback=self._on_source_db_changed
        )
        gui.checkBox(
            box, self, "include_neighborhood",
            "Include immediate gene neighbors",
            callback=self.invalidate
        )
        self.score_spin = gui.doubleSpin(
            box, self, "min_score", 0.0, 1.0, step=0.001,
            label="Minimal edge score",
            callback=self.invalidate
        )
        self.score_spin.setEnabled(SOURCES[self.network_source].score_filter)

        box = gui.widgetBox(self.controlArea, "Commit")
        gui.button(box, self, "Retrieve", callback=self.commit, default=True)

        self.setSizePolicy(QtGui.QSizePolicy.Fixed, QtGui.QSizePolicy.Fixed)
        self.layout().setSizeConstraint(QtGui.QLayout.SetFixedSize)

        self.executor = ThreadExecutor()

    def set_data(self, data):
        self.closeContext()
        self.data = data
        if data is not None:
            self.varmodel[:] = string_variables(data.domain)
            taxid = data_hints.get_hint(data, "taxid", default=self.taxid)

            if taxid in self.taxids:
                self.set_organism(self.taxids.index(taxid))

            self.use_attr_names = data_hints.get_hint(
                data, "genesinrows", default=self.use_attr_names
            )

            if not (0 <= self.gene_var_index < len(self.varmodel)):
                self.gene_var_index = len(self.varmodel) - 1

            self.openContext(data)
            self.invalidate()
            self.commit()
        else:
            self.varmodel[:] = []
            self.send("Network", None)

    def set_source_db(self, dbindex):
        self.network_source = dbindex
        self.invalidate()

    def set_organism(self, index):
        self.current_taxid_index = index
        self.taxid = self.taxids[index]
        self.invalidate()

    def set_gene_var(self, index):
        self.gene_var_index = index
        self.invalidate()

    def query_genes(self):
        if self.use_attr_names:
            if self.data is not None:
                return [var.name for var in self.data.domain.attributes]
            else:
                return []
        elif self.gene_var_index >= 0:
            var = self.varmodel[self.gene_var_index]
            genes = [str(inst[var]) for inst in self.data
                     if not compat.isunknown(inst[var])]
            return list(unique(genes))
        else:
            return []

    def invalidate(self):
        self._invalidated = True

        if self.nettask is not None:
            self.nettask.finished.disconnect(self._on_result_ready)
            self.nettask.future().cancel()
            self.nettask = None

    @Slot()
    def advance(self):
        self.progressBarValue = (self.progressBarValue + 1) % 100

    @Slot(float)
    def set_progress(self, value):
        self.progressBarSet(value, processEvents=None)

    def commit(self):
        include_neighborhood = self.include_neighborhood
        query_genes = self.query_genes()
        source = SOURCES[self.network_source]
        if source.score_filter:
            min_score = self.min_score
            assert source.name == "STRING"
            min_score = min_score * 1000
        else:
            min_score = None

        taxid = self.taxid
        progress = methodinvoke(self, "advance")
        if self.geneinfo is None:
            self.geneinfo = self.executor.submit(
                fetch_ncbi_geneinfo, taxid, progress
            )

        geneinfo_f = self.geneinfo
        taxmap = source.tax_mapping
        db_taxid = taxmap.get(taxid, taxid)

        if db_taxid is None:
            raise ValueError("invalid taxid for this network")

        def fetch_network():
            geneinfo = geneinfo_f.result()
            ppidb = fetch_ppidb(source, db_taxid, progress)
            return get_gene_network(ppidb, geneinfo, db_taxid, query_genes,
                                    include_neighborhood=include_neighborhood,
                                    min_score=min_score,
                                    progress=methodinvoke(self, "set_progress", (float,)))

        self.nettask = Task(function=fetch_network)
        self.nettask.finished.connect(self._on_result_ready)

        self.executor.submit(self.nettask)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()
        self._invalidated = False
        self._update_info()

    @Slot(object)
    def _on_result_ready(self,):
        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)
        net = self.nettask.result()
        self._update_info()
        self.send("Network", net)

    def _on_source_db_changed(self):
        source = SOURCES[self.network_source]
        self.score_spin.setEnabled(source.score_filter)
        self.invalidate()

    def _update_organism(self):
        self.taxid = self.taxids[self.current_taxid_index]
        if self.geneinfo is not None:
            self.geneinfo.cancel()
        self.geneinfo = None
        self.invalidate()

    def _update_query_genes(self):
        self.invalidate()

    def _update_info(self):
        if self.data is None:
            self.info.setText("No data on input\n")
        else:
            names = self.query_genes()
            lines = ["%i unique genes on input" % len(set(names))]
            if self.nettask is not None:
                if not self.nettask.future().done():
                    lines.append("Retrieving ...")
                else:
                    net = self.nettask.result()
                    lines.append("%i nodes %i edges" %
                                 (len(net.nodes()), len(net.edges())))
            else:
                lines.append("")

            self.info.setText("\n".join(lines))
Пример #39
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)
        header.setContextMenuPolicy(Qt.CustomContextMenu)
        header.customContextMenuRequested.connect(self.show_column_chooser)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)
    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self._imageMeta = []
        self._imageCategories = {}

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
        )
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No image set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=0
        )
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))
Пример #41
0
    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box,
                              self,
                              "outputRows",
                              ["Genes in rows", "Samples in rows"],
                              "Rows",
                              callback=self.commitIf)

        gui.checkBox(box,
                     self,
                     "mergeSpots",
                     "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box,
            self,
            "datasetName",
            "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box,
                                           self,
                                           "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea,
                            self,
                            "autoCommit",
                            "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = gui.LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None
Пример #42
0
    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene attribute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)
Пример #43
0
 def test_executor_map(self):
     executor = ThreadExecutor()
     r = executor.map(pow, list(range(1000)), list(range(1000)))
     results = list(r)
     self.assertTrue(len(results) == 1000)
Пример #44
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []
        self.__pending_comparison_criterion = self.comparison_criterion

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[TaskState]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            callback=self._on_target_class_changed,
            contentsLength=8)

        self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison")
        gui.comboBox(box,
                     self,
                     "comparison_criterion",
                     callback=self.update_comparison_table)

        hbox = gui.hBox(box)
        gui.checkBox(hbox,
                     self,
                     "use_rope",
                     "Negligible difference: ",
                     callback=self._on_use_rope_changed)
        gui.lineEdit(hbox,
                     self,
                     "rope",
                     validator=QDoubleValidator(),
                     controlWidth=70,
                     callback=self.update_comparison_table,
                     alignment=Qt.AlignRight)
        self.controls.rope.setEnabled(self.use_rope)

        gui.rubber(self.controlArea)
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)
        view = self.score_table.view
        view.setSizeAdjustPolicy(view.AdjustToContents)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)

        self.compbox = box = gui.vBox(self.mainArea, box="Model comparison")
        table = self.comparison_table = QTableWidget(
            wordWrap=False,
            editTriggers=QTableWidget.NoEditTriggers,
            selectionMode=QTableWidget.NoSelection)
        table.setSizeAdjustPolicy(table.AdjustToContents)
        header = table.verticalHeader()
        header.setSectionResizeMode(QHeaderView.Fixed)
        header.setSectionsClickable(False)

        header = table.horizontalHeader()
        header.setTextElideMode(Qt.ElideRight)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setSectionsClickable(False)
        header.setStretchLastSection(False)
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        avg_width = self.fontMetrics().averageCharWidth()
        header.setMinimumSectionSize(8 * avg_width)
        header.setMaximumSectionSize(15 * avg_width)
        header.setDefaultSectionSize(15 * avg_width)
        box.layout().addWidget(table)
        box.layout().addWidget(
            QLabel(
                "<small>Table shows probabilities that the score for the model in "
                "the row is higher than that of the model in the column. "
                "Small numbers show the probability that the difference is "
                "negligible.</small>",
                wordWrap=True))
Пример #45
0
class OWImportDocuments(widget.OWWidget):
    name = "Import Documents"
    description = "Import text documents from folders."
    icon = "icons/ImportDocuments.svg"
    priority = 110

    class Outputs:
        data = Output("Corpus", Corpus)

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal

    MaxRecentItems = 20

    class Warning(widget.OWWidget.Warning):
        read_error = widget.Msg("{} couldn't be read.")

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.corpus = None
        self.n_text_categories = 0
        self.n_text_data = 0
        self.n_skipped = 0

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True)
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)

        browseaction = QAction(
            "Open/Load Documents",
            self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=self.style().standardIcon(QStyle.SP_DirOpenIcon),
            toolTip="Select a folder from which to load the documents")
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction("Reload",
                               self,
                               icon=self.style().standardIcon(
                                   QStyle.SP_BrowserReload),
                               toolTip="Reload current document set")
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(browseaction.iconText(),
                                   icon=browseaction.icon(),
                                   toolTip=browseaction.toolTip(),
                                   clicked=browseaction.trigger)
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled()))
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(text="No document set selected", wordWrap=True)
        self.progress_widget = QProgressBar(minimum=0, maximum=100)
        self.cancel_button = QPushButton(
            "Cancel",
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = []
        for item in self.recent_paths:
            if os.path.isdir(item.abspath):
                recent_paths.append(item)
        recent_paths = recent_paths[:OWImportDocuments.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath, self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        caption = "Select Top Level Folder"
        if OWImportDocuments.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self,
                caption,
                startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()

            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(self, caption, startdir)
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        if self.__state == State.NoState:
            text = "No document set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self.n_text_data
            ncategories = self.n_text_categories
            n_skipped = self.n_skipped
            if ncategories < 2:
                text = "{} document{}".format(nvalid,
                                              "s" if nvalid != 1 else "")
            else:
                text = "{} documents / {} categories".format(
                    nvalid, ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root text path to path

        If the path does not exists or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a folder".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)
            if newindex >= 0:
                self.currentPath = path
            else:
                self.currentPath = None
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        if self.__state == State.Processing:
            self.cancel()

        return success

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [
                State.Done, State.NoState, State.Error, State.Cancelled
            ]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the text scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.corpus = None
        self.start()

    def start(self):
        """
        Start/execute the text indexing operation
        """
        self.error()
        self.Warning.clear()
        self.progress_widget.setValue(0)

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')".format(
                         self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(self, "__onReportProgress", (object, ))

        task = ImportDocuments(startdir, report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=0)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_text_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_text_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            corpus, errors = task.future.result()
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            corpus = None
            errors = []
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if corpus:
            self.n_text_data = len(corpus)
            self.n_text_categories = len(corpus.domain.class_var.values)\
                if corpus.domain.class_var else 0

        self.corpus = corpus
        self.n_skipped = len(errors)

        if len(errors):
            self.Warning.read_error(
                "Some files" if len(errors) > 1 else "One file")

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettifypath(arg.lastpath))
            self.progress_widget.setValue(arg.progress)
            self.progress_widget.setValue(100 * arg.progress)

    def commit(self):
        """
        Create and commit a Corpus from the collected text meta data.
        """
        self.Outputs.data.send(self.corpus)

    def onDeleteWidget(self):
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            return True

        return super().eventFilter(receiver, event)
Пример #46
0
    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.corpus = None
        self.n_text_categories = 0
        self.n_text_data = 0
        self.n_skipped = 0

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True)
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)

        browseaction = QAction(
            "Open/Load Documents",
            self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=self.style().standardIcon(QStyle.SP_DirOpenIcon),
            toolTip="Select a folder from which to load the documents")
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction("Reload",
                               self,
                               icon=self.style().standardIcon(
                                   QStyle.SP_BrowserReload),
                               toolTip="Reload current document set")
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(browseaction.iconText(),
                                   icon=browseaction.icon(),
                                   toolTip=browseaction.toolTip(),
                                   clicked=browseaction.trigger)
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled()))
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(text="No document set selected", wordWrap=True)
        self.progress_widget = QProgressBar(minimum=0, maximum=100)
        self.cancel_button = QPushButton(
            "Cancel",
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))
class OWImportImages(widget.OWWidget):
    name = "Import Images"
    description = "Import images from a directory(s)"
    icon = "icons/ImportImages.svg"
    priority = 110

    outputs = [("Data", Orange.data.Table)]

    #: list of recent paths
    recent_paths = settings.Setting([])  # type: List[RecentPath]
    currentPath = settings.Setting(None)

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    # Modality = Qt.WindowModal

    MaxRecentItems = 20

    def __init__(self):
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self._imageMeta = []
        self._imageCategories = {}

        self.__invalidated = False
        self.__pendingTask = None

        vbox = gui.vBox(self.controlArea)
        hbox = gui.hBox(vbox)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
        )
        self.recent_cb.activated[int].connect(self.__onRecentActivated)
        icons = standard_icons(self)

        browseaction = QAction(
            "Open/Load Images", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=icons.dir_open_icon,
            toolTip="Select a directory from which to load the images"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=icons.reload_icon,
            toolTip="Reload current image set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=True,
        )

        hbox.layout().addWidget(self.recent_cb)
        hbox.layout().addWidget(browsebutton)
        hbox.layout().addWidget(reloadbutton)

        self.addActions([browseaction, reloadaction])

        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(vbox, "Info")
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No image set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=0
        )
        self.cancel_button = QPushButton(
            "Cancel", icon=icons.cancel_icon,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def __initRecentItemsModel(self):
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = []
        for item in self.recent_paths:
            if os.path.isdir(item.abspath):
                recent_paths.append(item)
        recent_paths = recent_paths[:OWImportImages.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath, self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = self.recent_paths[0].abspath

        if OWImportImages.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, "Select Top Level Directory", startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, "Select Top Level Directory", startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        if self.__state == State.NoState:
            text = "No image set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = sum(imeta.isvalid for imeta in self._imageMeta)
            ncategories = len(self._imageCategories)
            if ncategories < 2:
                text = "{} images".format(nvalid)
            else:
                text = "{} images / {} categories".format(nvalid, ncategories)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root image path to path

        If the path does not exists or is not a directory the current path
        is left unchanged

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path):
            return True

        if not os.path.exists(path):
            warnings.warn("'{}' does not exist".format(path), UserWarning)
            return False
        elif not os.path.isdir(path):
            warnings.warn("'{}' is not a directory".format(path), UserWarning)
            return False

        newindex = self.addRecentPath(path)
        self.recent_cb.setCurrentIndex(newindex)
        if newindex >= 0:
            self.currentPath = path
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        if self.__state == State.Processing:
            self.cancel()

        return True

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str
        """
        existing = None
        for pathitem in self.recent_paths:
            if os.path.samefile(pathitem.abspath, path):
                existing = pathitem
                break

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the image scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self._imageMeta = []
        self._imageCategories = {}
        self.start()

    def start(self):
        """
        Start/execute the image indexing operation
        """
        self.error()

        self.__invalidated = False
        if self.currentPath is None:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        startdir = self.currentPath

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImageScan(startdir, report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=3)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_image_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_image_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        try:
            image_meta = task.future.result()
        except Exception as err:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            image_meta = []
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        categories = {}

        for imeta in image_meta:
            # derive categories from the path relative to the starting dir
            dirname = os.path.dirname(imeta.path)
            relpath = os.path.relpath(dirname, task.startdir)
            categories[dirname] = relpath

        self._imageMeta = image_meta
        self._imageCategories = categories

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must be a namespace(count: int, lastpath: str)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettyfypath(arg.lastpath))

    def commit(self):
        """
        Create and commit a Table from the collected image meta data.
        """
        if self._imageMeta:
            categories = self._imageCategories
            if len(categories) > 1:
                cat_var = Orange.data.DiscreteVariable(
                    "category", values=list(sorted(categories.values()))
                )
            else:
                cat_var = None
            # Image name (file basename without the extension)
            imagename_var = Orange.data.StringVariable("image name")
            # Full fs path
            image_var = Orange.data.StringVariable("image")
            image_var.attributes["type"] = "image"
            # file size/width/height
            size_var = Orange.data.ContinuousVariable(
                "size", number_of_decimals=0)
            width_var = Orange.data.ContinuousVariable(
                "width", number_of_decimals=0)
            height_var = Orange.data.ContinuousVariable(
                "height", number_of_decimals=0)
            domain = Orange.data.Domain(
                [], [cat_var] if cat_var is not None else [],
                [imagename_var, image_var, size_var, width_var, height_var]
            )
            cat_data = []
            meta_data = []

            for imgmeta in self._imageMeta:
                if imgmeta.isvalid:
                    if cat_var is not None:
                        category = categories.get(os.path.dirname(imgmeta.path))
                        cat_data.append([cat_var.to_val(category)])
                    else:
                        cat_data.append([])
                    basename = os.path.basename(imgmeta.path)
                    imgname, _ = os.path.splitext(basename)

                    meta_data.append(
                        [imgname, imgmeta.path, imgmeta.size,
                         imgmeta.width, imgmeta.height]
                    )

            cat_data = numpy.array(cat_data, dtype=float)
            meta_data = numpy.array(meta_data, dtype=object)
            table = Orange.data.Table.from_numpy(
                domain, numpy.empty((len(cat_data), 0), dtype=float),
                cat_data, meta_data
            )
        else:
            table = None

        self.send("Data", table)

    def onDeleteWidget(self):
        self.cancel()
        self.__executor.shutdown(wait=True)
Пример #48
0
class OWTestLearners(OWWidget):
    name = "Test and Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100
    keywords = []

    class Inputs:
        train_data = Input("Data", Table, default=True)
        test_data = Input("Test Data", Table)
        learner = Input("Learner", Learner, multiple=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        predictions = Output("Predictions", Table)
        evaluations_results = Output("Evaluation Results", Results)

    settings_version = 3
    UserAdviceMessages = [
        widget.Message("Click on the table header to select shown columns",
                       "click_header")
    ]

    settingsHandler = settings.PerfectDomainContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train dataset is empty.")
        test_data_empty = Msg("Test dataset is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train datasets "
                                 "have different target variables.")
        memory_error = Msg("Not enough memory.")
        no_class_values = Msg("Target variable has no values.")
        only_one_class_var_value = Msg("Target variable has only one value.")
        test_data_incompatible = Msg(
            "Test data may be incompatible with train data.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")
        test_data_transformed = Msg(
            "Test data has been transformed to match the train data.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)

    @staticmethod
    def sizeHint():
        return QSize(780, 1)

    def _update_controls(self):
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    @Inputs.learner
    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        Parameters
        ----------
        learner : Optional[Orange.base.Learner]
        key : Any
        """
        if key in self.learners and learner is None:
            # Removed
            self._invalidate([key])
            del self.learners[key]
        elif learner is not None:
            self.learners[key] = InputLearner(learner, None, None)
            self._invalidate([key])

    @Inputs.train_data
    def set_train_data(self, data):
        """
        Set the input training dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.no_class_values.clear()
        self.Error.only_one_class_var_value.clear()
        if data is not None and not data:
            self.Error.train_data_empty()
            data = None
        if data:
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                np.isnan(data.Y).all(), data.domain.has_discrete_class
                and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required, self.Error.too_many_classes,
                self.Error.no_class_values, self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not data:
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        if self.data is None or self.data.domain.class_var is None:
            self.scorers = []
            return
        self.scorers = usable_scorers(self.data.domain.class_var)

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.score_table.update_header(self.scorers)
        self.update_stats_model()
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()
        self.__update()

    def update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.score_table.model
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            results = slot.results
            if results is not None and results.success:
                train = QStandardItem("{:.3f}".format(
                    results.value.train_time))
                train.setTextAlignment(Qt.AlignRight)
                train.setData(key, Qt.UserRole)
                test = QStandardItem("{:.3f}".format(results.value.test_time))
                test.setTextAlignment(Qt.AlignRight)
                test.setData(key, Qt.UserRole)
                row = [head, train, test]
            else:
                row = [head]
            if isinstance(results, Try.Fail):
                head.setToolTip(str(results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                if isinstance(results.exception, DomainTransformationError) \
                        and self.resampling == self.TestOnTest:
                    self.Error.test_data_incompatible()
                    self.Information.test_data_transformed.clear()
                else:
                    errors.append("{name} failed with error:\n"
                                  "{exc.__class__.__name__}: {exc!s}".format(
                                      name=name, exc=slot.results.exception))

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediatelly, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results, target=1))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat, scorer in zip(stats, self.scorers):
                    item = QStandardItem()
                    item.setTextAlignment(Qt.AlignRight)
                    if stat.success:
                        item.setData(float(stat.value[0]), Qt.DisplayRole)
                    else:
                        item.setToolTip(str(stat.exception))
                        if scorer.name in self.score_table.shown_scores:
                            has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        # Resort rows based on current sorting
        header = self.score_table.view.horizontalHeader()
        model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder())

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self.update_stats_model()

    def _invalidate(self, which=None):
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.score_table.model
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.__needupdate = True

    def commit(self):
        """
        Commit the results to output.
        """
        self.Error.memory_error.clear()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.score_table.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        if version < 2:
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        self.progressBarSet(value, processEvents=False)

    def __update(self):
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Error.test_data_incompatible.clear()
        self.Warning.test_data_missing.clear()
        self.Information.test_data_transformed(
            shown=self.resampling == self.TestOnTest and self.data is not None
            and self.test_data is not None and
            self.data.domain.attributes != self.test_data.domain.attributes)
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        rstate = 42
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners bellow)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData(store_data=True,
                                                 store_models=True), self.data,
                self.test_data, learners_c, self.preprocessor)
        else:
            if self.resampling == OWTestLearners.KFold:
                sampler = Orange.evaluation.CrossValidation(
                    k=self.NFolds[self.n_folds], random_state=rstate)
            elif self.resampling == OWTestLearners.FeatureFold:
                sampler = Orange.evaluation.CrossValidationFeature(
                    feature=self.fold_feature)
            elif self.resampling == OWTestLearners.LeaveOneOut:
                sampler = Orange.evaluation.LeaveOneOut()
            elif self.resampling == OWTestLearners.ShuffleSplit:
                sampler = Orange.evaluation.ShuffleSplit(
                    n_resamples=self.NRepeats[self.n_repeats],
                    train_size=self.SampleSizes[self.sample_size] / 100,
                    test_size=None,
                    stratified=self.shuffle_stratified,
                    random_state=rstate)
            elif self.resampling == OWTestLearners.TestOnTrain:
                sampler = Orange.evaluation.TestOnTrainingData(
                    store_models=True)
            else:
                assert False, "self.resampling %s" % self.resampling

            sampler.store_data = True
            test_f = partial(sampler, self.data, learners_c, self.preprocessor)

        def replace_learners(evalfunc, *args, **kwargs):
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = Task()

        def progress_callback(finished):
            if task.cancelled:
                raise UserInterrupt()
            QMetaObject.invokeMethod(self, "setProgressValue",
                                     Qt.QueuedConnection,
                                     Q_ARG(float, 100 * finished))

        def ondone(_):
            QMetaObject.invokeMethod(self, "__task_complete",
                                     Qt.QueuedConnection, Q_ARG(object, task))

        testfunc = partial(testfunc, callback=progress_callback)
        task.future = self.__executor.submit(testfunc)
        task.future.add_done_callback(ondone)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, task):
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        if self.__task is not task:
            assert task.cancelled
            log.debug("Reaping cancelled task: %r", "<>")
            return

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)
        self.setStatusMessage("")
        result = task.future
        assert result.done()
        self.__task = None
        try:
            results = result.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:
            log.exception("testing error (in __task_complete):", exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er),
                                                                 er)))
            self.__state = State.Done
            return

        self.__state = State.Done

        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.score_table.update_header(self.scorers)
        self.update_stats_model()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            assert task.future.done()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()
Пример #49
0
    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QtGui.QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QtGui.QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QtGui.QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QtGui.QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QtGui.QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QtGui.QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QtGui.QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [("Taxonomy", "ncbi_taxonomy.tar.gz"),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QtCore.QThreadPool(self))
        self._executor.submit(task)
Пример #50
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False
        self.scorers = []

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()  # type: Dict[Any, Input]

        self.__state = State.Waiting
        # Do we need to [re]test any learners, set by _invalidate and
        # cleared by __update
        self.__needupdate = False
        self.__task = None  # type: Optional[Task]
        self.__executor = ThreadExecutor()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_folds",
                     label="Number of folds: ",
                     items=[str(x) for x in self.NFolds],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(order=DomainModel.METAS,
                                         valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(ibox,
                                           self,
                                           "fold_feature",
                                           model=self.feature_model,
                                           orientation=Qt.Horizontal,
                                           callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(ibox,
                     self,
                     "n_repeats",
                     label="Repeat train/test: ",
                     items=[str(x) for x in self.NRepeats],
                     maximumContentsLength=3,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.comboBox(ibox,
                     self,
                     "sample_size",
                     label="Training set size: ",
                     items=["{} %".format(x) for x in self.SampleSizes],
                     maximumContentsLength=5,
                     orientation=Qt.Horizontal,
                     callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)
        self.score_table = ScoreTable(self)
        self.score_table.shownScoresChanged.connect(self.update_stats_model)

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.score_table.view)
Пример #51
0
    def __init__(self):
        super().__init__()

        # sets self.curvePoints, self.steps equidistant points from
        # 1/self.steps to 1
        self.updateCurvePoints()

        self.scoring = [
            ("Classification Accuracy", Orange.evaluation.scoring.CA),
            ("AUC", Orange.evaluation.scoring.AUC),
            ("Precision", Orange.evaluation.scoring.Precision),
            ("Recall", Orange.evaluation.scoring.Recall)
        ]
        #: input data on which to construct the learning curve
        self.data = None
        #: optional test data
        self.testdata = None
        #: A {input_id: Learner} mapping of current learners from input channel
        self.learners = OrderedDict()
        #: A {input_id: List[Results]} mapping of input id to evaluation
        #: results list, one for each curve point
        self.results = OrderedDict()
        #: A {input_id: List[float]} mapping of input id to learning curve
        #: point scores
        self.curves = OrderedDict()

        # [start-snippet-3]
        #: The current evaluating task (if any)
        self._task = None   # type: Optional[Task]
        #: An executor we use to submit learner evaluations into a thread pool
        self._executor = ThreadExecutor()
        # [end-snippet-3]

        # GUI
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(box, 'No data on input.')
        self.infob = gui.widgetLabel(box, 'No learners.')

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Evaluation Scores")
        gui.comboBox(box, self, "scoringF",
                     items=[x[0] for x in self.scoring],
                     callback=self._invalidate_curves)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Options")
        gui.spin(box, self, 'folds', 2, 100, step=1,
                 label='Cross validation folds:  ', keyboardTracking=False,
                 callback=lambda:
                    self._invalidate_results() if self.commitOnChange else None
        )
        gui.spin(box, self, 'steps', 2, 100, step=1,
                 label='Learning curve points:  ', keyboardTracking=False,
                 callback=[self.updateCurvePoints,
                           lambda: self._invalidate_results() if self.commitOnChange else None])
        gui.checkBox(box, self, 'commitOnChange', 'Apply setting on any change')
        self.commitBtn = gui.button(box, self, "Apply Setting",
                                    callback=self._invalidate_results,
                                    disabled=True)

        gui.rubber(self.controlArea)

        # table widget
        self.table = gui.table(self.mainArea,
                               selectionMode=QTableWidget.NoSelection)
Пример #52
0
    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable))
        self.custom_gs_col_box = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        # Gene Sets widget
        self.gs_widget = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()
class OWGOEnrichmentAnalysis(widget.OWWidget):
    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/GOBrowser.svg"
    priority = 2020

    inputs = [("Cluster Data", Orange.data.Table,
               "setDataset", widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table,
               "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Data on Unselected Genes", Orange.data.Table),
               ("Data on Unknown Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    annotationIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    geneMatcherSettings = settings.Setting([True, False, False, False])
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True for et in go.evidenceTypesOrdered})

    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)
    selectionAddTermAsClass = settings.Setting(0)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(self, parent)

        self.clusterDataset = None
        self.referenceDataset = None
        self.ontology = None
        self.annotations = None
        self.loadedAnnotationCode = "---"
        self.treeStructRootKey = None
        self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = OWGOEnrichmentAnalysis.Initializing

        self.annotationCodes = []

        #############
        ## GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        ## Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        box = gui.widgetBox(self.inputTab, "Organism")
        self.annotationComboBox = gui.comboBox(
            box, self, "annotationIndex", items=self.annotationCodes,
            callback=self._updateEnrichment, tooltip="Select organism")

        genebox = gui.widgetBox(self.inputTab, "Gene Names")
        self.geneAttrIndexCombo = gui.comboBox(
            genebox, self, "geneAttrIndex", callback=self._updateEnrichment,
            tooltip="Use this attribute to extract gene names from input data")
        self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

        cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names",
                          tooltip="Use column names for gene names",
                          callback=self._updateEnrichment)
        cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

        gui.button(genebox, self, "Gene matcher settings",
                   callback=self.UpdateGeneMatcher,
                   tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self._updateEnrichment)

        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self._updateEnrichment)

        ## Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph, 
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        #use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self._updateEnrichment)
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        ## Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms", 
                      "Outputs genes common to all selected GO terms"],
            callback=[self.ExampleSelection,
                      self.UpdateAddClassButton])

        self.addClassCB = gui.checkBox(
            box, self, "selectionAddTermAsClass", "Add GO Term as class",
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        self.setBlocking(True)
        self._executor = ThreadExecutor()
        self._init = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             ("GO", "taxonomy.pickle")]
        )
        self._init.finished.connect(self.__initialize_finish)
        self._executor.submit(self._init)


    def sizeHint(self):
        return QSize(1000, 700)

    def __initialize_finish(self):
        self.setBlocking(False)

        try:
            self.annotationFiles = listAvailable()
        except ConnectTimeout:
            self.error(2, "Internet connection error, unable to load data. " + \
                          "Check connection and create a new GO Browser widget.")
            self.filterTab.setEnabled(False)
            self.inputTab.setEnabled(False)
            self.selectTab.setEnabled(False)
            self.listView.setEnabled(False)
            self.sigTerms.setEnabled(False)
        else:
            self.annotationCodes = sorted(self.annotationFiles.keys())
            self.annotationComboBox.clear()
            self.annotationComboBox.addItems(self.annotationCodes)
            self.annotationComboBox.setCurrentIndex(self.annotationIndex)
            self.__state = OWGOEnrichmentAnalysis.Ready

    def __on_evidenceChanged(self):
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self._updateEnrichment()

    def UpdateGeneMatcher(self):
        """Open the Gene matcher settings dialog."""
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, modal=True)
        if dialog.exec_() != QDialog.Rejected:
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            if self.annotations:
                self.SetGeneMatcher()
                self._updateEnrichment()

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.geneAttrIndexCombo.clear()
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Data on Unselected Genes", None)
        self.send("Data on Unknown Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        if self.__state == OWGOEnrichmentAnalysis.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.clear()
        self.clusterDataset = data

        if data is not None:
            domain = data.domain
            allvars = domain.variables + domain.metas
            self.candidateGeneAttrs = [var for var in allvars if isstring(var)]

            self.geneAttrIndexCombo.clear()
            for var in self.candidateGeneAttrs:
                self.geneAttrIndexCombo.addItem(*gui.attributeItem(var))
            taxid = data_hints.get_hint(data, "taxid", "")
            code = None
            try:
                code = go.from_taxid(taxid)
            except KeyError:
                pass
            except Exception as ex:
                print(ex)

            if code is not None:
                filename = "gene_association.%s.tar.gz" % code
                if filename in self.annotationFiles.values():
                    self.annotationIndex = \
                            [i for i, name in enumerate(self.annotationCodes) \
                             if self.annotationFiles[name] == filename].pop()

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)
            self.openContext(data)

            self.geneAttrIndex = min(self.geneAttrIndex,
                                     len(self.candidateGeneAttrs) - 1)
            if len(self.candidateGeneAttrs) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            elif self.geneAttrIndex < len(self.candidateGeneAttrs):
                self.geneAttrIndex = len(self.candidateGeneAttrs) - 1

            self._updateEnrichment()

    def setReferenceDataset(self, data=None):
        self.referenceDataset = data
        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.clusterDataset is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            graph = self.Enrichment()
            self.SetGraph(graph)
        elif self.clusterDataset:
            self.__updateReferenceSetButton()

    def handleNewSignals(self):
        super().handleNewSignals()

    def _updateEnrichment(self):
        if self.clusterDataset is not None and \
                self.__state == OWGOEnrichmentAnalysis.Ready:
            pb = gui.ProgressBar(self, 100)
            self.Load(pb=pb)
            graph = self.Enrichment(pb=pb)
            self.FilterUnknownGenes()
            self.SetGraph(graph)

    def __updateReferenceSetButton(self):
        allgenes, refgenes = None, None
        if self.referenceDataset:
            try:
                allgenes = self.genesFromTable(self.referenceDataset)
            except Exception:
                allgenes = []
            refgenes, unknown = self.FilterAnnotatedGenes(allgenes)
        self.referenceRadioBox.buttons[1].setDisabled(not bool(allgenes))
        self.referenceRadioBox.buttons[1].setText("Reference set " + ("(%i genes, %i matched)" % (len(allgenes), len(refgenes)) if allgenes and refgenes else ""))

    def genesFromTable(self, data):
        if self.useAttrNames:
            genes = [v.name for v in data.domain.variables]
        else:
            attr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs) - 1)]
            genes = [str(ex[attr]) for ex in data if not numpy.isnan(ex[attr])]
            if any("," in gene for gene in genes):
                self.information(0, "Separators detected in gene names. Assuming multiple genes per example.")
                genes = reduce(operator.iadd, (genes.split(",") for genes in genes), [])
        return genes

    def FilterAnnotatedGenes(self, genes):
        matchedgenes = self.annotations.get_gene_names_translator(genes).values()
        return matchedgenes, [gene for gene in genes if gene not in matchedgenes]

    def FilterUnknownGenes(self):
        if not self.useAttrNames and self.candidateGeneAttrs:
            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs)-1)]
            indices = []
            for i, ex in enumerate(self.clusterDataset):
                if not any(self.annotations.genematcher.match(n.strip()) for n in str(ex[geneAttr]).split(",")):
                    indices.append(i)
            if indices:
                data = self.clusterDataset[indices]
            else:
                data = None
            self.send("Data on Unknown Genes", data)
        else:
            self.send("Data on Unknown Genes", None)

    def Load(self, pb=None):

        if self.__state == OWGOEnrichmentAnalysis.Ready:
            go_files, tax_files = serverfiles.listfiles("GO"), serverfiles.listfiles("Taxonomy")
            calls = []
            pb, finish = (gui.ProgressBar(self, 0), True) if pb is None else (pb, False)
            count = 0
            if not tax_files:
                calls.append(("Taxonomy", "ncbi_taxnomy.tar.gz"))
                count += 1
            org = self.annotationCodes[min(self.annotationIndex, len(self.annotationCodes)-1)]
            if org != self.loadedAnnotationCode:
                count += 1
                if self.annotationFiles[org] not in go_files:
                    calls.append(("GO", self.annotationFiles[org]))
                    count += 1

            if "gene_ontology_edit.obo.tar.gz" not in go_files:
                calls.append(("GO", "gene_ontology_edit.obo.tar.gz"))
                count += 1
            if not self.ontology:
                count += 1
            pb.iter += count * 100

            for args in calls:
                serverfiles.localpath_download(*args, **dict(callback=pb.advance))

            i = len(calls)
            if not self.ontology:
                self.ontology = go.Ontology(progress_callback=lambda value: pb.advance())
                i += 1

            if org != self.loadedAnnotationCode:
                self.annotations = None
                gc.collect()  # Force run garbage collection
                code = self.annotationFiles[org].split(".")[-3]
                self.annotations = go.Annotations(code, genematcher=gene.GMDirect(), progress_callback=lambda value: pb.advance())
                i += 1
                self.loadedAnnotationCode = org
                count = defaultdict(int)
                geneSets = defaultdict(set)

                for anno in self.annotations.annotations:
                    count[anno.evidence] += 1
                    geneSets[anno.evidence].add(anno.geneName)
                for etype in go.evidenceTypesOrdered:
                    ecb = self.evidenceCheckBoxDict[etype]
                    ecb.setEnabled(bool(count[etype]))
                    ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype])))
            if finish:
                pb.finish()

    def SetGeneMatcher(self):
        if self.annotations:
            taxid = self.annotations.taxid
            matchers = []
            for matcher, use in zip([gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy], self.geneMatcherSettings):
                if use:
                    try:
                        if taxid == "352472":
                            matchers.extend([matcher(taxid), gene.GMDicty(),
                                             [matcher(taxid), gene.GMDicty()]])
                            # The reason machers are duplicated is that we want `matcher` or `GMDicty` to
                            # match genes by them self if possible. Only use the joint matcher if they fail.   
                        else:
                            matchers.append(matcher(taxid))
                    except Exception as ex:
                        print(ex)
            self.annotations.genematcher = gene.matcher(matchers)
            self.annotations.genematcher.set_targets(self.annotations.gene_names)

    def Enrichment(self, pb=None):
        assert self.clusterDataset is not None

        pb = gui.ProgressBar(self, 100) if pb is None else pb
        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        if isinstance(self.annotations.genematcher, gene.GMDirect):
            self.SetGeneMatcher()
        self.error(1)
        self.warning([0, 1])

        if self.useAttrNames:
            clusterGenes = [v.name for v in self.clusterDataset.domain.attributes]
            self.information(0)
        elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
            geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
            clusterGenes = [str(ex[geneAttr]) for ex in self.clusterDataset
                            if not numpy.isnan(ex[geneAttr])]
            if any("," in gene for gene in clusterGenes):
                self.information(0, "Separators detected in cluster gene names. Assuming multiple genes per example.")
                clusterGenes = reduce(operator.iadd, (genes.split(",") for genes in clusterGenes), [])
            else:
                self.information(0)
        else:
            self.error(1, "Failed to extract gene names from input dataset!")
            return {}

        genesSetCount = len(set(clusterGenes))

        self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(clusterGenes).values()

        self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" % (genesSetCount, len(clusterGenes), 100.0*len(clusterGenes)/genesSetCount if genesSetCount else 0.0))

        referenceGenes = None
        if not self.useReferenceDataset or self.referenceDataset is None:
            self.information(2)
            self.information(1)
            referenceGenes = self.annotations.gene_names

        elif self.referenceDataset is not None:
            if self.useAttrNames:
                referenceGenes = [v.name for v in self.referenceDataset.domain.attributes]
                self.information(1)
            elif geneAttr in (self.referenceDataset.domain.variables +
                              self.referenceDataset.domain.metas):
                referenceGenes = [str(ex[geneAttr]) for ex in self.referenceDataset
                                  if not numpy.isnan(ex[geneAttr])]
                if any("," in gene for gene in clusterGenes):
                    self.information(1, "Separators detected in reference gene names. Assuming multiple genes per example.")
                    referenceGenes = reduce(operator.iadd, (genes.split(",") for genes in referenceGenes), [])
                else:
                    self.information(1)
            else:
                self.information(1)
                referenceGenes = None

            if referenceGenes is None:
                referenceGenes = list(self.annotations.gene_names)
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(2, "Unable to extract gene names from reference dataset. Using entire genome for reference")
                self.useReferenceDataset = 0
            else:
                refc = len(referenceGenes)
                referenceGenes = self.annotations.get_gene_names_translator(referenceGenes).values()
                self.referenceRadioBox.buttons[1].setText("Reference set (%i genes, %i matched)" % (refc, len(referenceGenes)))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0

        if not referenceGenes:
            self.error(1, "No valid reference set")
            return {}

        self.referenceGenes = referenceGenes
        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)
        aspect = ["P", "C", "F"][self.aspectIndex]

        if clusterGenes:
            self.terms = terms = self.annotations.get_enriched_terms(
                clusterGenes, referenceGenes, evidences, aspect=aspect,
                prob=self.probFunctions[self.probFunc], use_fdr=False,
                progress_callback=lambda value: pb.advance())
            ids = []
            pvals = []
            for i, d in self.terms.items():
                ids.append(i)
                pvals.append(d[1])
            for i, fdr in zip(ids, stats.FDR(pvals)):  # save FDR as the last part of the tuple
                terms[i] = tuple(list(terms[i]) + [ fdr ])

        else:
            self.terms = terms = {}
        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        pb.finish()
        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for id in ids:
            parents[id] = set([term for _, term in self.ontology[id].related])

        children = {}
        for term in self.terms:
            children[term] = set([id for id in ids if term in parents[id]])

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term], children[term])
            if not self.ontology[term].related and not getattr(self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term
        return terms

    def FilterGraph(self, graph):
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue: #FDR
            graph = dict(filter(lambda item: item[1][3] <= self.maxPValue, graph.items()))
        if self.filterByNumOfInstances:
            graph = dict(filter(lambda item: len(item[1][0]) >= self.minNumOfInstances, graph.items()))
        return graph

    def FilterAndDisplayGraph(self):
        if self.clusterDataset:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        self.listView.clear()
        self.listViewItems=[]
        self.sigTerms.clear()

    def DisplayGraph(self):
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []
        enrichment = lambda t: len(t[0]) / t[2] * (len(self.referenceGenes) / len(self.clusterGenes))
        maxFoldEnrichment = max([enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term], len(self.clusterGenes), len(self.referenceGenes), maxFoldEnrichment, parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                if term in self.termListViewItemDict:
                    self.termListViewItemDict[term].append(displayNode)
                else:
                    self.termListViewItemDict[term] = [displayNode]
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        if self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = self.graph.items()
        terms = sorted(terms, key=lambda item: item[1][1])
        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for i, (t_id, (genes, p_value, refCount, fdr)) in enumerate(terms):
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.clusterGenes),
                                    len(self.referenceGenes),
                                    maxFoldEnrichment,
                                    self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [], [],
            # All is meta!
            [Orange.data.StringVariable("GO Term Id"),
             Orange.data.StringVariable("GO Term Name"),
             Orange.data.ContinuousVariable("Cluster Frequency"),
             Orange.data.ContinuousVariable("Genes in Cluster", number_of_decimals=0),
             Orange.data.ContinuousVariable("Reference Frequency"),
             Orange.data.ContinuousVariable("Genes in Reference", number_of_decimals=0),
             Orange.data.ContinuousVariable("p-value"),
             Orange.data.ContinuousVariable("FDR"),
             Orange.data.ContinuousVariable("Enrichment"),
             Orange.data.StringVariable("Genes")])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.clusterGenes),
                  len(genes),
                  r_count / len(self.referenceGenes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.clusterGenes) * \
                  len(self.referenceGenes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X, metas=M)
        else:
            termsTable = Orange.data.Table(termsDomain)
        self.send("Enrichment Report", termsTable)

    def ViewSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selected = self.listView.selectedItems()
        self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
        self.ExampleSelection()
        self.selectionChanging = 0

    def TableSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selectedIds = set([self.sigTerms.itemFromIndex(index).goId for index in self.sigTerms.selectedIndexes()])

        for i in range(self.sigTerms.topLevelItemCount()):
            item = self.sigTerms.topLevelItem(i)
            selected = item.goId in selectedIds
            term = item.goId

            if selected:
                self.selectedTerms.append(term)

            for lvi in self.termListViewItemDict[term]:
                try:
                    lvi.setSelected(selected)
                    if selected:
                        lvi.setExpanded(True)
                except RuntimeError:  # Underlying C/C++ object deleted
                    pass

        self.ExampleSelection()
        self.selectionChanging = 0

    def UpdateAddClassButton(self):
        self.addClassCB.setEnabled(self.selectionDisjoint == 1)

    def ExampleSelection(self):
        self.commit()

    def commit(self):
        if self.clusterDataset is None:
            return

        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms), set())

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
#             if getattr(self, "useEvidence" + etype):
                evidences.append(etype)
        allTerms = self.annotations.get_annotated_terms(
            genes, direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            ccount = 1 if self.selectionDisjoint == 1 else len(self.selectedTerms)
            selectedGenes = [gene for gene, c in count.items()
                             if c == ccount and gene in genes]
        else:
            selectedGenes = reduce(
                operator.ior,
                (set(allTerms.get(term, [])) for term in self.selectedTerms),
                set())

        if self.useAttrNames:
            vars = [self.clusterDataset.domain[gene]
                    for gene in set(selectedGenes)]
            domain = Orange.data.Domain(
                vars, self.clusterDataset.domain.class_vars,
                self.clusterDataset.domain.metas)
            newdata = self.clusterDataset.from_table(domain, self.clusterDataset)

            self.send("Data on Selected Genes", newdata)
            self.send("Data on Unselected Genes", None)
        elif self.candidateGeneAttrs:
            selectedExamples = []
            unselectedExamples = []

            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs)-1)]

            if self.selectionDisjoint == 1:
                goVar = Orange.data.DiscreteVariable(
                    "GO Term", values=list(self.selectedTerms))
                newDomain = Orange.data.Domain(
                    self.clusterDataset.domain.variables, goVar,
                    self.clusterDataset.domain.metas)
                goColumn = []
            for i, ex in enumerate(self.clusterDataset):
                if not numpy.isnan(ex[geneAttr]) and any(gene in selectedGenes for gene in str(ex[geneAttr]).split(",")):
                    if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                        terms = filter(lambda term: any(gene in self.graph[term][0] for gene in str(ex[geneAttr]).split(",")) , self.selectedTerms)
                        term = sorted(terms)[0]
                        goColumn.append(goVar.values.index(term))
                    selectedExamples.append(i)
                else:
                    unselectedExamples.append(i)

            if selectedExamples:
                selectedExamples = self.clusterDataset[selectedExamples]
                if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                    selectedExamples = Orange.data.Table.from_table(newDomain, selectedExamples)
                    view, issparse = selectedExamples.get_column_view(goVar)
                    assert not issparse
                    view[:] = goColumn
            else:
                selectedExamples = None

            if unselectedExamples:
                unselectedExamples = self.clusterDataset[unselectedExamples]
            else:
                unselectedExamples = None

            self.send("Data on Selected Genes", selectedExamples)
            self.send("Data on Unselected Genes", unselectedExamples)

    def ShowInfo(self):
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText("Ontology:\n" + self.ontology.header
                      if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" + self.annotations.header.replace("!", "")
                      if self.annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
Пример #54
0
class OWGeneSets(OWWidget):
    name = "Gene Sets"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9
    want_main_area = True

    COUNT, GENES, CATEGORY, TERM = range(4)
    DATA_HEADER_LABELS = ["Count", 'Genes In Set', 'Category', 'Term']

    organism = Setting(None, schema_only=True)
    stored_gene_sets_selection = Setting([], schema_only=True)
    selected_rows = Setting([], schema_only=True)
    custom_gene_set_indicator = Setting(None, schema_only=True)

    min_count = Setting(5)
    use_min_count = Setting(True)
    auto_commit = Setting(True)

    class Inputs:
        genes = Input("Data", Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        all_sets_filtered = Msg('All sets were filtered out.')

    class Error(OWWidget.Error):
        organism_mismatch = Msg('Organism in input data and custom gene sets does not match')
        missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID)
        cant_reach_host = Msg("Host orange.biolab.si is unreachable.")
        cant_load_organisms = Msg("No available organisms, please check your connection.")

    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable))
        self.custom_gs_col_box = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        # Gene Sets widget
        self.gs_widget = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()

    def __reset_widget_state(self):
        self.update_info_box()
        # clear data view
        self.init_item_model()
        # reset filters
        self.setup_filter_model()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._init_gene_sets_finished)
            self._task = None

    @Slot()
    def progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def __get_input_genes(self):
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes.append(str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def handle_custom_gene_sets(self, select_customs_flag=False):
        if self.custom_gene_set_indicator:
            if self.custom_data is not None and self.custom_gene_id_column is not None:

                if self.__check_organism_mismatch():
                    # self.gs_label_combobox.setDisabled(True)
                    self.Error.organism_mismatch()
                    self.gs_widget.update_gs_hierarchy()
                    return

                if isinstance(self.custom_gene_set_indicator, DiscreteVariable):
                    labels = self.custom_gene_set_indicator.values
                    gene_sets_names = [
                        labels[int(idx)] for idx in self.custom_data.get_column_view(self.custom_gene_set_indicator)[0]
                    ]
                else:
                    gene_sets_names, _ = self.custom_data.get_column_view(self.custom_gene_set_indicator)

                self.num_of_custom_sets = len(set(gene_sets_names))
                gene_names, _ = self.custom_data.get_column_view(self.custom_gene_id_column)
                hierarchy_title = (self.custom_data.name if self.custom_data.name else 'Custom sets',)
                try:
                    self.gs_widget.add_custom_sets(
                        gene_sets_names,
                        gene_names,
                        hierarchy_title=hierarchy_title,
                        select_customs_flag=select_customs_flag,
                    )
                except geneset.GeneSetException:
                    pass
                # self.gs_label_combobox.setDisabled(False)
            else:
                self.gs_widget.update_gs_hierarchy()

        self.update_info_box()

    def update_tree_view(self):
        self.init_gene_sets()

    def invalidate(self):
        # clear
        self.__reset_widget_state()
        self.update_info_box()

        if self.input_data is not None:
            # setup
            self.__get_input_genes()
            self.update_tree_view()

    def __check_organism_mismatch(self):
        """ Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            return self.tax_id != self.custom_tax_id
        return False

    def __get_reference_genes(self):
        self.reference_genes = []

        if self.reference_attr_names:
            for variable in self.reference_data.domain.attributes:
                self.reference_genes.append(str(variable.attributes.get(self.reference_gene_id_attribute, '?')))
        else:
            genes, _ = self.reference_data.get_column_view(self.reference_gene_id_column)
            self.reference_genes = [str(g) for g in genes]

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        self.Error.clear()
        self.__reset_widget_state()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            self.custom_tax_id = str(self.custom_data.attributes.get(TAX_ID, None))
            self.custom_use_attr_names = self.custom_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(GENE_ID_COLUMN, None)

            if self.gs_label_combobox is None:
                self.gs_label_combobox = comboBox(
                    self.custom_gs_col_box,
                    self,
                    "custom_gene_set_indicator",
                    sendSelectedValue=True,
                    model=self.feature_model,
                    callback=self.on_gene_set_indicator_changed,
                )
            self.custom_gs_col_box.show()

            if self.custom_gene_set_indicator in self.feature_model:
                index = self.feature_model.indexOf(self.custom_gene_set_indicator)
                self.custom_gene_set_indicator = self.feature_model[index]
            else:
                self.custom_gene_set_indicator = self.feature_model[0]
        else:
            self.custom_gs_col_box.hide()

        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets(select_customs_flag=self.custom_gene_set_indicator is not None)
        self.invalidate()

    @Inputs.genes
    def handle_genes_input(self, data):
        self.Error.clear()
        self.__reset_widget_state()
        # clear output
        self.Outputs.matched_genes.send(None)
        # clear input values
        self.input_genes = []
        self.input_data = None
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.update_info_box()

        if data:
            self.input_data = data
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.input_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(GENE_ID_COLUMN, None)
            self.update_info_box()

            if not (
                self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))
            ):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            if self.__check_organism_mismatch():
                self.Error.organism_mismatch()
                return

            self.gs_widget.load_gene_sets(self.tax_id)

            # if input data change, we need to refresh custom sets
            if self.custom_data:
                self.gs_widget.clear_custom_sets()
                self.handle_custom_gene_sets()

            self.invalidate()

    def update_info_box(self):
        info_string = ''
        if self.input_genes:
            info_string += '{} unique gene names on input.\n'.format(len(self.input_genes))
            info_string += '{} genes on output.\n'.format(self.num_of_sel_genes)
        else:
            if self.input_data:
                if not any([self.gene_id_column, self.gene_id_attribute]):
                    info_string += 'Input data with incorrect meta data.\nUse Gene Name Matcher widget.'
            else:
                info_string += 'No data on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def create_partial(self):
        return partial(
            self.set_items,
            self.gs_widget.gs_object,
            self.stored_gene_sets_selection,
            set(self.input_genes),
            self.callback,
        )

    def callback(self):
        if self._task.cancelled:
            raise KeyboardInterrupt()
        if self.progress_bar:
            methodinvoke(self, "progress_advance")()

    def init_gene_sets(self):
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task()
        self.init_item_model()

        # save setting on selected hierarchies
        self.stored_gene_sets_selection = self.gs_widget.get_hierarchies(only_selected=True)

        f = self.create_partial()

        progress_iterations = sum(
            (
                len(g_set)
                for hier, g_set in self.gs_widget.gs_object.map_hierarchy_to_sets().items()
                if hier in self.stored_gene_sets_selection
            )
        )

        self.progress_bar = ProgressBar(self, iterations=progress_iterations)

        self._task.future = self._executor.submit(f)

        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._init_gene_sets_finished)

    @Slot(concurrent.futures.Future)
    def _init_gene_sets_finished(self, f):
        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progress_bar.finish()
        self.setStatusMessage('')

        try:
            results = f.result()  # type: list
            [self.data_model.appendRow(model_item) for model_item in results]
            self.filter_proxy_model.setSourceModel(self.data_model)
            self.data_view.selectionModel().selectionChanged.connect(self.commit)
            self.filter_data_view()
            self.set_selection()
            self.update_info_box()
        except Exception as ex:
            print(ex)

    def create_filters(self):
        search_term = self.search_pattern.lower().strip().split()

        filters = [
            FilterProxyModel.Filter(
                self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term)
            )
        ]

        if self.use_min_count:
            filters.append(FilterProxyModel.Filter(self.COUNT, Qt.DisplayRole, lambda value: value >= self.min_count))

        return filters

    def filter_data_view(self):
        filter_proxy = self.filter_proxy_model  # type: FilterProxyModel
        model = filter_proxy.sourceModel()  # type: QStandardItemModel

        if isinstance(model, QStandardItemModel):

            # apply filtering rules
            filter_proxy.set_filters(self.create_filters())

            if model.rowCount() and not filter_proxy.rowCount():
                self.Warning.all_sets_filtered()
            else:
                self.Warning.clear()

    def set_selection(self):
        if len(self.selected_rows):
            view = self.data_view
            model = self.data_model

            row_model_indexes = [model.indexFromItem(model.item(i)) for i in self.selected_rows]
            proxy_rows = [self.filter_proxy_model.mapFromSource(i).row() for i in row_model_indexes]

            if model.rowCount() <= self.selected_rows[-1]:
                return

            header_count = view.header().count() - 1
            selection = QItemSelection()

            for row_index in proxy_rows:
                selection.append(
                    QItemSelectionRange(
                        self.filter_proxy_model.index(row_index, 0),
                        self.filter_proxy_model.index(row_index, header_count),
                    )
                )

            view.selectionModel().select(selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        selection_model = self.data_view.selectionModel()

        if selection_model:
            selection = selection_model.selectedRows(self.COUNT)
            self.selected_rows = [self.filter_proxy_model.mapToSource(sel).row() for sel in selection]

            if selection and self.input_genes:
                genes = [model_index.data(Qt.UserRole) for model_index in selection]
                output_genes = [gene_name for gene_name in list(set.union(*genes))]
                self.num_of_sel_genes = len(output_genes)
                self.update_info_box()

                if self.use_attr_names:
                    selected = [
                        column
                        for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes
                        and str(column.attributes[self.gene_id_attribute]) in output_genes
                    ]

                    domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas)
                    new_data = self.input_data.from_table(domain, self.input_data)
                    self.Outputs.matched_genes.send(new_data)

                else:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(self.gene_id_column, output_genes)
                    # apply filter to the data
                    data_table = table_filter.Values([only_known])(self.input_data)

                    self.Outputs.matched_genes.send(data_table)

    def assign_delegates(self):
        self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self))

        self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self))

    def setup_filter_model(self):
        self.filter_proxy_model = FilterProxyModel()
        self.filter_proxy_model.setFilterKeyColumn(self.TERM)
        self.data_view.setModel(self.filter_proxy_model)

    def setup_filter_area(self):
        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(
            h_widget,
            self,
            'min_count',
            0,
            1000,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_min_count',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_data_view)

    def on_gene_set_indicator_changed(self):
        # self._handle_future_model()
        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets()
        self.invalidate()

    def setup_control_area(self):
        # Control area
        self.input_info = widgetLabel(widgetBox(self.controlArea, "Info", addSpace=True), 'No data on input.\n')
        self.custom_gs_col_box = box = vBox(self.controlArea, 'Custom Gene Set Term Column')
        box.hide()

        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self, 'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(self.update_tree_view)

        self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

    def setup_gui(self):
        # control area
        self.setup_control_area()

        # main area
        self.data_view = QTreeView()
        self.setup_filter_model()
        self.setup_filter_area()
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(False)
        self.data_view.setItemDelegateForColumn(self.TERM, LinkStyledItemDelegate(self.data_view))

        self.mainArea.layout().addWidget(self.data_view)

        self.data_view.header().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.assign_delegates()

    @staticmethod
    def set_items(gene_sets, sets_to_display, genes, callback):
        model_items = []
        if not genes:
            return

        for gene_set in sorted(gene_sets):
            if gene_set.hierarchy not in sets_to_display:
                continue

            callback()

            matched_set = gene_set.genes & genes
            if len(matched_set) > 0:
                category_column = QStandardItem()
                term_column = QStandardItem()
                count_column = QStandardItem()
                genes_column = QStandardItem()

                category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole)
                term_column.setData(gene_set.name, Qt.DisplayRole)
                term_column.setData(gene_set.name, Qt.ToolTipRole)
                term_column.setData(gene_set.link, LinkRole)
                term_column.setForeground(QColor(Qt.blue))

                count_column.setData(matched_set, Qt.UserRole)
                count_column.setData(len(matched_set), Qt.DisplayRole)

                genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
                genes_column.setData(
                    set(gene_set.genes), Qt.UserRole
                )  # store genes to get then on output on selection

                model_items.append([count_column, genes_column, category_column, term_column])

        return model_items

    def init_item_model(self):
        if self.data_model:
            self.data_model.clear()
            self.setup_filter_model()
        else:
            self.data_model = QStandardItemModel()

        self.data_model.setSortRole(Qt.UserRole)
        self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS)

    def sizeHint(self):
        return QSize(1280, 960)
Пример #55
0
    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene atttibute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)
class OWExplainPredictions(OWWidget):

    name = "Explain Predictions"
    description = "Computes attribute contributions to the final prediction with an approximation algorithm for shapely value"
    icon = "icons/ExplainPredictions.svg"
    priority = 200
    gui_error = settings.Setting(0.05)
    gui_p_val = settings.Setting(0.05)
    gui_num_atr = settings.Setting(20)
    sort_index = settings.Setting(SortBy.ABSOLUTE)

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model, multiple=False)
        sample = Input("Sample", Table)

    class Outputs:
        explanations = Output("Explanations", Table)

    class Error(OWWidget.Error):
        sample_too_big = widget.Msg("Can only explain one sample at the time.")

    class Warning(OWWidget.Warning):
        unknowns_increased = widget.Msg(
            "Number of unknown values increased, Data and Sample domains mismatch.")

    def __init__(self):
        super().__init__()
        self.data = None
        self.model = None
        self.to_explain = None
        self.explanations = None
        self.stop = True
        self.e = None

        self._task = None
        self._executor = ThreadExecutor()

        info_box = gui.vBox(self.controlArea, "Info")
        self.data_info = gui.widgetLabel(info_box, "Data: N/A")
        self.model_info = gui.widgetLabel(info_box, "Model: N/A")
        self.sample_info = gui.widgetLabel(info_box, "Sample: N/A")

        criteria_box = gui.vBox(self.controlArea, "Stopping criteria")
        self.error_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_error",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error < ",
                                   spinType=float,
                                   callback=self._update_error_spin,
                                   controlWidth=80,
                                   keyboardTracking=False)

        self.p_val_spin = gui.spin(criteria_box,
                                   self,
                                   "gui_p_val",
                                   0.01,
                                   1,
                                   step=0.01,
                                   label="Error p-value < ",
                                   spinType=float,
                                   callback=self._update_p_val_spin,
                                   controlWidth=80, keyboardTracking=False)

        plot_properties_box = gui.vBox(self.controlArea, "Display features")
        self.num_atr_spin = gui.spin(plot_properties_box,
                                     self,
                                     "gui_num_atr",
                                     1,
                                     100,
                                     step=1,
                                     label="Show attributes",
                                     callback=self._update_num_atr_spin,
                                     controlWidth=80,
                                     keyboardTracking=False)

        self.sort_combo = gui.comboBox(plot_properties_box,
                                       self,
                                       "sort_index",
                                       label="Rank by",
                                       items=SortBy.items(),
                                       orientation=Qt.Horizontal,
                                       callback=self._update_combo)

        gui.rubber(self.controlArea)

        self.cancel_button = gui.button(self.controlArea,
                                        self,
                                        "Stop Computation",
                                        callback=self.toggle_button,
                                        autoDefault=True,
                                        tooltip="Stops and restarts computation")
        self.cancel_button.setDisabled(True)

        predictions_box = gui.vBox(self.mainArea, "Model prediction")
        self.predict_info = gui.widgetLabel(predictions_box, "")

        self.mainArea.setMinimumWidth(700)
        self.resize(700, 400)

        class _GraphicsView(QGraphicsView):
            def __init__(self, scene, parent, **kwargs):
                for k, v in dict(verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
                                 viewportUpdateMode=QGraphicsView.BoundingRectViewportUpdate,
                                 renderHints=(QPainter.Antialiasing |
                                              QPainter.TextAntialiasing |
                                              QPainter.SmoothPixmapTransform),
                                 alignment=(Qt.AlignTop |
                                            Qt.AlignLeft),
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.MinimumExpanding)).items():
                    kwargs.setdefault(k, v)
                super().__init__(scene, parent, **kwargs)

        class GraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                                 styleSheet='QGraphicsView {background: white}')
                self.viewport().setMinimumWidth(500)
                self._is_resizing = False

            w = self

            def resizeEvent(self, resizeEvent):
                self._is_resizing = True
                self.w.draw()
                self._is_resizing = False
                return super().resizeEvent(resizeEvent)

            def is_resizing(self):
                return self._is_resizing

            def sizeHint(self):
                return QSize(600, 300)

        class FixedSizeGraphicsView(_GraphicsView):
            def __init__(self, scene, parent):
                super().__init__(scene, parent,
                                 sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                        QSizePolicy.Minimum))

            def sizeHint(self):
                return QSize(600, 30)

        """all will share the same scene, but will show different parts of it"""
        self.box_scene = QGraphicsScene(self)

        self.box_view = GraphicsView(self.box_scene, self)
        self.header_view = FixedSizeGraphicsView(self.box_scene, self)
        self.footer_view = FixedSizeGraphicsView(self.box_scene, self)

        self.mainArea.layout().addWidget(self.header_view)
        self.mainArea.layout().addWidget(self.box_view)
        self.mainArea.layout().addWidget(self.footer_view)

        self.painter = None

    def draw(self):
        """Uses GraphAttributes class to draw the explanaitons """
        self.box_scene.clear()
        wp = self.box_view.viewport().rect()
        header_height = 30
        if self.explanations is not None:
            self.painter = GraphAttributes(self.box_scene, min(
                self.gui_num_atr, self.explanations.Y.shape[0]))
            self.painter.paint(wp, self.explanations, header_h=header_height)

        """set appropriate boxes for different views"""
        rect = QRectF(self.box_scene.itemsBoundingRect().x(),
                      self.box_scene.itemsBoundingRect().y(),
                      self.box_scene.itemsBoundingRect().width(),
                      self.box_scene.itemsBoundingRect().height())

        self.box_scene.setSceneRect(rect)
        self.box_view.setSceneRect(
            rect.x(), rect.y()+header_height+2, rect.width(), rect.height() - 80)
        self.header_view.setSceneRect(
            rect.x(), rect.y(), rect.width(), 10)
        self.header_view.setFixedHeight(header_height)
        self.footer_view.setSceneRect(
            rect.x(), rect.y() + rect.height() - 50, rect.width(), 35)

    def sort_explanations(self):
        """sorts explanations according to users choice from combo box"""
        if self.sort_index == SortBy.POSITIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])][::-1]
        elif self.sort_index == SortBy.NEGATIVE:
            self.explanations = self.explanations[np.argsort(
                self.explanations.X[:, 0])]
        elif self.sort_index == SortBy.ABSOLUTE:
            self.explanations = self.explanations[np.argsort(
                np.abs(self.explanations.X[:, 0]))][::-1]
        elif self.sort_index == SortBy.BY_NAME:
            l = np.array(
                list(map(np.chararray.lower, self.explanations.metas[:, 0])))
            self.explanations = self.explanations[np.argsort(l)]
        else:
            return

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set input 'Data"""
        self.data = data
        self.explanations = None
        self.data_info.setText("Data: N/A")
        self.e = None
        if data is not None:
            model = TableModel(data, parent=None)
            if data.X.shape[0] == 1:
                inst = "1 instance and "
            else:
                inst = str(data.X.shape[0]) + " instances and "
            if data.X.shape[1] == 1:
                feat = "1 feature "
            else:
                feat = str(data.X.shape[1]) + " features"
            self.data_info.setText("Data: " + inst + feat)

    @Inputs.model
    def set_predictor(self, model):
        """Set input 'Model"""
        self.model = model
        self.model_info.setText("Model: N/A")
        self.explanations = None
        self.e = None
        if model is not None:
            self.model_info.setText("Model: " + str(model.name))

    @Inputs.sample
    @check_sql_input
    def set_sample(self, sample):
        """Set input 'Sample', checks if size is appropriate"""
        self.to_explain = sample
        self.explanations = None
        self.Error.sample_too_big.clear()
        self.sample_info.setText("Sample: N/A")
        if sample is not None:
            if len(sample.X) != 1:
                self.to_explain = None
                self.Error.sample_too_big()
            else:
                if sample.X.shape[1] == 1:
                    feat = "1 feature"
                else:
                    feat = str(sample.X.shape[1]) + " features"
                self.sample_info.setText("Sample: " + feat)
                if self.e is not None:
                    self.e.saved = False

    def handleNewSignals(self):
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self.predict_info.setText("")
        self.Warning.unknowns_increased.clear()
        self.stop = True
        self.cancel_button.setText("Stop Computation")
        self.commit_calc_or_output()

    def commit_calc_or_output(self):
        if self.data is not None and self.to_explain is not None:
            self.commit_calc()
        else:
            self.commit_output()

    def commit_calc(self):
        num_nan = np.count_nonzero(np.isnan(self.to_explain.X[0]))

        self.to_explain = self.to_explain.transform(self.data.domain)
        if num_nan != np.count_nonzero(np.isnan(self.to_explain.X[0])):
            self.Warning.unknowns_increased()
        if self.model is not None:
            # calculate contributions
            if self.e is None:
                self.e = ExplainPredictions(self.data,
                                            self.model,
                                            batch_size=min(
                                                len(self.data.X), 500),
                                            p_val=self.gui_p_val,
                                            error=self.gui_error)
            self._task = task = Task()

            def callback(progress):
                nonlocal task
                # update progress bar
                QMetaObject.invokeMethod(
                    self, "set_progress_value", Qt.QueuedConnection, Q_ARG(int, progress))
                if task.canceled:
                    return True
                return False

            def callback_update(table):
                QMetaObject.invokeMethod(
                    self, "update_view", Qt.QueuedConnection, Q_ARG(Orange.data.Table, table))

            def callback_prediction(class_value):
                QMetaObject.invokeMethod(
                    self, "update_model_prediction", Qt.QueuedConnection, Q_ARG(float, class_value))

            self.was_canceled = False
            explain_func = partial(
                self.e.anytime_explain, self.to_explain[0], callback=callback, update_func=callback_update, update_prediction=callback_prediction)

            self.progressBarInit(processEvents=None)
            task.future = self._executor.submit(explain_func)
            task.watcher = FutureWatcher(task.future)
            task.watcher.done.connect(self._task_finished)
            self.cancel_button.setDisabled(False)

    @pyqtSlot(Orange.data.Table)
    def update_view(self, table):
        self.explanations = table
        self.sort_explanations()
        self.draw()
        self.commit_output()

    @pyqtSlot(float)
    def update_model_prediction(self, value):
        self._print_prediction(value)

    @pyqtSlot(int)
    def set_progress_value(self, value):
        self.progressBarSet(value, processEvents=False)

    @pyqtSlot(concurrent.futures.Future)
    def _task_finished(self, f):
        """
        Parameters:
        ----------
        f: conncurent.futures.Future
            future instance holding the result of learner evaluation
        """
        assert self.thread() is QThread.currentThread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None

        if not self.was_canceled:
            self.cancel_button.setDisabled(True)

        try:
            results = f.result()
        except Exception as ex:
            log = logging.getLogger()
            log.exception(__name__, exc_info=True)
            self.error("Exception occured during evaluation: {!r}".format(ex))

            for key in self.results.keys():
                self.results[key] = None
        else:
            self.update_view(results[1])

        self.progressBarFinished(processEvents=False)

    def commit_output(self):
        """
        Sends best-so-far results forward
        """
        self.Outputs.explanations.send(self.explanations)

    def toggle_button(self):
        if self.stop:
            self.stop = False
            self.cancel_button.setText("Restart Computation")
            self.cancel()
        else:
            self.stop = True
            self.cancel_button.setText("Stop Computation")
            self.commit_calc_or_output()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._task_finished)
            self.was_canceled = True
            self._task_finished(self._task.future)

    def _print_prediction(self, class_value):
        """
        Parameters
        ----------
        class_value: float 
            Number representing either index of predicted class value, looked up in domain, or predicted value (regression)
        """
        name = self.data.domain.class_vars[0].name
        if isinstance(self.data.domain.class_vars[0], ContinuousVariable):
            self.predict_info.setText(name + ":      " + str(class_value))
        else:
            self.predict_info.setText(
                name + ":      " + self.data.domain.class_vars[0].values[int(class_value)])

    def _update_error_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.error = self.gui_error
        self.handleNewSignals()

    def _update_p_val_spin(self):
        self.cancel()
        if self.e is not None:
            self.e.p_val = self.gui_p_val
        self.handleNewSignals()

    def _update_num_atr_spin(self):
        self.cancel()
        self.handleNewSignals()

    def _update_combo(self):
        if self.explanations != None:
            self.sort_explanations()
            self.draw()
            self.commit_output()

    def onDeleteWidget(self):
        self.cancel()
        super().onDeleteWidget()