示例#1
0
class OWGenes(OWWidget, ConcurrentWidgetMixin):
    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        return QSize(1280, 960)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box,
            self,
            'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        # separator(output_box)
        # output_box.layout().addWidget(horizontal_line())
        # separator(output_box)
        self.exclude_radio = checkBox(output_box,
                                      self,
                                      'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box,
                                      self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea,
                    self,
                    "auto_commit",
                    "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self.handle_filter_callback)
        # rubber(self.radio_group)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        self.unknown_model = UnknownGeneInfoModel()

        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):

        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))

        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        available_organism = sorted(((tax_id, taxonomy.name(tax_id))
                                     for tax_id in taxonomy.common_taxids()),
                                    key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """ Extract and return gene names from `Orange.data.Table`.
        """
        self.input_genes = []
        if self.input_data:
            if self.use_attr_names:
                self.input_genes = [
                    str(attr.name).strip()
                    for attr in self.input_data.domain.attributes
                ]
            else:
                if self.selected_gene_col is None:
                    self.selected_gene_col = self.gene_column_identifier()

                self.input_genes = [
                    str(e[self.selected_gene_col]) for e in self.input_data
                    if not np.isnan(e[self.selected_gene_col])
                ]

    def _update_gene_matcher(self):
        self.gene_names_from_table()

        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes
        # self.gene_matcher.organism = self.get_selected_organism()

    def get_selected_organism(self):
        return self.organisms[self.selected_organism]

    def _run(self):
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """
        Get most suitable column that stores genes. If there are
        several suitable columns, select the one with most unique
        values. Take the best one.
        """

        # candidates -> (variable, num of unique values)
        candidates = ((col,
                       np.unique(self.input_data.get_column_view(col)[0]).size)
                      for col in self.gene_columns_model
                      if isinstance(col, DiscreteVariable)
                      or isinstance(col, StringVariable))

        best_candidate, _ = sorted(candidates, key=lambda x: x[1])[-1]
        return best_candidate

    def find_genes_location(self):
        """ Try locate the genes in the input data when we first load the data.

            Proposed rules:
                - when no suitable feature names are present, check the columns.
                - find the most suitable column, that is, the one with most unique values.

        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()

    def commit(self):
        selection = self.table_view.selectionModel().selectedRows(
            self.table_model.entrez_column_index)

        selected_genes = [row.data() for row in selection]
        if not len(selected_genes):
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            # Genes are in rows (we have a column with genes).
            if not self.use_attr_names:

                if self.target_database in self.input_data.domain:
                    gene_var = self.input_data.domain[self.target_database]
                    metas = self.input_data.domain.metas
                else:
                    gene_var = StringVariable(self.target_database)
                    metas = self.input_data.domain.metas + (gene_var, )

                domain = Domain(self.input_data.domain.attributes,
                                self.input_data.domain.class_vars, metas)

                table = self.input_data.transform(domain)
                col, _ = table.get_column_view(gene_var)
                col[:] = gene_ids

                # filter selected rows
                selected_genes_set = set(selected_genes)
                selected_rows = [
                    row_index for row_index, row in enumerate(table)
                    if str(row[gene_var]) in selected_genes_set
                ]

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
                table.attributes[GENE_ID_COLUMN] = self.target_database
                table = table[selected_rows] if selected_rows else table

                if self.exclude_unmatched:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(
                        gene_var, known_genes)
                    # apply filter to the data
                    table = table_filter.Values([only_known])(table)

                self.Outputs.data_table.send(table)

            # genes are are in columns (genes are features).
            else:
                domain = self.input_data.domain.copy()
                table = self.input_data.transform(domain)

                for gene in self.gene_matcher.genes:
                    if gene.input_identifier in table.domain:

                        table.domain[gene.input_identifier].attributes[
                            self.target_database] = (str(gene.gene_id)
                                                     if gene.gene_id else '?')

                        if self.replace_id_with_symbol:
                            try:
                                table.domain[gene.input_identifier].name = str(
                                    gene.symbol)
                            except AttributeError:
                                # TODO: missing gene symbol, need to handle this?
                                pass

                # filter selected columns
                selected_genes_set = set(selected_genes)
                selected = [
                    column for column in table.domain.attributes
                    if self.target_database in column.attributes
                    and str(column.attributes[
                        self.target_database]) in selected_genes_set
                ]

                output_attrs = table.domain.attributes

                if selected:
                    output_attrs = selected

                if self.exclude_unmatched:
                    known_genes_set = set(known_genes)
                    output_attrs = [
                        col for col in output_attrs if col.attributes[
                            self.target_database] in known_genes_set
                    ]

                domain = Domain(output_attrs, table.domain.class_vars,
                                table.domain.metas)

                table = table.from_table(domain, table)

                # handle table attributes
                table.attributes[TAX_ID] = self.get_selected_organism()
                table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
                table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def toggle_radio_options(self):
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        if self.gene_matcher.genes:
            # enable checkbox if unknown genes are detected
            self.exclude_radio.setEnabled(
                len(self.gene_matcher.genes) != len(
                    self.gene_matcher.get_known_genes()))
            self.exclude_unmatched = len(self.gene_matcher.genes) != len(
                self.gene_matcher.get_known_genes())

    def get_target_ids(self):
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
示例#2
0
class OWPredictions(OWWidget):
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        scorer_failed = \
            Msg("One or more scorers failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box,
                   self,
                   "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(self.controlArea,
                                                     "Show",
                                                     spacing=-1,
                                                     addSpace=False)

        gui.checkBox(box,
                     self,
                     "show_predictions",
                     "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box,
                         self,
                         "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box,
                               sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox,
                    self,
                    "selected_classes",
                    "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box,
                     self,
                     "draw_dist",
                     "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box,
                     self,
                     "show_attrs",
                     "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_attrs",
                                           "Original data",
                                           callback=self.commit)
        self.checkbox_class = gui.checkBox(box,
                                           self,
                                           "output_predictions",
                                           "Predictions",
                                           callback=self.commit)
        self.checkbox_prob = gui.checkBox(box,
                                          self,
                                          "output_probabilities",
                                          "Probabilities",
                                          callback=self.commit)

        gui.rubber(self.controlArea)

        self.vsplitter = QSplitter(orientation=Qt.Vertical,
                                   childrenCollapsible=True,
                                   handleWidth=2)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter.addWidget(self.splitter)
        self.vsplitter.addWidget(self.score_table.view)
        self.vsplitter.setStretchFactor(0, 5)
        self.vsplitter.setStretchFactor(1, 1)
        self.mainArea.layout().addWidget(self.vsplitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset"""
        if data is not None and not data:
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def _set_class_var(self):
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
                         and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)
        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        self._set_class_var()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        if not self.data:
            return
        for inputid, slot in self.predictors.items():
            if slot.results is not None \
                    and not isinstance(slot.results, str) \
                    and not numpy.isnan(slot.results.predicted[0]).all():
                continue
            try:
                pred, prob = self.predict(slot.predictor, self.data)
            except (ValueError, DomainTransformationError) as err:
                results = "{}: {}".format(slot.predictor.name, err)
            else:
                results = Orange.evaluation.Results()
                results.data = self.data
                results.domain = self.data.domain
                results.row_indices = numpy.arange(len(self.data))
                results.folds = (Ellipsis, )
                results.actual = self.data.Y
                results.predicted = pred.reshape((1, len(self.data)))
                results.probabilities = prob.reshape((1, ) + prob.shape)
            self.predictors[inputid] = slot._replace(results=results)

    def _update_scores(self):
        model = self.score_table.model
        model.clear()

        if self.data is None or self.data.domain.class_var is None:
            scorers = []
        else:
            scorers = usable_scorers(self.data.domain.class_var)
        self.score_table.update_header(scorers)

        errors = []
        for inputid, pred in self.predictors.items():
            name = learner_name(pred.predictor)
            head = QStandardItem(name)
            #            head.setData(key, Qt.UserRole)
            row = [head]
            results = self.predictors[inputid].results
            if isinstance(results, str):
                head.setToolTip(results)
                head.setText("{} (error)".format(name))
                head.setForeground(QBrush(Qt.red))
            else:
                for scorer in scorers:
                    item = QStandardItem()
                    try:
                        score = scorer_caller(scorer, results)()[0]
                        item.setText(f"{score:.3f}")
                    except Exception as exc:  # pylint: disable=broad-except
                        item.setToolTip(str(exc))
                        if scorer.name in self.score_table.shown_scores:
                            errors.append(str(exc))
                    row.append(item)
            self.score_table.model.appendRow(row)
        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        self.Error.predictor_failed(errors, shown=bool(errors))

    def _update_info(self):
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        if self.class_var is not None and self.data is not None:
            return [
                p for p in self.predictors.values()
                if p.results is not None and not isinstance(p.results, str)
            ]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                if isinstance(p.results, str):
                    continue
                values = p.results.predicted[0]
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = p.results.probabilities[0]
                    prob = numpy.c_[prob,
                                    numpy.zeros(
                                        (prob.shape[0], len(class_var.values) -
                                         prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                else:
                    prob = numpy.zeros((len(values), 0))
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort([
                    predmodel.mapToSource(predmodel.index(i, 0)).row()
                    for i in range(n)
                ])
                sortind = numpy.array(sortind, numpy.int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        colors = [QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist and colors is not None:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        delegate.setFormat("{{value:{}}}".format(
            self.class_var.format_str[1:]))

    def _update_spliter(self):
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [
                DiscreteVariable(name=p.name, values=self.class_values)
                for p in slots
            ]
            newcolumns += [
                p.results.predicted[0].reshape((-1, 1)) for p in slots
            ]

        if self.output_probabilities:
            newmetas += [
                ContinuousVariable(name="%s (%s)" % (p.name, value))
                for p in slots for value in self.class_values
            ]
            newcolumns += [p.results.probabilities[0] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results.predicted[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical, Qt.DisplayRole)] + \
                      [style(predictions_model.data(predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1,
                              header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            else:
                return cls.predict_continuous(predictor, data)
        return None

    @staticmethod
    def predict_discrete(predictor, data):
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        values = predictor(data, Model.Value)
        return values, numpy.zeros((len(data), 0))