Пример #1
0
    def on_done(self, result: Results):
        model = QStandardItemModel()
        for item in result.items:
            model.appendRow(item)

        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(Header.labels())

        self.filter_proxy_model.setSourceModel(model)
        self.tree_view.selectionModel().selectionChanged.connect(self.commit)
        self.filter_view()
        self.update_info_box()
Пример #2
0
    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results), default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels([
            "Category", "Term", "Count", "Reference count", "p-value", "FDR",
            "Enrichment"
        ])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped,
                     user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped,
                     user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(
                ),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit)

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(
            gsname(geneset) for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames), set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max(
                (e.enrichment_score
                 for _, e in results if np.isfinite(e.enrichment_score)),
                default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score)))

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView))

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))


#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")
Пример #3
0
    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")
Пример #4
0
class OWGeneSets(OWWidget):
    name = "Gene Sets"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9
    want_main_area = True

    COUNT, GENES, CATEGORY, TERM = range(4)
    DATA_HEADER_LABELS = ["Count", 'Genes In Set', 'Category', 'Term']

    organism = Setting(None, schema_only=True)
    stored_gene_sets_selection = Setting([], schema_only=True)
    selected_rows = Setting([], schema_only=True)
    custom_gene_set_indicator = Setting(None, schema_only=True)

    min_count = Setting(5)
    use_min_count = Setting(True)
    auto_commit = Setting(True)

    class Inputs:
        genes = Input("Data", Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        all_sets_filtered = Msg('All sets were filtered out.')

    class Error(OWWidget.Error):
        organism_mismatch = Msg('Organism in input data and custom gene sets does not match')
        missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID)
        cant_reach_host = Msg("Host orange.biolab.si is unreachable.")
        cant_load_organisms = Msg("No available organisms, please check your connection.")

    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable, StringVariable))
        self.custom_gs_col_box = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        # Gene Sets widget
        self.gs_widget = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()

    def __reset_widget_state(self):
        self.update_info_box()
        # clear data view
        self.init_item_model()
        # reset filters
        self.setup_filter_model()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._init_gene_sets_finished)
            self._task = None

    @Slot()
    def progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def __get_input_genes(self):
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes.append(str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def handle_custom_gene_sets(self, select_customs_flag=False):
        if self.custom_gene_set_indicator:
            if self.custom_data is not None and self.custom_gene_id_column is not None:

                if self.__check_organism_mismatch():
                    # self.gs_label_combobox.setDisabled(True)
                    self.Error.organism_mismatch()
                    self.gs_widget.update_gs_hierarchy()
                    return

                if isinstance(self.custom_gene_set_indicator, DiscreteVariable):
                    labels = self.custom_gene_set_indicator.values
                    gene_sets_names = [
                        labels[int(idx)] for idx in self.custom_data.get_column_view(self.custom_gene_set_indicator)[0]
                    ]
                else:
                    gene_sets_names, _ = self.custom_data.get_column_view(self.custom_gene_set_indicator)

                self.num_of_custom_sets = len(set(gene_sets_names))
                gene_names, _ = self.custom_data.get_column_view(self.custom_gene_id_column)
                hierarchy_title = (self.custom_data.name if self.custom_data.name else 'Custom sets',)
                try:
                    self.gs_widget.add_custom_sets(
                        gene_sets_names,
                        gene_names,
                        hierarchy_title=hierarchy_title,
                        select_customs_flag=select_customs_flag,
                    )
                except geneset.GeneSetException:
                    pass
                # self.gs_label_combobox.setDisabled(False)
            else:
                self.gs_widget.update_gs_hierarchy()

        self.update_info_box()

    def update_tree_view(self):
        self.init_gene_sets()

    def invalidate(self):
        # clear
        self.__reset_widget_state()
        self.update_info_box()

        if self.input_data is not None:
            # setup
            self.__get_input_genes()
            self.update_tree_view()

    def __check_organism_mismatch(self):
        """ Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            return self.tax_id != self.custom_tax_id
        return False

    def __get_reference_genes(self):
        self.reference_genes = []

        if self.reference_attr_names:
            for variable in self.reference_data.domain.attributes:
                self.reference_genes.append(str(variable.attributes.get(self.reference_gene_id_attribute, '?')))
        else:
            genes, _ = self.reference_data.get_column_view(self.reference_gene_id_column)
            self.reference_genes = [str(g) for g in genes]

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        self.Error.clear()
        self.__reset_widget_state()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            self.custom_tax_id = str(self.custom_data.attributes.get(TAX_ID, None))
            self.custom_use_attr_names = self.custom_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(GENE_ID_COLUMN, None)

            if self.gs_label_combobox is None:
                self.gs_label_combobox = comboBox(
                    self.custom_gs_col_box,
                    self,
                    "custom_gene_set_indicator",
                    sendSelectedValue=True,
                    model=self.feature_model,
                    callback=self.on_gene_set_indicator_changed,
                )
            self.custom_gs_col_box.show()

            if self.custom_gene_set_indicator in self.feature_model:
                index = self.feature_model.indexOf(self.custom_gene_set_indicator)
                self.custom_gene_set_indicator = self.feature_model[index]
            else:
                self.custom_gene_set_indicator = self.feature_model[0]
        else:
            self.custom_gs_col_box.hide()

        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets(select_customs_flag=self.custom_gene_set_indicator is not None)
        self.invalidate()

    @Inputs.genes
    def handle_genes_input(self, data):
        self.Error.clear()
        self.__reset_widget_state()
        # clear output
        self.Outputs.matched_genes.send(None)
        # clear input values
        self.input_genes = []
        self.input_data = None
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.update_info_box()

        if data:
            self.input_data = data
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.input_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(GENE_ID_COLUMN, None)
            self.update_info_box()

            if not (
                self.use_attr_names is not None and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))
            ):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            if self.__check_organism_mismatch():
                self.Error.organism_mismatch()
                return

            self.gs_widget.load_gene_sets(self.tax_id)

            # if input data change, we need to refresh custom sets
            if self.custom_data:
                self.gs_widget.clear_custom_sets()
                self.handle_custom_gene_sets()

            self.invalidate()

    def update_info_box(self):
        info_string = ''
        if self.input_genes:
            info_string += '{} unique gene names on input.\n'.format(len(self.input_genes))
            info_string += '{} genes on output.\n'.format(self.num_of_sel_genes)
        else:
            if self.input_data:
                if not any([self.gene_id_column, self.gene_id_attribute]):
                    info_string += 'Input data with incorrect meta data.\nUse Gene Name Matcher widget.'
            else:
                info_string += 'No data on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def create_partial(self):
        return partial(
            self.set_items,
            self.gs_widget.gs_object,
            self.stored_gene_sets_selection,
            set(self.input_genes),
            self.callback,
        )

    def callback(self):
        if self._task.cancelled:
            raise KeyboardInterrupt()
        if self.progress_bar:
            methodinvoke(self, "progress_advance")()

    def init_gene_sets(self):
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task()
        self.init_item_model()

        # save setting on selected hierarchies
        self.stored_gene_sets_selection = self.gs_widget.get_hierarchies(only_selected=True)

        f = self.create_partial()

        progress_iterations = sum(
            (
                len(g_set)
                for hier, g_set in self.gs_widget.gs_object.map_hierarchy_to_sets().items()
                if hier in self.stored_gene_sets_selection
            )
        )

        self.progress_bar = ProgressBar(self, iterations=progress_iterations)

        self._task.future = self._executor.submit(f)

        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._init_gene_sets_finished)

    @Slot(concurrent.futures.Future)
    def _init_gene_sets_finished(self, f):
        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progress_bar.finish()
        self.setStatusMessage('')

        try:
            results = f.result()  # type: list
            [self.data_model.appendRow(model_item) for model_item in results]
            self.filter_proxy_model.setSourceModel(self.data_model)
            self.data_view.selectionModel().selectionChanged.connect(self.commit)
            self.filter_data_view()
            self.set_selection()
            self.update_info_box()
        except Exception as ex:
            print(ex)

    def create_filters(self):
        search_term = self.search_pattern.lower().strip().split()

        filters = [
            FilterProxyModel.Filter(
                self.TERM, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in search_term)
            )
        ]

        if self.use_min_count:
            filters.append(FilterProxyModel.Filter(self.COUNT, Qt.DisplayRole, lambda value: value >= self.min_count))

        return filters

    def filter_data_view(self):
        filter_proxy = self.filter_proxy_model  # type: FilterProxyModel
        model = filter_proxy.sourceModel()  # type: QStandardItemModel

        if isinstance(model, QStandardItemModel):

            # apply filtering rules
            filter_proxy.set_filters(self.create_filters())

            if model.rowCount() and not filter_proxy.rowCount():
                self.Warning.all_sets_filtered()
            else:
                self.Warning.clear()

    def set_selection(self):
        if len(self.selected_rows):
            view = self.data_view
            model = self.data_model

            row_model_indexes = [model.indexFromItem(model.item(i)) for i in self.selected_rows]
            proxy_rows = [self.filter_proxy_model.mapFromSource(i).row() for i in row_model_indexes]

            if model.rowCount() <= self.selected_rows[-1]:
                return

            header_count = view.header().count() - 1
            selection = QItemSelection()

            for row_index in proxy_rows:
                selection.append(
                    QItemSelectionRange(
                        self.filter_proxy_model.index(row_index, 0),
                        self.filter_proxy_model.index(row_index, header_count),
                    )
                )

            view.selectionModel().select(selection, QItemSelectionModel.ClearAndSelect)

    def commit(self):
        selection_model = self.data_view.selectionModel()

        if selection_model:
            selection = selection_model.selectedRows(self.COUNT)
            self.selected_rows = [self.filter_proxy_model.mapToSource(sel).row() for sel in selection]

            if selection and self.input_genes:
                genes = [model_index.data(Qt.UserRole) for model_index in selection]
                output_genes = [gene_name for gene_name in list(set.union(*genes))]
                self.num_of_sel_genes = len(output_genes)
                self.update_info_box()

                if self.use_attr_names:
                    selected = [
                        column
                        for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes
                        and str(column.attributes[self.gene_id_attribute]) in output_genes
                    ]

                    domain = Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas)
                    new_data = self.input_data.from_table(domain, self.input_data)
                    self.Outputs.matched_genes.send(new_data)

                else:
                    # create filter from selected column for genes
                    only_known = table_filter.FilterStringList(self.gene_id_column, output_genes)
                    # apply filter to the data
                    data_table = table_filter.Values([only_known])(self.input_data)

                    self.Outputs.matched_genes.send(data_table)

    def assign_delegates(self):
        self.data_view.setItemDelegateForColumn(self.GENES, NumericalColumnDelegate(self))

        self.data_view.setItemDelegateForColumn(self.COUNT, NumericalColumnDelegate(self))

    def setup_filter_model(self):
        self.filter_proxy_model = FilterProxyModel()
        self.filter_proxy_model.setFilterKeyColumn(self.TERM)
        self.data_view.setModel(self.filter_proxy_model)

    def setup_filter_area(self):
        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(
            h_widget,
            self,
            'min_count',
            0,
            1000,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_min_count',
            callback=self.filter_data_view,
            callbackOnReturn=True,
            checkCallback=self.filter_data_view,
        )

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_data_view)

    def on_gene_set_indicator_changed(self):
        # self._handle_future_model()
        self.gs_widget.clear_custom_sets()
        self.handle_custom_gene_sets()
        self.invalidate()

    def setup_control_area(self):
        # Control area
        self.input_info = widgetLabel(widgetBox(self.controlArea, "Info", addSpace=True), 'No data on input.\n')
        self.custom_gs_col_box = box = vBox(self.controlArea, 'Custom Gene Set Term Column')
        box.hide()

        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self, 'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(self.update_tree_view)

        self.commit_button = auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

    def setup_gui(self):
        # control area
        self.setup_control_area()

        # main area
        self.data_view = QTreeView()
        self.setup_filter_model()
        self.setup_filter_area()
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(False)
        self.data_view.setItemDelegateForColumn(self.TERM, LinkStyledItemDelegate(self.data_view))

        self.mainArea.layout().addWidget(self.data_view)

        self.data_view.header().setSectionResizeMode(QHeaderView.ResizeToContents)
        self.assign_delegates()

    @staticmethod
    def set_items(gene_sets, sets_to_display, genes, callback):
        model_items = []
        if not genes:
            return

        for gene_set in sorted(gene_sets):
            if gene_set.hierarchy not in sets_to_display:
                continue

            callback()

            matched_set = gene_set.genes & genes
            if len(matched_set) > 0:
                category_column = QStandardItem()
                term_column = QStandardItem()
                count_column = QStandardItem()
                genes_column = QStandardItem()

                category_column.setData(", ".join(gene_set.hierarchy), Qt.DisplayRole)
                term_column.setData(gene_set.name, Qt.DisplayRole)
                term_column.setData(gene_set.name, Qt.ToolTipRole)
                term_column.setData(gene_set.link, LinkRole)
                term_column.setForeground(QColor(Qt.blue))

                count_column.setData(matched_set, Qt.UserRole)
                count_column.setData(len(matched_set), Qt.DisplayRole)

                genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
                genes_column.setData(
                    set(gene_set.genes), Qt.UserRole
                )  # store genes to get then on output on selection

                model_items.append([count_column, genes_column, category_column, term_column])

        return model_items

    def init_item_model(self):
        if self.data_model:
            self.data_model.clear()
            self.setup_filter_model()
        else:
            self.data_model = QStandardItemModel()

        self.data_model.setSortRole(Qt.UserRole)
        self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS)

    def sizeHint(self):
        return QSize(1280, 960)
Пример #5
0
class OWSetEnrichment(widget.OWWidget):
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    class Error(widget.OWWidget.Error):
        no_gene_names = Msg("Input data contains no columns with gene names")
        no_data_onInput = Msg("No data on input")

    class Warning(widget.OWWidget.Warning):
        no_sets_found = Msg("No enriched sets found")


    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QLineEdit(self, placeholderText="Search ...")

        self.filterCompleter = QCompleter(self.filterLineEdit)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )

        self.source_model = QStandardItemModel()
        self.source_model.setSortRole(Qt.UserRole)
        self.source_model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])

        self.proxy_model = CustomFilterModel(self.annotationsChartView)
        self.proxy_model.setFilterKeyColumn(1)  # filter only by name (second column).

        self.annotationsChartView.setModel(self.proxy_model)

        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QThreadPool(self))
        self._executor.submit(task)

    def string_search(self):
        self.annotationsChartView.model().setFilterFixedString(self.filterLineEdit.text())

    def sizeHint(self):
        return QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the the widget's initialization (preferably after
        # ensuring all required databases have been downloaded.

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids
        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        self._clearView()

        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.Error.clear()
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")

            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        self.__invalidated = True

    def genesFromTable(self, table):
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                # compute FDR according the selected categories
                self.compute_fdr()
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        raise NotImplementedError

        from .OWGOEnrichmentAnalysis import GeneMatcherDialog
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True)
        if dialog.exec_():
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            self._invalidateGeneMatcher()
            if self.data is not None:
                self.updateAnnotations()

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.Warning.clear()
        self.Error.clear()

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.Error.no_gene_names()
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes == None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = self.source_model

        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.Warning.no_sets_found()
        else:
            # compute FDR according the selected categories
            self.compute_fdr()
            # set source model if there are sets found
            self.filterAnnotationsChartView()
            self.proxy_model.setSourceModel(model)
            self.Warning.clear()

        self._updatesummary()

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        state = self.state
        if state is None:
            self.Error.no_data_onInput()
            self.Warning.clear()
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def return_filter_values(self):
        categories = set(", ".join(category) for category, _ in self.selectedCategories())
        return {
            "useMinCountFilter": self.useMinCountFilter,
            "useMaxPValFilter": self.useMaxPValFilter,
            "minClusterCount": self.minClusterCount,
            "useMaxFDRFilter": self.useMaxFDRFilter,
            "maxPValue": self.maxPValue,
            "categories": categories,
            "maxFDR": self.maxFDR
        }

    def compute_fdr(self):
        selected_categories = self.return_filter_values().get('categories')
        data_model = self.source_model

        # get pvalues of rows that match selected category
        pvals = [(row_index, data_model.index(row_index, 4).data(role=Qt.UserRole))
                 for row_index in range(data_model.rowCount())
                 if data_model.index(row_index, 0).data(role=Qt.DisplayRole) in selected_categories]

        fdrs = utils.stats.FDR([pval for _, pval in pvals])

        # set calculated values to the data model
        for (row_index, pval), fdr in zip(pvals, fdrs):
            fdr_item = data_model.item(row_index, 5)
            fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
            fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
            fdr_item.setData(fdr, Qt.UserRole)

    def filterAnnotationsChartView(self):
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # set string pattern
        self.proxy_model._pattern = str(self.filterLineEdit.text())
        # set filter values to proxy model
        self.proxy_model.set_filter_values(self.return_filter_values())
        # filter model
        self.proxy_model.invalidateFilter()

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.source_model
        rows = self.annotationsChartView.selectionModel().selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (set(item.enrichment.query_mapped)
                         for item in selected),
                        set())
        assert self.state.namematcher.done()
        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
class OWGeneSets(OWWidget):
    name = "Gene Set Enrichment"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9
    want_main_area = True
    settingsHandler = OrganismContextHandler()

    # settings
    auto_commit = Setting(True)
    stored_selections = ContextSetting([])
    organism = ContextSetting(None)

    min_count = Setting(5)
    use_min_count = Setting(True)

    max_p_value = Setting(0.0001)
    use_p_value = Setting(False)

    max_fdr = Setting(0.01)
    use_max_fdr = Setting(True)

    use_reference_data = Setting(True)

    COUNT, REFERENCE, P_VAL, FDR, ENRICHMENT, GENES, CATEGORY, TERM = range(8)
    DATA_HEADER_LABELS = [
        "Count", 'Reference', 'p-Value', 'FDR', 'Enrichment', 'Genes In Set',
        'Category', 'Term'
    ]

    class Inputs:
        genes = Input("Genes", Table)
        custom_sets = Input('Custom Gene Sets', Table)
        reference = Input("Reference Genes", Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        all_sets_filtered = Msg('All sets were filtered out.')

    class Error(OWWidget.Error):
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID)
        cant_reach_host = Msg("Host orange.biolab.si is unreachable.")
        cant_load_organisms = Msg(
            "No available organisms, please check your connection.")

    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # gene sets object
        self.gene_sets_obj = geneset.GeneSets()

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # custom gene sets
        self.custom_data = None
        self.feature_model = DomainModel(valid_types=(DiscreteVariable,
                                                      StringVariable))
        self.gene_set_label = None
        self.gs_label_combobox = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None

        # reference genes
        self.reference_radio_box = None
        self.reference_data = None
        self.reference_genes = None

        self.reference_tax_id = None
        self.reference_attr_names = None
        self.reference_gene_id_attribute = None
        self.reference_gene_id_column = None

        # info box
        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.line_edit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # spinbox
        self.spin_widget = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        self._task = None  # type: Optional[Task]
        self._executor = ThreadExecutor()

        # gui
        self.setup_gui()

    def __reset_widget_state(self):
        # reset hierarchy widget state
        self.hierarchy_widget.clear()
        # clear data view
        self.init_item_model()
        # reset filters
        self.setup_filter_model()

    def cancel(self):
        """
        Cancel the current task (if any).
        """
        if self._task is not None:
            self._task.cancel()
            assert self._task.future.done()
            # disconnect the `_task_finished` slot
            self._task.watcher.done.disconnect(self._init_gene_sets_finished)
            self._task = None

    @Slot()
    def progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def __get_input_genes(self):
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes.append(
                    str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def __construct_custom_gene_sets(self):
        custom_set_hier = ('Custom sets', )

        # delete any custom sets if they exists
        self.gene_sets_obj.delete_sets_by_hierarchy(custom_set_hier)

        if self.custom_data and self.custom_gene_id_column:

            gene_sets_names, _ = self.custom_data.get_column_view(
                self.gene_set_label)
            gene_names, _ = self.custom_data.get_column_view(
                self.custom_gene_id_column)

            temp_dict = defaultdict(list)
            for set_name, gene_name in zip(gene_sets_names, gene_names):
                temp_dict[set_name].append(gene_name)

            g_sets = []
            for key, value in temp_dict.items():
                g_sets.append(
                    geneset.GeneSet(gs_id=key,
                                    hierarchy=custom_set_hier,
                                    organism=self.custom_tax_id,
                                    name=key,
                                    genes=set(value)))

            self.gene_sets_obj.update(g_sets)

    def __update_hierarchy(self):
        self.set_hierarchy_model(
            self.hierarchy_widget,
            hierarchy_tree(self.gene_sets_obj.hierarchies()))
        self.set_selected_hierarchies()

    def update_tree_view(self):
        if self.use_reference_data and self.reference_data:
            self.init_gene_sets(reference_genes=self.reference_genes)
        else:
            self.init_gene_sets()

    def invalidate(self):
        # clear
        self.__reset_widget_state()
        self.update_info_box()

        if self.input_data is not None:
            # setup
            self.__construct_custom_gene_sets()
            self.__get_input_genes()
            self.__update_hierarchy()
            self.update_tree_view()

    def __check_organism_mismatch(self):
        """ Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            return self.tax_id != self.custom_tax_id
        return False

    def __get_reference_genes(self):
        self.reference_genes = []

        if self.reference_attr_names:
            for variable in self.reference_data.domain.attributes:
                self.reference_genes.append(
                    str(
                        variable.attributes.get(
                            self.reference_gene_id_attribute, '?')))
        else:
            genes, _ = self.reference_data.get_column_view(
                self.reference_gene_id_column)
            self.reference_genes = [str(g) for g in genes]

    @Inputs.reference
    def handle_reference_genes(self, data):
        """
        Set the (optional) input dataset with reference gene names.
        """

        if data:
            self.reference_data = data
            self.reference_tax_id = str(
                self.reference_data.attributes.get(TAX_ID, None))
            self.reference_attr_names = self.reference_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.reference_gene_id_attribute = self.reference_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.reference_gene_id_column = self.reference_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.reference_attr_names is not None and
                    ((self.reference_gene_id_attribute is None) ^
                     (self.reference_gene_id_column is None))):

                if self.reference_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.reference_tax_id is None:
                self.Error.missing_tax_id()
                return

        self.__get_reference_genes()
        self.reference_radio_box.setEnabled(bool(self.reference_data))
        self.invalidate()

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        self.Error.clear()
        self.__reset_widget_state()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.gs_label_combobox.setDisabled(True)
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.custom_tax_id = str(
                self.custom_data.attributes.get(TAX_ID, None))
            self.custom_use_attr_names = self.custom_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.custom_use_attr_names is not None and
                    ((self.custom_gene_id_attribute is None) ^
                     (self.custom_gene_id_column is None))):

                if self.custom_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.custom_tax_id is None:
                self.Error.missing_tax_id()
                return

            if self.__check_organism_mismatch():
                self.Error.organism_mismatch()
                return

            self.gs_label_combobox.setDisabled(False)
            self.feature_model.set_domain(self.custom_data.domain)

            if self.feature_model:
                self.gene_set_label = self.feature_model[0]

        self.invalidate()

    @Inputs.genes
    def handle_genes_input(self, data):
        self.closeContext()
        self.Error.clear()
        self.__reset_widget_state()
        # clear output
        self.Outputs.matched_genes.send(None)
        # clear input genes
        self.input_genes = []
        self.gs_label_combobox.setDisabled(True)
        self.update_info_box()

        if data:
            self.input_data = data
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.use_attr_names is not None and
                    ((self.gene_id_attribute is None) ^
                     (self.gene_id_column is None))):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            if self.__check_organism_mismatch():
                self.Error.organism_mismatch()
                return

            self.openContext(self.tax_id)

            # if input data change, we need to set feature model again
            if self.custom_data:
                self.gs_label_combobox.setDisabled(False)
                self.feature_model.set_domain(self.custom_data.domain)

                if self.feature_model:
                    self.gene_set_label = self.feature_model[0]

            self.download_gene_sets()

    def update_info_box(self):
        info_string = ''
        if self.input_genes:
            info_string += '{} unique gene names on input.\n'.format(
                len(self.input_genes))
            info_string += '{} genes on output.\n'.format(
                self.num_of_sel_genes)
        else:
            info_string += 'No genes on input.\n'

        self.input_info.setText(info_string)

    def on_gene_sets_download(self, result):
        # make sure this happens in the main thread.
        # Qt insists that widgets be created within the GUI(main) thread.
        assert threading.current_thread() == threading.main_thread()
        self.setStatusMessage('')

        if result:
            for res in result:
                g_sets = geneset.load_gene_sets(res, self.tax_id)
                self.gene_sets_obj.update([g_set for g_set in g_sets])

        # add custom sets if there are any
        self.invalidate()
        self.update_info_box()

    def download_gene_sets(self):
        self.Error.clear()
        # reset hierarchy widget state
        self.hierarchy_widget.clear()
        # clear data view
        self.init_item_model()

        # get all gene sets for selected organism
        gene_sets = geneset.list_all(organism=self.tax_id)
        # status message
        self.setStatusMessage('downloading sets')

        worker = Worker(download_gene_sets, self.tax_id, gene_sets)
        worker.signals.result.connect(self.on_gene_sets_download)

        # move download process to worker thread
        self.threadpool.start(worker)

    def set_hierarchy_model(self, tree_widget, sets):
        def beautify_displayed_text(text):
            if '_' in text:
                return text.replace('_', ' ').title()
            else:
                return text

        # TODO: maybe optimize this code?
        for key, value in sets.items():
            item = QTreeWidgetItem(tree_widget, [beautify_displayed_text(key)])
            item.setFlags(item.flags()
                          & (Qt.ItemIsUserCheckable | ~Qt.ItemIsSelectable
                             | Qt.ItemIsEnabled))
            item.setExpanded(True)
            item.hierarchy = key

            if value:
                item.setFlags(item.flags() | Qt.ItemIsTristate)
                self.set_hierarchy_model(item, value)
            else:
                if item.parent():
                    item.hierarchy = (item.parent().hierarchy, key)

            if not item.childCount() and not item.parent():
                item.hierarchy = (key, )

    def init_gene_sets(self, reference_genes=None):
        if self._task is not None:
            self.cancel()
        assert self._task is None

        self._task = Task()
        progress_advance = methodinvoke(self, "progress_advance")

        def callback():
            if self._task.cancelled:
                raise KeyboardInterrupt()
            if self.progress_bar:
                progress_advance()

        if reference_genes is None:
            reference_genes = self.gene_sets_obj.genes()

        self.init_item_model()

        sets_to_display = self.get_hierarchies(only_selected=True)
        # save setting on selected hierarchies
        self.stored_selections = sets_to_display
        # save context
        self.closeContext()

        f = partial(self.set_items,
                    self.gene_sets_obj,
                    sets_to_display,
                    set(self.input_genes),
                    reference_genes,
                    self.min_count if self.use_min_count else 1,
                    callback=callback)

        progress_iterations = sum([
            len(g_set) for hier, g_set in
            self.gene_sets_obj.map_hierarchy_to_sets().items()
            if hier in sets_to_display
        ])

        self.progress_bar = ProgressBar(self, iterations=progress_iterations)

        self._task.future = self._executor.submit(f)

        self._task.watcher = FutureWatcher(self._task.future)
        self._task.watcher.done.connect(self._init_gene_sets_finished)

        self.openContext(self.tax_id)

    @Slot(concurrent.futures.Future)
    def _init_gene_sets_finished(self, f):
        assert self.thread() is QThread.currentThread()
        assert threading.current_thread() == threading.main_thread()
        assert self._task is not None
        assert self._task.future is f
        assert f.done()

        self._task = None
        self.progress_bar.finish()
        self.setStatusMessage('')

        try:
            results = f.result()  # type: list
            [self.data_model.appendRow(model_item) for model_item in results]
            self.filter_proxy_model.setSourceModel(self.data_model)
            self._update_fdr()
            self.filter_data_view()
        except Exception as ex:
            print(ex)

    def set_selected_hierarchies(self):
        iterator = QTreeWidgetItemIterator(self.hierarchy_widget,
                                           QTreeWidgetItemIterator.All)

        while iterator.value():
            # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that
            #       holds subcategories. We don't want to display all sets from category
            if type(iterator.value().hierarchy) is not str:
                if iterator.value().hierarchy in self.stored_selections:
                    iterator.value().setCheckState(0, Qt.Checked)
                else:
                    iterator.value().setCheckState(0, Qt.Unchecked)

            iterator += 1

        # if no items are checked, we check first one at random
        if len(self.get_hierarchies(only_selected=True)) == 0:
            iterator = QTreeWidgetItemIterator(
                self.hierarchy_widget, QTreeWidgetItemIterator.NotChecked)

            while iterator.value():
                if type(iterator.value().hierarchy) is not str:
                    iterator.value().setCheckState(0, Qt.Checked)
                    return

                iterator += 1

    def get_hierarchies(self, **kwargs):
        """ return selected hierarchy
        """
        only_selected = kwargs.get('only_selected', None)

        sets_to_display = list()

        if only_selected:
            iterator = QTreeWidgetItemIterator(self.hierarchy_widget,
                                               QTreeWidgetItemIterator.Checked)
        else:
            iterator = QTreeWidgetItemIterator(self.hierarchy_widget)

        while iterator.value():
            # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that
            #       holds subcategories. We don't want to display all sets from category
            if type(iterator.value().hierarchy) is not str:

                if not only_selected:
                    sets_to_display.append(iterator.value().hierarchy)
                else:
                    if not iterator.value().isDisabled():
                        sets_to_display.append(iterator.value().hierarchy)

            iterator += 1

        return sets_to_display

    def filter_data_view(self):
        filter_proxy = self.filter_proxy_model  # type: FilterProxyModel
        model = filter_proxy.sourceModel()  # type: QStandardItemModel

        assert isinstance(model, QStandardItemModel)

        search_term = self.search_pattern.lower().strip().split()

        # apply filtering rules
        filters = [
            FilterProxyModel.Filter(
                self.TERM, Qt.DisplayRole,
                lambda value: all(fs in value.lower() for fs in search_term))
        ]

        # if self.use_min_count:
        #    filters.append(
        #        FilterProxyModel.Filter(
        #            self.COUNT, Qt.DisplayRole,
        #            lambda value: value >= self.min_count,
        #        )
        #    )

        if self.use_p_value:
            filters.append(
                FilterProxyModel.Filter(
                    self.P_VAL, Qt.DisplayRole,
                    lambda value: value < self.max_p_value))

        if self.use_max_fdr:
            filters.append(
                FilterProxyModel.Filter(self.FDR, Qt.DisplayRole,
                                        lambda value: value < self.max_fdr))

        filter_proxy.set_filters(filters)

        if model.rowCount() and not filter_proxy.rowCount():
            self.Warning.all_sets_filtered()
        else:
            self.Warning.clear()

    def __get_source_data(self, proxy_row_index, column):
        proxy_index = self.filter_proxy_model.index(proxy_row_index, column)
        source_index = self.filter_proxy_model.mapToSource(proxy_index)
        return source_index.data(role=Qt.DisplayRole)

    def _update_fdr(self):
        # Update the FDR in place due to a changed selected categories set and
        # results for all of these categories are already available.
        proxy = self.filter_proxy_model
        model = self.filter_proxy_model.sourceModel()

        if model is not None:
            assert isinstance(model, QStandardItemModel)

            p_values = [(i, self.__get_source_data(i, self.P_VAL))
                        for i in range(proxy.rowCount())]
            fdr_values = FDR([p_val for _, p_val in p_values])

            for i, fdr_val in zip([i for i, _ in p_values], fdr_values):
                proxy_index = proxy.index(i, self.FDR)
                source_index = self.filter_proxy_model.mapToSource(proxy_index)
                source_item = model.item(source_index.row(), self.FDR)
                source_item.setData(fdr_val, role=Qt.DisplayRole)
                source_item.setData(fdr_val, role=Qt.ToolTipRole)

    def commit(self):
        selection_model = self.data_view.selectionModel()

        if selection_model:
            # genes_from_set = selection_model.selectedRows(GENES)
            matched_genes = selection_model.selectedRows(self.COUNT)

            if matched_genes and self.input_genes:
                genes = [
                    model_index.data(Qt.UserRole)
                    for model_index in matched_genes
                ]
                output_genes = [
                    gene_name for gene_name in list(set.union(*genes))
                ]
                self.num_of_sel_genes = len(output_genes)
                self.update_info_box()

                if self.use_attr_names:
                    selected = [
                        column for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes
                        and str(column.attributes[
                            self.gene_id_attribute]) in output_genes
                    ]

                    domain = Domain(selected,
                                    self.input_data.domain.class_vars,
                                    self.input_data.domain.metas)
                    new_data = self.input_data.from_table(
                        domain, self.input_data)
                    self.Outputs.matched_genes.send(new_data)

                else:
                    selected_rows = []
                    for row_index, row in enumerate(self.input_data):
                        gene_in_row = str(row[self.gene_id_column])
                        if gene_in_row in self.input_genes and gene_in_row in output_genes:
                            selected_rows.append(row_index)

                    if selected_rows:
                        selected = self.input_data[selected_rows]
                    else:
                        selected = None

                    self.Outputs.matched_genes.send(selected)

    def assign_delegates(self):
        self.data_view.setItemDelegateForColumn(self.GENES,
                                                NumericalColumnDelegate(self))

        self.data_view.setItemDelegateForColumn(self.COUNT,
                                                NumericalColumnDelegate(self))

        self.data_view.setItemDelegateForColumn(self.REFERENCE,
                                                NumericalColumnDelegate(self))

        self.data_view.setItemDelegateForColumn(
            self.P_VAL, NumericalColumnDelegate(self,
                                                precision=2,
                                                notation='e'))

        self.data_view.setItemDelegateForColumn(
            self.FDR, NumericalColumnDelegate(self, precision=2, notation='e'))

        self.data_view.setItemDelegateForColumn(
            self.ENRICHMENT, NumericalColumnDelegate(self, precision=1))

    def setup_filter_model(self):
        self.filter_proxy_model = FilterProxyModel()
        self.filter_proxy_model.setFilterKeyColumn(self.TERM)
        self.data_view.setModel(self.filter_proxy_model)

    def setup_filter_area(self):
        h_layout = QHBoxLayout()
        h_layout.setSpacing(100)
        h_widget = widgetBox(self.mainArea, orientation=h_layout)

        spin(h_widget,
             self,
             'min_count',
             0,
             100,
             label='Count',
             tooltip='Minimum genes count',
             checked='use_min_count',
             callback=self.invalidate,
             callbackOnReturn=True,
             checkCallback=self.invalidate)

        doubleSpin(h_widget,
                   self,
                   'max_p_value',
                   0.0,
                   1.0,
                   0.0001,
                   label='p-value',
                   tooltip='Maximum p-value of the enrichment score',
                   checked='use_p_value',
                   callback=self.filter_data_view,
                   callbackOnReturn=True,
                   checkCallback=self.filter_data_view)

        doubleSpin(h_widget,
                   self,
                   'max_fdr',
                   0.0,
                   1.0,
                   0.0001,
                   label='FDR',
                   tooltip='Maximum false discovery rate',
                   checked='use_max_fdr',
                   callback=self.filter_data_view,
                   callbackOnReturn=True,
                   checkCallback=self.filter_data_view)

        self.line_edit_filter = lineEdit(h_widget, self, 'search_pattern')
        self.line_edit_filter.setPlaceholderText('Filter gene sets ...')
        self.line_edit_filter.textChanged.connect(self.filter_data_view)

    def setup_control_area(self):
        info_box = vBox(self.controlArea, 'Info')
        self.input_info = widgetLabel(info_box)

        box = vBox(self.controlArea, "Custom Gene Sets")
        self.gs_label_combobox = comboBox(box,
                                          self,
                                          "gene_set_label",
                                          sendSelectedValue=True,
                                          model=self.feature_model,
                                          callback=self.invalidate)
        self.gs_label_combobox.setDisabled(True)

        self.reference_radio_box = radioButtonsInBox(
            self.controlArea,
            self,
            "use_reference_data",
            ["Entire genome", "Reference gene set (input)"],
            tooltips=[
                "Use entire genome (for gene set enrichment)",
                "Use reference set of genes"
            ],
            box="Reference",
            callback=self.invalidate)

        self.reference_radio_box.setEnabled(False)

        hierarchy_box = widgetBox(self.controlArea, "Gene Set Categories")
        self.hierarchy_widget = QTreeWidget(self)
        self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers)
        self.hierarchy_widget.setHeaderLabels([' '])
        self.hierarchy_widget.itemClicked.connect(self.update_tree_view)
        hierarchy_box.layout().addWidget(self.hierarchy_widget)

        self.commit_button = auto_commit(self.controlArea,
                                         self,
                                         "auto_commit",
                                         "&Commit",
                                         box=False)

    def setup_gui(self):
        # control area
        self.setup_control_area()

        # main area
        self.data_view = QTreeView()
        self.setup_filter_model()
        self.setup_filter_area()
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(self.COUNT, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(False)
        self.data_view.setItemDelegateForColumn(
            self.TERM, LinkStyledItemDelegate(self.data_view))
        self.data_view.selectionModel().selectionChanged.connect(self.commit)

        self.mainArea.layout().addWidget(self.data_view)

        self.data_view.header().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.assign_delegates()

    @staticmethod
    def set_items(gene_sets, sets_to_display, genes, ref, count_treshold,
                  callback):
        model_items = []
        if not genes:
            return

        for gene_set in gene_sets:
            if gene_set.hierarchy not in sets_to_display:
                continue
            enrichemnt_result = gene_set.set_enrichment(
                ref, genes.intersection(ref))
            callback()

            if len(enrichemnt_result.query) >= count_treshold:
                category_column = QStandardItem()
                name_column = QStandardItem()
                count_column = QStandardItem()
                genes_column = QStandardItem()
                ref_column = QStandardItem()
                pval_column = QStandardItem()
                fdr_column = QStandardItem()
                enrichemnt_column = QStandardItem()

                category_column.setData(", ".join(gene_set.hierarchy),
                                        Qt.DisplayRole)
                name_column.setData(gene_set.name, Qt.DisplayRole)
                name_column.setData(gene_set.name, Qt.ToolTipRole)
                name_column.setData(gene_set.link, LinkRole)
                name_column.setForeground(QColor(Qt.blue))

                count_column.setData(len(enrichemnt_result.query),
                                     Qt.DisplayRole)
                count_column.setData(set(enrichemnt_result.query), Qt.UserRole)

                genes_column.setData(len(gene_set.genes), Qt.DisplayRole)
                genes_column.setData(
                    set(gene_set.genes), Qt.UserRole
                )  # store genes to get then on output on selection

                ref_column.setData(len(enrichemnt_result.reference),
                                   Qt.DisplayRole)

                pval_column.setData(enrichemnt_result.p_value, Qt.DisplayRole)
                pval_column.setData(enrichemnt_result.p_value, Qt.ToolTipRole)

                enrichemnt_column.setData(enrichemnt_result.enrichment_score,
                                          Qt.DisplayRole)
                enrichemnt_column.setData(enrichemnt_result.enrichment_score,
                                          Qt.ToolTipRole)

                model_items.append([
                    count_column, ref_column, pval_column, fdr_column,
                    enrichemnt_column, genes_column, category_column,
                    name_column
                ])
        return model_items

    def init_item_model(self):
        if self.data_model:
            self.data_model.clear()
            self.setup_filter_model()
        else:
            self.data_model = QStandardItemModel()

        self.data_model.setSortRole(Qt.UserRole)
        self.data_model.setHorizontalHeaderLabels(self.DATA_HEADER_LABELS)

    def sizeHint(self):
        return QSize(1280, 960)
Пример #7
0
class OWGeneSets(OWWidget):
    name = "Gene Sets"
    description = ""
    icon = "icons/OWGeneSets.svg"
    priority = 9
    want_main_area = True
    settingsHandler = OrganismContextHandler()

    # settings
    auto_commit = Setting(True)
    stored_selections = ContextSetting([])
    organism = ContextSetting(None)

    class Inputs:
        genes = Input("Genes", Table)

    class Outputs:
        matched_genes = Output("Matched Genes", Table)

    class Information(OWWidget.Information):
        pass

    class Error(OWWidget.Error):
        missing_annotation = Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = Msg(ERROR_ON_MISSING_TAX_ID)
        cant_reach_host = Msg("Host orange.biolab.si is unreachable.")
        cant_load_organisms = Msg(
            "No available organisms, please check your connection.")

    def __init__(self):
        super().__init__()

        # commit
        self.commit_button = None

        # progress bar
        self.progress_bar = None
        self.progress_bar_iterations = None

        # data
        self.input_data = None
        self.input_genes = None

        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        self.input_info = None
        self.num_of_sel_genes = 0

        # filter
        self.lineEdit_filter = None
        self.search_pattern = ''
        self.organism_select_combobox = None

        # data model view
        self.data_view = None
        self.data_model = None

        # gene matcher NCBI
        self.gene_matcher = None

        # filter proxy model
        self.filter_proxy_model = None

        # hierarchy widget
        self.hierarchy_widget = None
        self.hierarchy_state = None

        # threads
        self.threadpool = QThreadPool(self)
        self.workers = None

        # gui
        self.setup_gui()

    def _progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def __get_genes(self):
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes.append(
                    str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    @Inputs.genes
    def handle_input(self, data):
        self.closeContext()
        self.Error.clear()
        if data:
            self.input_data = data
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.use_attr_names is not None and
                    ((self.gene_id_attribute is None) ^
                     (self.gene_id_column is None))):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            self.openContext(self.tax_id)

        self.__get_genes()
        self.download_gene_sets()

    def update_info_box(self):
        info_string = ''
        if self.input_genes:
            info_string += '{} unique gene names on input.\n'.format(
                len(self.input_genes))
            info_string += '{} genes on output.\n'.format(
                self.num_of_sel_genes)
        else:
            info_string += 'No genes on input.\n'

        self.input_info.setText(info_string)

    def on_gene_sets_download(self, result):
        # make sure this happens in the main thread.
        # Qt insists that widgets be created within the GUI(main) thread.
        assert threading.current_thread() == threading.main_thread()
        self.progress_bar.finish()
        self.setStatusMessage('')

        tax_id, sets = result
        self.set_hierarchy_model(self.hierarchy_widget,
                                 *hierarchy_tree(tax_id, sets))
        self.set_selected_hierarchies()

        self.update_info_box()
        self.workers = defaultdict(list)
        self.progress_bar_iterations = dict()

        for selected_hierarchy in self.get_hierarchies():
            gene_sets = geneset.load_gene_sets(selected_hierarchy)
            worker = Worker(get_collections,
                            gene_sets,
                            set(self.input_genes),
                            progress_callback=True,
                            partial_result=True)
            worker.signals.error.connect(self.handle_error)
            worker.signals.finished.connect(self.handle_worker_finished)
            worker.signals.progress.connect(self._progress_advance)
            worker.signals.partial_result.connect(self.populate_data_model)
            worker.setAutoDelete(False)

            self.workers[selected_hierarchy] = worker
            self.progress_bar_iterations[selected_hierarchy] = len(gene_sets)

        self.display_gene_sets()

    def handle_worker_finished(self):
        # We check if all workers have completed. If not, continue
        # dirty hax, is this ok?
        if self.progress_bar and self.progress_bar.widget.progressBarValue == 100:
            self.progress_bar.finish()
            self.setStatusMessage('')
            self.hierarchy_widget.setDisabled(False)

            # adjust column width
            for i in range(len(DATA_HEADER_LABELS) - 1):

                self.data_view.resizeColumnToContents(i)

            self.filter_proxy_model.setSourceModel(self.data_model)

    def populate_data_model(self, partial_result):
        assert threading.current_thread() == threading.main_thread()

        if partial_result:
            self.data_model.appendRow(partial_result)

    def set_hierarchy_model(self, model, tax_id, sets):
        def beautify_displayed_text(text):
            if '_' in text:
                return text.replace('_', ' ').title()
            else:
                return text

        # TODO: maybe optimize this code?
        for key, value in sets.items():
            item = QTreeWidgetItem(model, [beautify_displayed_text(key)])
            item.setFlags(item.flags()
                          & (Qt.ItemIsUserCheckable | ~Qt.ItemIsSelectable
                             | Qt.ItemIsEnabled))
            item.setExpanded(True)
            item.tax_id = tax_id
            item.hierarchy = key

            if value:
                item.setFlags(item.flags() | Qt.ItemIsTristate)
                self.set_hierarchy_model(item, tax_id, value)
            else:
                if item.parent():
                    item.hierarchy = ((item.parent().hierarchy, key), tax_id)

            if not item.childCount() and not item.parent():
                item.hierarchy = ((key, ), tax_id)

    def download_gene_sets(self):
        self.Error.clear()
        # reset hierarchy widget state
        self.hierarchy_widget.clear()
        # clear data view
        self.init_item_model()

        # get all gene sets for selected organism
        gene_sets = geneset.list_all(organism=self.tax_id)
        # init progress bar
        self.progress_bar = ProgressBar(self, iterations=len(gene_sets) * 100)
        # status message
        self.setStatusMessage('downloading sets')

        worker = Worker(download_gene_sets, gene_sets, progress_callback=True)
        worker.signals.progress.connect(self._progress_advance)
        worker.signals.result.connect(self.on_gene_sets_download)
        worker.signals.error.connect(self.handle_error)

        # move download process to worker thread
        self.threadpool.start(worker)

    def display_gene_sets(self):
        self.init_item_model()
        self.hierarchy_widget.setDisabled(True)

        only_selected_hier = self.get_hierarchies(only_selected=True)

        # init progress bar
        iterations = sum([
            self.progress_bar_iterations[hier] for hier in only_selected_hier
        ])
        self.progress_bar = ProgressBar(self, iterations=iterations)
        self.setStatusMessage('displaying gene sets')

        if not only_selected_hier:
            self.progress_bar.finish()
            self.setStatusMessage('')
            self.hierarchy_widget.setDisabled(False)
            return

        # save setting on selected hierarchies
        self.stored_selections = only_selected_hier
        # save context
        self.closeContext()

        for selected_hierarchy in only_selected_hier:
            self.threadpool.start(self.workers[selected_hierarchy])

        self.openContext(self.tax_id)

    def handle_error(self, ex):
        self.progress_bar.finish()
        self.setStatusMessage('')

        if isinstance(ex, ConnectionError):
            self.Error.cant_reach_host()

        print(ex)

    def set_selected_hierarchies(self):
        iterator = QTreeWidgetItemIterator(self.hierarchy_widget,
                                           QTreeWidgetItemIterator.All)

        while iterator.value():
            # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that
            #       holds subcategories. We don't want to display all sets from category
            if type(iterator.value().hierarchy) is not str:
                if iterator.value().hierarchy in self.stored_selections:
                    iterator.value().setCheckState(0, Qt.Checked)
                else:
                    iterator.value().setCheckState(0, Qt.Unchecked)

            iterator += 1

        # if no items are checked, we check first one at random
        if len(self.get_hierarchies(only_selected=True)) == 0:
            iterator = QTreeWidgetItemIterator(
                self.hierarchy_widget, QTreeWidgetItemIterator.NotChecked)

            while iterator.value():
                if type(iterator.value().hierarchy) is not str:
                    iterator.value().setCheckState(0, Qt.Checked)
                    return

                iterator += 1

    def get_hierarchies(self, **kwargs):
        """ return selected hierarchy
        """
        only_selected = kwargs.get('only_selected', None)

        sets_to_display = list()

        if only_selected:
            iterator = QTreeWidgetItemIterator(self.hierarchy_widget,
                                               QTreeWidgetItemIterator.Checked)
        else:
            iterator = QTreeWidgetItemIterator(self.hierarchy_widget)

        while iterator.value():
            # note: if hierarchy value is not a tuple, then this is just top level qTreeWidgetItem that
            #       holds subcategories. We don't want to display all sets from category
            if type(iterator.value().hierarchy) is not str:

                if not only_selected:
                    sets_to_display.append(iterator.value().hierarchy)
                else:
                    if not iterator.value().isDisabled():
                        sets_to_display.append(iterator.value().hierarchy)

            iterator += 1

        return sets_to_display

    def commit(self):
        selection_model = self.data_view.selectionModel()

        if selection_model:
            # genes_from_set = selection_model.selectedRows(GENES)
            matched_genes = selection_model.selectedRows(MATCHED)

            if matched_genes and self.input_genes:
                genes = [
                    model_index.data(Qt.UserRole)
                    for model_index in matched_genes
                ]
                output_genes = [
                    gene_name for gene_name in list(set.union(*genes))
                ]
                self.num_of_sel_genes = len(output_genes)
                self.update_info_box()

                if self.use_attr_names:
                    selected = [
                        column for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes
                        and str(column.attributes[
                            self.gene_id_attribute]) in output_genes
                    ]

                    domain = Domain(selected,
                                    self.input_data.domain.class_vars,
                                    self.input_data.domain.metas)
                    new_data = self.input_data.from_table(
                        domain, self.input_data)
                    self.Outputs.matched_genes.send(new_data)

                else:
                    selected_rows = []
                    for row_index, row in enumerate(self.input_data):
                        gene_in_row = str(row[self.gene_id_column])
                        if gene_in_row in self.input_genes and gene_in_row in output_genes:
                            selected_rows.append(row_index)

                    if selected_rows:
                        selected = self.input_data[selected_rows]
                    else:
                        selected = None

                    self.Outputs.matched_genes.send(selected)

    def setup_gui(self):
        # control area
        info_box = vBox(self.controlArea, 'Input info')
        self.input_info = widgetLabel(info_box)

        hierarchy_box = widgetBox(self.controlArea, "Entity Sets")
        self.hierarchy_widget = QTreeWidget(self)
        self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers)
        self.hierarchy_widget.setHeaderLabels(HIERARCHY_HEADER_LABELS)
        self.hierarchy_widget.itemClicked.connect(self.display_gene_sets)
        hierarchy_box.layout().addWidget(self.hierarchy_widget)

        self.commit_button = auto_commit(self.controlArea,
                                         self,
                                         "auto_commit",
                                         "&Commit",
                                         box=False)

        # rubber(self.controlArea)

        # main area
        self.filter_proxy_model = QSortFilterProxyModel(self.data_view)
        self.filter_proxy_model.setFilterKeyColumn(3)

        self.data_view = QTreeView()
        self.data_view.setModel(self.filter_proxy_model)
        self.data_view.setAlternatingRowColors(True)
        self.data_view.sortByColumn(2, Qt.DescendingOrder)
        self.data_view.setSortingEnabled(True)
        self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
        self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
        self.data_view.viewport().setMouseTracking(True)
        self.data_view.setItemDelegateForColumn(
            TERM, LinkStyledItemDelegate(self.data_view))

        self.data_view.selectionModel().selectionChanged.connect(self.commit)

        self.lineEdit_filter = lineEdit(self.mainArea, self, 'search_pattern',
                                        'Filter gene sets:')
        self.lineEdit_filter.setPlaceholderText('search pattern ...')
        self.lineEdit_filter.textChanged.connect(
            self.filter_proxy_model.setFilterRegExp)

        self.mainArea.layout().addWidget(self.data_view)

    def init_item_model(self):
        if self.data_model:
            self.data_model.clear()
            self.filter_proxy_model.setSourceModel(None)
        else:
            self.data_model = QStandardItemModel()

        self.data_model.setSortRole(Qt.UserRole)
        self.data_model.setHorizontalHeaderLabels(DATA_HEADER_LABELS)

    def sizeHint(self):
        return QSize(1280, 960)