class OWGOEnrichmentAnalysis(widget.OWWidget):
    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/GOBrowser.svg"
    priority = 2020

    inputs = [("Cluster Data", Orange.data.Table,
               "setDataset", widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table,
               "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Data on Unselected Genes", Orange.data.Table),
               ("Data on Unknown Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    annotationIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    geneMatcherSettings = settings.Setting([True, False, False, False])
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True for et in go.evidenceTypesOrdered})

    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)
    selectionAddTermAsClass = settings.Setting(0)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(self, parent)

        self.clusterDataset = None
        self.referenceDataset = None
        self.ontology = None
        self.annotations = None
        self.loadedAnnotationCode = "---"
        self.treeStructRootKey = None
        self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = OWGOEnrichmentAnalysis.Initializing

        self.annotationCodes = []

        #############
        ## GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        ## Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        box = gui.widgetBox(self.inputTab, "Organism")
        self.annotationComboBox = gui.comboBox(
            box, self, "annotationIndex", items=self.annotationCodes,
            callback=self._updateEnrichment, tooltip="Select organism")

        genebox = gui.widgetBox(self.inputTab, "Gene Names")
        self.geneAttrIndexCombo = gui.comboBox(
            genebox, self, "geneAttrIndex", callback=self._updateEnrichment,
            tooltip="Use this attribute to extract gene names from input data")
        self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

        cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names",
                          tooltip="Use column names for gene names",
                          callback=self._updateEnrichment)
        cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

        gui.button(genebox, self, "Gene matcher settings",
                   callback=self.UpdateGeneMatcher,
                   tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self._updateEnrichment)

        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self._updateEnrichment)

        ## Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph, 
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        #use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self._updateEnrichment)
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        ## Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms", 
                      "Outputs genes common to all selected GO terms"],
            callback=[self.ExampleSelection,
                      self.UpdateAddClassButton])

        self.addClassCB = gui.checkBox(
            box, self, "selectionAddTermAsClass", "Add GO Term as class",
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        self.setBlocking(True)
        self._executor = ThreadExecutor()
        self._init = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             ("GO", "taxonomy.pickle")]
        )
        self._init.finished.connect(self.__initialize_finish)
        self._executor.submit(self._init)


    def sizeHint(self):
        return QSize(1000, 700)

    def __initialize_finish(self):
        self.setBlocking(False)

        try:
            self.annotationFiles = listAvailable()
        except ConnectTimeout:
            self.error(2, "Internet connection error, unable to load data. " + \
                          "Check connection and create a new GO Browser widget.")
            self.filterTab.setEnabled(False)
            self.inputTab.setEnabled(False)
            self.selectTab.setEnabled(False)
            self.listView.setEnabled(False)
            self.sigTerms.setEnabled(False)
        else:
            self.annotationCodes = sorted(self.annotationFiles.keys())
            self.annotationComboBox.clear()
            self.annotationComboBox.addItems(self.annotationCodes)
            self.annotationComboBox.setCurrentIndex(self.annotationIndex)
            self.__state = OWGOEnrichmentAnalysis.Ready

    def __on_evidenceChanged(self):
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self._updateEnrichment()

    def UpdateGeneMatcher(self):
        """Open the Gene matcher settings dialog."""
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, modal=True)
        if dialog.exec_() != QDialog.Rejected:
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            if self.annotations:
                self.SetGeneMatcher()
                self._updateEnrichment()

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.geneAttrIndexCombo.clear()
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Data on Unselected Genes", None)
        self.send("Data on Unknown Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        if self.__state == OWGOEnrichmentAnalysis.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.clear()
        self.clusterDataset = data

        if data is not None:
            domain = data.domain
            allvars = domain.variables + domain.metas
            self.candidateGeneAttrs = [var for var in allvars if isstring(var)]

            self.geneAttrIndexCombo.clear()
            for var in self.candidateGeneAttrs:
                self.geneAttrIndexCombo.addItem(*gui.attributeItem(var))
            taxid = data_hints.get_hint(data, "taxid", "")
            code = None
            try:
                code = go.from_taxid(taxid)
            except KeyError:
                pass
            except Exception as ex:
                print(ex)

            if code is not None:
                filename = "gene_association.%s.tar.gz" % code
                if filename in self.annotationFiles.values():
                    self.annotationIndex = \
                            [i for i, name in enumerate(self.annotationCodes) \
                             if self.annotationFiles[name] == filename].pop()

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)
            self.openContext(data)

            self.geneAttrIndex = min(self.geneAttrIndex,
                                     len(self.candidateGeneAttrs) - 1)
            if len(self.candidateGeneAttrs) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            elif self.geneAttrIndex < len(self.candidateGeneAttrs):
                self.geneAttrIndex = len(self.candidateGeneAttrs) - 1

            self._updateEnrichment()

    def setReferenceDataset(self, data=None):
        self.referenceDataset = data
        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.clusterDataset is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            graph = self.Enrichment()
            self.SetGraph(graph)
        elif self.clusterDataset:
            self.__updateReferenceSetButton()

    def handleNewSignals(self):
        super().handleNewSignals()

    def _updateEnrichment(self):
        if self.clusterDataset is not None and \
                self.__state == OWGOEnrichmentAnalysis.Ready:
            pb = gui.ProgressBar(self, 100)
            self.Load(pb=pb)
            graph = self.Enrichment(pb=pb)
            self.FilterUnknownGenes()
            self.SetGraph(graph)

    def __updateReferenceSetButton(self):
        allgenes, refgenes = None, None
        if self.referenceDataset:
            try:
                allgenes = self.genesFromTable(self.referenceDataset)
            except Exception:
                allgenes = []
            refgenes, unknown = self.FilterAnnotatedGenes(allgenes)
        self.referenceRadioBox.buttons[1].setDisabled(not bool(allgenes))
        self.referenceRadioBox.buttons[1].setText("Reference set " + ("(%i genes, %i matched)" % (len(allgenes), len(refgenes)) if allgenes and refgenes else ""))

    def genesFromTable(self, data):
        if self.useAttrNames:
            genes = [v.name for v in data.domain.variables]
        else:
            attr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs) - 1)]
            genes = [str(ex[attr]) for ex in data if not numpy.isnan(ex[attr])]
            if any("," in gene for gene in genes):
                self.information(0, "Separators detected in gene names. Assuming multiple genes per example.")
                genes = reduce(operator.iadd, (genes.split(",") for genes in genes), [])
        return genes

    def FilterAnnotatedGenes(self, genes):
        matchedgenes = self.annotations.get_gene_names_translator(genes).values()
        return matchedgenes, [gene for gene in genes if gene not in matchedgenes]

    def FilterUnknownGenes(self):
        if not self.useAttrNames and self.candidateGeneAttrs:
            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs)-1)]
            indices = []
            for i, ex in enumerate(self.clusterDataset):
                if not any(self.annotations.genematcher.match(n.strip()) for n in str(ex[geneAttr]).split(",")):
                    indices.append(i)
            if indices:
                data = self.clusterDataset[indices]
            else:
                data = None
            self.send("Data on Unknown Genes", data)
        else:
            self.send("Data on Unknown Genes", None)

    def Load(self, pb=None):

        if self.__state == OWGOEnrichmentAnalysis.Ready:
            go_files, tax_files = serverfiles.listfiles("GO"), serverfiles.listfiles("Taxonomy")
            calls = []
            pb, finish = (gui.ProgressBar(self, 0), True) if pb is None else (pb, False)
            count = 0
            if not tax_files:
                calls.append(("Taxonomy", "ncbi_taxnomy.tar.gz"))
                count += 1
            org = self.annotationCodes[min(self.annotationIndex, len(self.annotationCodes)-1)]
            if org != self.loadedAnnotationCode:
                count += 1
                if self.annotationFiles[org] not in go_files:
                    calls.append(("GO", self.annotationFiles[org]))
                    count += 1

            if "gene_ontology_edit.obo.tar.gz" not in go_files:
                calls.append(("GO", "gene_ontology_edit.obo.tar.gz"))
                count += 1
            if not self.ontology:
                count += 1
            pb.iter += count * 100

            for args in calls:
                serverfiles.localpath_download(*args, **dict(callback=pb.advance))

            i = len(calls)
            if not self.ontology:
                self.ontology = go.Ontology(progress_callback=lambda value: pb.advance())
                i += 1

            if org != self.loadedAnnotationCode:
                self.annotations = None
                gc.collect()  # Force run garbage collection
                code = self.annotationFiles[org].split(".")[-3]
                self.annotations = go.Annotations(code, genematcher=gene.GMDirect(), progress_callback=lambda value: pb.advance())
                i += 1
                self.loadedAnnotationCode = org
                count = defaultdict(int)
                geneSets = defaultdict(set)

                for anno in self.annotations.annotations:
                    count[anno.evidence] += 1
                    geneSets[anno.evidence].add(anno.geneName)
                for etype in go.evidenceTypesOrdered:
                    ecb = self.evidenceCheckBoxDict[etype]
                    ecb.setEnabled(bool(count[etype]))
                    ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype])))
            if finish:
                pb.finish()

    def SetGeneMatcher(self):
        if self.annotations:
            taxid = self.annotations.taxid
            matchers = []
            for matcher, use in zip([gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy], self.geneMatcherSettings):
                if use:
                    try:
                        if taxid == "352472":
                            matchers.extend([matcher(taxid), gene.GMDicty(),
                                             [matcher(taxid), gene.GMDicty()]])
                            # The reason machers are duplicated is that we want `matcher` or `GMDicty` to
                            # match genes by them self if possible. Only use the joint matcher if they fail.   
                        else:
                            matchers.append(matcher(taxid))
                    except Exception as ex:
                        print(ex)
            self.annotations.genematcher = gene.matcher(matchers)
            self.annotations.genematcher.set_targets(self.annotations.gene_names)

    def Enrichment(self, pb=None):
        assert self.clusterDataset is not None

        pb = gui.ProgressBar(self, 100) if pb is None else pb
        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        if isinstance(self.annotations.genematcher, gene.GMDirect):
            self.SetGeneMatcher()
        self.error(1)
        self.warning([0, 1])

        if self.useAttrNames:
            clusterGenes = [v.name for v in self.clusterDataset.domain.attributes]
            self.information(0)
        elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
            geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
            clusterGenes = [str(ex[geneAttr]) for ex in self.clusterDataset
                            if not numpy.isnan(ex[geneAttr])]
            if any("," in gene for gene in clusterGenes):
                self.information(0, "Separators detected in cluster gene names. Assuming multiple genes per example.")
                clusterGenes = reduce(operator.iadd, (genes.split(",") for genes in clusterGenes), [])
            else:
                self.information(0)
        else:
            self.error(1, "Failed to extract gene names from input dataset!")
            return {}

        genesSetCount = len(set(clusterGenes))

        self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(clusterGenes).values()

        self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" % (genesSetCount, len(clusterGenes), 100.0*len(clusterGenes)/genesSetCount if genesSetCount else 0.0))

        referenceGenes = None
        if not self.useReferenceDataset or self.referenceDataset is None:
            self.information(2)
            self.information(1)
            referenceGenes = self.annotations.gene_names

        elif self.referenceDataset is not None:
            if self.useAttrNames:
                referenceGenes = [v.name for v in self.referenceDataset.domain.attributes]
                self.information(1)
            elif geneAttr in (self.referenceDataset.domain.variables +
                              self.referenceDataset.domain.metas):
                referenceGenes = [str(ex[geneAttr]) for ex in self.referenceDataset
                                  if not numpy.isnan(ex[geneAttr])]
                if any("," in gene for gene in clusterGenes):
                    self.information(1, "Separators detected in reference gene names. Assuming multiple genes per example.")
                    referenceGenes = reduce(operator.iadd, (genes.split(",") for genes in referenceGenes), [])
                else:
                    self.information(1)
            else:
                self.information(1)
                referenceGenes = None

            if referenceGenes is None:
                referenceGenes = list(self.annotations.gene_names)
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(2, "Unable to extract gene names from reference dataset. Using entire genome for reference")
                self.useReferenceDataset = 0
            else:
                refc = len(referenceGenes)
                referenceGenes = self.annotations.get_gene_names_translator(referenceGenes).values()
                self.referenceRadioBox.buttons[1].setText("Reference set (%i genes, %i matched)" % (refc, len(referenceGenes)))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0

        if not referenceGenes:
            self.error(1, "No valid reference set")
            return {}

        self.referenceGenes = referenceGenes
        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)
        aspect = ["P", "C", "F"][self.aspectIndex]

        if clusterGenes:
            self.terms = terms = self.annotations.get_enriched_terms(
                clusterGenes, referenceGenes, evidences, aspect=aspect,
                prob=self.probFunctions[self.probFunc], use_fdr=False,
                progress_callback=lambda value: pb.advance())
            ids = []
            pvals = []
            for i, d in self.terms.items():
                ids.append(i)
                pvals.append(d[1])
            for i, fdr in zip(ids, stats.FDR(pvals)):  # save FDR as the last part of the tuple
                terms[i] = tuple(list(terms[i]) + [ fdr ])

        else:
            self.terms = terms = {}
        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        pb.finish()
        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for id in ids:
            parents[id] = set([term for _, term in self.ontology[id].related])

        children = {}
        for term in self.terms:
            children[term] = set([id for id in ids if term in parents[id]])

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term], children[term])
            if not self.ontology[term].related and not getattr(self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term
        return terms

    def FilterGraph(self, graph):
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue: #FDR
            graph = dict(filter(lambda item: item[1][3] <= self.maxPValue, graph.items()))
        if self.filterByNumOfInstances:
            graph = dict(filter(lambda item: len(item[1][0]) >= self.minNumOfInstances, graph.items()))
        return graph

    def FilterAndDisplayGraph(self):
        if self.clusterDataset:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        self.listView.clear()
        self.listViewItems=[]
        self.sigTerms.clear()

    def DisplayGraph(self):
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []
        enrichment = lambda t: len(t[0]) / t[2] * (len(self.referenceGenes) / len(self.clusterGenes))
        maxFoldEnrichment = max([enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term], len(self.clusterGenes), len(self.referenceGenes), maxFoldEnrichment, parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                if term in self.termListViewItemDict:
                    self.termListViewItemDict[term].append(displayNode)
                else:
                    self.termListViewItemDict[term] = [displayNode]
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        if self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = self.graph.items()
        terms = sorted(terms, key=lambda item: item[1][1])
        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for i, (t_id, (genes, p_value, refCount, fdr)) in enumerate(terms):
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.clusterGenes),
                                    len(self.referenceGenes),
                                    maxFoldEnrichment,
                                    self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [], [],
            # All is meta!
            [Orange.data.StringVariable("GO Term Id"),
             Orange.data.StringVariable("GO Term Name"),
             Orange.data.ContinuousVariable("Cluster Frequency"),
             Orange.data.ContinuousVariable("Genes in Cluster", number_of_decimals=0),
             Orange.data.ContinuousVariable("Reference Frequency"),
             Orange.data.ContinuousVariable("Genes in Reference", number_of_decimals=0),
             Orange.data.ContinuousVariable("p-value"),
             Orange.data.ContinuousVariable("FDR"),
             Orange.data.ContinuousVariable("Enrichment"),
             Orange.data.StringVariable("Genes")])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.clusterGenes),
                  len(genes),
                  r_count / len(self.referenceGenes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.clusterGenes) * \
                  len(self.referenceGenes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X, metas=M)
        else:
            termsTable = Orange.data.Table(termsDomain)
        self.send("Enrichment Report", termsTable)

    def ViewSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selected = self.listView.selectedItems()
        self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
        self.ExampleSelection()
        self.selectionChanging = 0

    def TableSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selectedIds = set([self.sigTerms.itemFromIndex(index).goId for index in self.sigTerms.selectedIndexes()])

        for i in range(self.sigTerms.topLevelItemCount()):
            item = self.sigTerms.topLevelItem(i)
            selected = item.goId in selectedIds
            term = item.goId

            if selected:
                self.selectedTerms.append(term)

            for lvi in self.termListViewItemDict[term]:
                try:
                    lvi.setSelected(selected)
                    if selected:
                        lvi.setExpanded(True)
                except RuntimeError:  # Underlying C/C++ object deleted
                    pass

        self.ExampleSelection()
        self.selectionChanging = 0

    def UpdateAddClassButton(self):
        self.addClassCB.setEnabled(self.selectionDisjoint == 1)

    def ExampleSelection(self):
        self.commit()

    def commit(self):
        if self.clusterDataset is None:
            return

        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms), set())

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
#             if getattr(self, "useEvidence" + etype):
                evidences.append(etype)
        allTerms = self.annotations.get_annotated_terms(
            genes, direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            ccount = 1 if self.selectionDisjoint == 1 else len(self.selectedTerms)
            selectedGenes = [gene for gene, c in count.items()
                             if c == ccount and gene in genes]
        else:
            selectedGenes = reduce(
                operator.ior,
                (set(allTerms.get(term, [])) for term in self.selectedTerms),
                set())

        if self.useAttrNames:
            vars = [self.clusterDataset.domain[gene]
                    for gene in set(selectedGenes)]
            domain = Orange.data.Domain(
                vars, self.clusterDataset.domain.class_vars,
                self.clusterDataset.domain.metas)
            newdata = self.clusterDataset.from_table(domain, self.clusterDataset)

            self.send("Data on Selected Genes", newdata)
            self.send("Data on Unselected Genes", None)
        elif self.candidateGeneAttrs:
            selectedExamples = []
            unselectedExamples = []

            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex, len(self.candidateGeneAttrs)-1)]

            if self.selectionDisjoint == 1:
                goVar = Orange.data.DiscreteVariable(
                    "GO Term", values=list(self.selectedTerms))
                newDomain = Orange.data.Domain(
                    self.clusterDataset.domain.variables, goVar,
                    self.clusterDataset.domain.metas)
                goColumn = []
            for i, ex in enumerate(self.clusterDataset):
                if not numpy.isnan(ex[geneAttr]) and any(gene in selectedGenes for gene in str(ex[geneAttr]).split(",")):
                    if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                        terms = filter(lambda term: any(gene in self.graph[term][0] for gene in str(ex[geneAttr]).split(",")) , self.selectedTerms)
                        term = sorted(terms)[0]
                        goColumn.append(goVar.values.index(term))
                    selectedExamples.append(i)
                else:
                    unselectedExamples.append(i)

            if selectedExamples:
                selectedExamples = self.clusterDataset[selectedExamples]
                if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                    selectedExamples = Orange.data.Table.from_table(newDomain, selectedExamples)
                    view, issparse = selectedExamples.get_column_view(goVar)
                    assert not issparse
                    view[:] = goColumn
            else:
                selectedExamples = None

            if unselectedExamples:
                unselectedExamples = self.clusterDataset[unselectedExamples]
            else:
                unselectedExamples = None

            self.send("Data on Selected Genes", selectedExamples)
            self.send("Data on Unselected Genes", unselectedExamples)

    def ShowInfo(self):
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText("Ontology:\n" + self.ontology.header
                      if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" + self.annotations.header.replace("!", "")
                      if self.annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
Пример #2
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box,
                              self,
                              "outputRows",
                              ["Genes in rows", "Samples in rows"],
                              "Rows",
                              callback=self.commitIf)

        gui.checkBox(box,
                     self,
                     "mergeSpots",
                     "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box,
            self,
            "datasetName",
            "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box,
                                           self,
                                           "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea,
                            self,
                            "autoCommit",
                            "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(gds[key] for gds in self.gds
                                for key in self.searchKeys)
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable
                                | Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(report_genes=report_genes,
                                   transpose=transpose,
                                   sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(get_data,
                               self.currentGds["dataset_id"],
                               report_genes=self.mergeSpots,
                               transpose=self.outputRows,
                               sample_type=sample_type,
                               title=self.datasetName
                               or self.currentGds["title"])
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:

            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain([
                attr for attr in data.domain.attributes
                if samples.issuperset(attr.attributes.items())
            ], data.domain.class_var, data.domain.metas)
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes)
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
Пример #3
0
class OWPIPAx(widget.OWWidget):
    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)
    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])
    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    experimentsHeaderState = settings.Setting(
        {name: False for _, name in HEADER[:ID_INDEX + 1]}
    )

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload",
                     callback=self.Reload)
        gui.button(self.controlArea, self, "Clear cache",
                     callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder
        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget
        )

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel()
        )

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        self.error(1)
        self.warning(1)

        def en(x):
            return x if len(x) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=en(self.username),
                               password=self.password)

        # check password
        if en(self.username) != None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(1, "Can not access database - using cached data.")
                except Exception as ex:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        mappings = {}
        result_types = []
        sucind = False  # success indicator for database index

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception as ex:
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception as ex:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):

        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception as ex:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception as ex:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key) \
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        elements = []

        # softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(10))
            items_shown[c] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [it for i, it in enumerate(self.items) if i not in delete_ind]

        for r_annot in [items_to_show[i] for i in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(time_dict["fullYearUTC"],
                                time_dict["monthUTC"] + 1,  # Why is month 0 based?
                                time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)
            elements.append(row_items)

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)

            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version?
        self.wantbufver = \
            lambda x, ad=self.results_list: \
            defaultdict(lambda: "?", ad[x])["date"]

        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:
            fn = self.dbc.download_key_function()
            result_id_key = dict(((m["data_id"], m["mappings_id"]), key) \
                                 for key, m in self.results_list.items())

            for item in self.items:
                c = str(item.text(10))
                mapping = self.mappings[c]
                data_id, mappings_id = mapping["data_id"], mapping["id"]
                r_id = result_id_key[data_id, mappings_id]
                # Get the buffered version
                buffered = self.dbc.inBuffer(fn(r_id))
                value = " " if buffered == self.wantbufver(r_id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item \
                                   for s in self.searchString.split())
                           )

    def Commit(self):
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(10))
            annots = self.mappings[unique_id]
            ids.append((annots["data_id"], annots["id"]))

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = dict((name, key) for key, name in HEADER)

        hview = self.experimentsWidget.header()
        shownHeaders = [label for i, label in \
                        list(enumerate(self.headerLabels))[1:] \
                        if not hview.isSectionHidden(i)
                        ]

        allowed_labels = [reverse_header_dict.get(label, label) \
                          for label in shownHeaders]

        if self.joinreplicates and "id" not in allowed_labels:
            # need 'id' labels in join_replicates for attribute names
            allowed_labels.append("id")

        if len(ids):
            table = self.dbc.get_data(ids=ids, result_type=self.rtype(),
                                      callback=pb.advance,
                                      exclude_constant_labels=self.excludeconstant,
                                      #                          bufver=self.wantbufver,
                                      transform=transfn,
                                      allowed_labels=allowed_labels)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                                              ignorenames=["replicate", "data_id", "mappings_id",
                                                           "data_name", "id", "unique_id"],
                                              namefn=None,
                                              avg=dicty.median
                                              )

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) \
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes,
                                key=sorting_key)

            domain = Orange.data.Domain(
                attributes, table.domain.class_var, table.domain.metas)
            table = table.from_table(domain, table)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
Пример #4
0
class OWWordEnrichment(OWWidget):
    # Basic widget info
    name = "Word Enrichment"
    description = "Word enrichment analysis for selected documents."
    icon = "icons/SetEnrichment.svg"
    priority = 600

    # Input/output
    class Inputs:
        selected_data = Input("Selected Data", Table)
        data = Input("Data", Table)

    want_main_area = True

    class Error(OWWidget.Error):
        no_bow_features = Msg('No bag-of-words features!')
        no_words_overlap = Msg('No words overlap!')
        empty_selection = Msg('Selected data is empty!')
        all_selected = Msg('All examples can not be selected!')

    # Settings
    filter_by_p = Setting(False)
    filter_p_value = Setting(0.01)
    filter_by_fdr = Setting(True)
    filter_fdr_value = Setting(0.2)

    def __init__(self):
        super().__init__()

        # Init data
        self.data = None
        self.selected_data = None
        self.selected_data_transformed = None   # used for transforming the 'selected data' into the 'data' domain

        self.words = []
        self.p_values = []
        self.fdr_values = []

        # Info section
        fbox = gui.widgetBox(self.controlArea, "Info")
        self.info_all = gui.label(fbox, self, 'Cluster words:')
        self.info_sel = gui.label(fbox, self, 'Selected words:')
        self.info_fil = gui.label(fbox, self, 'After filtering:')

        # Filtering settings
        fbox = gui.widgetBox(self.controlArea, "Filter")
        hbox = gui.widgetBox(fbox, orientation=0)

        self.chb_p = gui.checkBox(hbox, self, "filter_by_p", "p-value",
                                  callback=self.filter_and_display,
                                  tooltip="Filter by word p-value")
        self.spin_p = gui.doubleSpin(hbox, self, 'filter_p_value',
                                     1e-4, 1, step=1e-4, labelWidth=15,
                                     callback=self.filter_and_display,
                                     callbackOnReturn=True,
                                     tooltip="Max p-value for word")
        self.spin_p.setEnabled(self.filter_by_p)

        hbox = gui.widgetBox(fbox, orientation=0)
        self.chb_fdr = gui.checkBox(hbox, self, "filter_by_fdr", "FDR",
                                    callback=self.filter_and_display,
                                    tooltip="Filter by word FDR")
        self.spin_fdr = gui.doubleSpin(hbox, self, 'filter_fdr_value',
                                       1e-4, 1, step=1e-4, labelWidth=15,
                                       callback=self.filter_and_display,
                                       callbackOnReturn=True,
                                       tooltip="Max p-value for word")
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        gui.rubber(self.controlArea)

        # Word's list view
        self.cols = ['Word', 'p-value', 'FDR']
        self.sig_words = QTreeWidget()
        self.sig_words.setColumnCount(len(self.cols))
        self.sig_words.setHeaderLabels(self.cols)
        self.sig_words.setSortingEnabled(True)
        self.sig_words.setSelectionMode(QTreeView.ExtendedSelection)
        self.sig_words.sortByColumn(2, 0)   # 0 is ascending order
        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)
        self.mainArea.layout().addWidget(self.sig_words)

    def sizeHint(self):
        return QSize(450, 240)

    @Inputs.data
    def set_data(self, data=None):
        self.data = data

    @Inputs.selected_data
    def set_data_selected(self, data=None):
        self.selected_data = data

    def handleNewSignals(self):
        self.check_data()

    def get_bow_domain(self):
        domain = self.data.domain
        return Domain(
            attributes=[a for a in domain.attributes
                        if a.attributes.get('bow-feature', False)],
            class_vars=domain.class_vars,
            metas=domain.metas,
            source=domain)

    def check_data(self):
        self.Error.clear()
        if isinstance(self.data, Table) and \
                isinstance(self.selected_data, Table):
            if len(self.selected_data) == 0:
                self.Error.empty_selection()
                self.clear()
                return

            # keep only BoW features
            bow_domain = self.get_bow_domain()
            if len(bow_domain.attributes) == 0:
                self.Error.no_bow_features()
                self.clear()
                return
            self.data = Corpus.from_table(bow_domain, self.data)
            self.selected_data_transformed = Corpus.from_table(bow_domain, self.selected_data)

            if np_sp_sum(self.selected_data_transformed.X) == 0:
                self.Error.no_words_overlap()
                self.clear()
            elif len(self.data) == len(self.selected_data):
                self.Error.all_selected()
                self.clear()
            else:
                self.apply()
        else:
            self.clear()

    def clear(self):
        self.sig_words.clear()
        self.info_all.setText('Cluster words:')
        self.info_sel.setText('Selected words:')
        self.info_fil.setText('After filtering:')

    def filter_enabled(self, b):
        self.chb_p.setEnabled(b)
        self.chb_fdr.setEnabled(b)
        self.spin_p.setEnabled(b)
        self.spin_fdr.setEnabled(b)

    def filter_and_display(self):
        self.spin_p.setEnabled(self.filter_by_p)
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        self.sig_words.clear()

        if self.selected_data_transformed is None:  # do nothing when no Data
            return

        count = 0
        if self.words:
            for word, pval, fval in zip(self.words, self.p_values, self.fdr_values):
                if (not self.filter_by_p or pval <= self.filter_p_value) and \
                        (not self.filter_by_fdr or fval <= self.filter_fdr_value):
                    it = EATreeWidgetItem(word, pval, fval, self.sig_words)
                    self.sig_words.addTopLevelItem(it)
                    count += 1

        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)

        self.info_all.setText('Cluster words: {}'.format(len(self.selected_data_transformed.domain.attributes)))
        self.info_sel.setText('Selected words: {}'.format(np.count_nonzero(np_sp_sum(self.selected_data_transformed.X, axis=0))))
        if not self.filter_by_p and not self.filter_by_fdr:
            self.info_fil.setText('After filtering:')
            self.info_fil.setEnabled(False)
        else:
            self.info_fil.setEnabled(True)
            self.info_fil.setText('After filtering: {}'.format(count))

    def progress(self, p):
        self.progressBarSet(p)

    def apply(self):
        self.clear()
        self.progressBarInit()
        self.filter_enabled(False)

        self.words = [i.name for i in self.selected_data_transformed.domain.attributes]
        self.p_values = hypergeom_p_values(self.data.X,
                                           self.selected_data_transformed.X,
                                           callback=self.progress)
        self.fdr_values = false_discovery_rate(self.p_values)
        self.filter_and_display()
        self.filter_enabled(True)
        self.progressBarFinished()

    def tree_to_table(self):
        view = [self.cols]
        items = self.sig_words.topLevelItemCount()
        for i in range(items):
            line = []
            for j in range(3):
                line.append(self.sig_words.topLevelItem(i).text(j))
            view.append(line)
        return(view)

    def send_report(self):
        if self.words:
            self.report_table("Enriched words", self.tree_to_table())
Пример #5
0
class OWGOEnrichmentAnalysis(widget.OWWidget):
    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/GOBrowser.svg"
    priority = 2020

    inputs = [("Cluster Data", Orange.data.Table, "setDataset",
               widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table, "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Data on Unselected Genes", Orange.data.Table),
               ("Data on Unknown Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    annotationIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    geneMatcherSettings = settings.Setting([True, False, False, False])
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True
         for et in go.evidenceTypesOrdered})

    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)
    selectionAddTermAsClass = settings.Setting(0)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(self, parent)

        self.clusterDataset = None
        self.referenceDataset = None
        self.ontology = None
        self.annotations = None
        self.loadedAnnotationCode = "---"
        self.treeStructRootKey = None
        self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = OWGOEnrichmentAnalysis.Initializing

        self.annotationCodes = []

        #############
        ## GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        ## Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(
            box,
            self,
            "Ontology/Annotation Info",
            callback=self.ShowInfo,
            tooltip="Show information on loaded ontology and annotations")

        box = gui.widgetBox(self.inputTab, "Organism")
        self.annotationComboBox = gui.comboBox(box,
                                               self,
                                               "annotationIndex",
                                               items=self.annotationCodes,
                                               callback=self._updateEnrichment,
                                               tooltip="Select organism")

        genebox = gui.widgetBox(self.inputTab, "Gene Names")
        self.geneAttrIndexCombo = gui.comboBox(
            genebox,
            self,
            "geneAttrIndex",
            callback=self._updateEnrichment,
            tooltip="Use this attribute to extract gene names from input data")
        self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

        cb = gui.checkBox(genebox,
                          self,
                          "useAttrNames",
                          "Use column names",
                          tooltip="Use column names for gene names",
                          callback=self._updateEnrichment)
        cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

        gui.button(genebox,
                   self,
                   "Gene matcher settings",
                   callback=self.UpdateGeneMatcher,
                   tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab,
            self,
            "useReferenceDataset", ["Entire genome", "Reference set (input)"],
            tooltips=[
                "Use entire genome for reference",
                "Use genes from Referece Examples input signal as reference"
            ],
            box="Reference",
            callback=self._updateEnrichment)

        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab,
            self,
            "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect",
            callback=self._updateEnrichment)

        ## Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(
            box,
            self,
            "filterByNumOfInstances",
            "Genes",
            callback=self.FilterAndDisplayGraph,
            tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox,
                 self,
                 'minNumOfInstances',
                 1,
                 100,
                 step=1,
                 label='#:',
                 labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box,
                     self,
                     "filterByPValue_nofdr",
                     "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box),
                       self,
                       'maxPValue_nofdr',
                       1e-8,
                       1,
                       step=1e-8,
                       label='p:',
                       labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        #use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box,
                     self,
                     "filterByPValue",
                     "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box),
                       self,
                       'maxPValue',
                       1e-8,
                       1,
                       step=1e-8,
                       label='p:',
                       labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box,
                              self,
                              "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=[
                                  "Use binomial distribution test",
                                  "Use hypergeometric distribution test"
                              ],
                              callback=self._updateEnrichment)
        box = gui.widgetBox(self.filterTab,
                            "Evidence codes in annotation",
                            addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(etype,
                            toolTip=go.evidenceTypes[etype],
                            checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        ## Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(self.selectTab,
                                    self,
                                    "selectionDirectAnnotation",
                                    ["Directly or Indirectly", "Directly"],
                                    box="Annotated genes",
                                    callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box,
            self,
            "selectionDisjoint",
            btnLabels=[
                "All selected genes", "Term-specific genes",
                "Common term genes"
            ],
            tooltips=[
                "Outputs genes annotated to all selected GO terms",
                "Outputs genes that appear in only one of selected GO terms",
                "Outputs genes common to all selected GO terms"
            ],
            callback=[self.ExampleSelection, self.UpdateAddClassButton])

        self.addClassCB = gui.checkBox(box,
                                       self,
                                       "selectionAddTermAsClass",
                                       "Add GO Term as class",
                                       callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = [
            'GO term', 'Cluster', 'Reference', 'p-value', 'FDR', 'Genes',
            'Enrichment'
        ]

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        self.setBlocking(True)
        self._executor = ThreadExecutor()
        self._init = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN,
                                        taxonomy.Taxonomy.FILENAME),
                                       ("GO", "taxonomy.pickle")])
        self._init.finished.connect(self.__initialize_finish)
        self._executor.submit(self._init)

    def sizeHint(self):
        return QSize(1000, 700)

    def __initialize_finish(self):
        self.setBlocking(False)

        try:
            self.annotationFiles = listAvailable()
        except ConnectTimeout:
            self.error(2, "Internet connection error, unable to load data. " + \
                          "Check connection and create a new GO Browser widget.")
            self.filterTab.setEnabled(False)
            self.inputTab.setEnabled(False)
            self.selectTab.setEnabled(False)
            self.listView.setEnabled(False)
            self.sigTerms.setEnabled(False)
        else:
            self.annotationCodes = sorted(self.annotationFiles.keys())
            self.annotationComboBox.clear()
            self.annotationComboBox.addItems(self.annotationCodes)
            self.annotationComboBox.setCurrentIndex(self.annotationIndex)
            self.__state = OWGOEnrichmentAnalysis.Ready

    def __on_evidenceChanged(self):
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self._updateEnrichment()

    def UpdateGeneMatcher(self):
        """Open the Gene matcher settings dialog."""
        dialog = GeneMatcherDialog(self,
                                   defaults=self.geneMatcherSettings,
                                   modal=True)
        if dialog.exec_() != QDialog.Rejected:
            self.geneMatcherSettings = [
                getattr(dialog, item[0]) for item in dialog.items
            ]
            if self.annotations:
                self.SetGeneMatcher()
                self._updateEnrichment()

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.geneAttrIndexCombo.clear()
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Data on Unselected Genes", None)
        self.send("Data on Unknown Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        if self.__state == OWGOEnrichmentAnalysis.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.clear()
        self.clusterDataset = data

        if data is not None:
            domain = data.domain
            allvars = domain.variables + domain.metas
            self.candidateGeneAttrs = [var for var in allvars if isstring(var)]

            self.geneAttrIndexCombo.clear()
            for var in self.candidateGeneAttrs:
                self.geneAttrIndexCombo.addItem(*gui.attributeItem(var))
            taxid = data_hints.get_hint(data, "taxid", "")
            code = None
            try:
                code = go.from_taxid(taxid)
            except KeyError:
                pass
            except Exception as ex:
                print(ex)

            if code is not None:
                filename = "gene_association.%s.tar.gz" % code
                if filename in self.annotationFiles.values():
                    self.annotationIndex = \
                            [i for i, name in enumerate(self.annotationCodes) \
                             if self.annotationFiles[name] == filename].pop()

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)
            self.openContext(data)

            self.geneAttrIndex = min(self.geneAttrIndex,
                                     len(self.candidateGeneAttrs) - 1)
            if len(self.candidateGeneAttrs) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            elif self.geneAttrIndex < len(self.candidateGeneAttrs):
                self.geneAttrIndex = len(self.candidateGeneAttrs) - 1

            self._updateEnrichment()

    def setReferenceDataset(self, data=None):
        self.referenceDataset = data
        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.clusterDataset is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            graph = self.Enrichment()
            self.SetGraph(graph)
        elif self.clusterDataset:
            self.__updateReferenceSetButton()

    def handleNewSignals(self):
        super().handleNewSignals()

    def _updateEnrichment(self):
        if self.clusterDataset is not None and \
                self.__state == OWGOEnrichmentAnalysis.Ready:
            pb = gui.ProgressBar(self, 100)
            self.Load(pb=pb)
            graph = self.Enrichment(pb=pb)
            self.FilterUnknownGenes()
            self.SetGraph(graph)

    def __updateReferenceSetButton(self):
        allgenes, refgenes = None, None
        if self.referenceDataset:
            try:
                allgenes = self.genesFromTable(self.referenceDataset)
            except Exception:
                allgenes = []
            refgenes, unknown = self.FilterAnnotatedGenes(allgenes)
        self.referenceRadioBox.buttons[1].setDisabled(not bool(allgenes))
        self.referenceRadioBox.buttons[1].setText("Reference set " + (
            "(%i genes, %i matched)" %
            (len(allgenes), len(refgenes)) if allgenes and refgenes else ""))

    def genesFromTable(self, data):
        if self.useAttrNames:
            genes = [v.name for v in data.domain.variables]
        else:
            attr = self.candidateGeneAttrs[min(
                self.geneAttrIndex,
                len(self.candidateGeneAttrs) - 1)]
            genes = [str(ex[attr]) for ex in data if not numpy.isnan(ex[attr])]
            if any("," in gene for gene in genes):
                self.information(
                    0,
                    "Separators detected in gene names. Assuming multiple genes per example."
                )
                genes = reduce(operator.iadd,
                               (genes.split(",") for genes in genes), [])
        return genes

    def FilterAnnotatedGenes(self, genes):
        matchedgenes = self.annotations.get_gene_names_translator(
            genes).values()
        return matchedgenes, [
            gene for gene in genes if gene not in matchedgenes
        ]

    def FilterUnknownGenes(self):
        if not self.useAttrNames and self.candidateGeneAttrs:
            geneAttr = self.candidateGeneAttrs[min(
                self.geneAttrIndex,
                len(self.candidateGeneAttrs) - 1)]
            indices = []
            for i, ex in enumerate(self.clusterDataset):
                if not any(
                        self.annotations.genematcher.match(n.strip())
                        for n in str(ex[geneAttr]).split(",")):
                    indices.append(i)
            if indices:
                data = self.clusterDataset[indices]
            else:
                data = None
            self.send("Data on Unknown Genes", data)
        else:
            self.send("Data on Unknown Genes", None)

    def Load(self, pb=None):

        if self.__state == OWGOEnrichmentAnalysis.Ready:
            go_files, tax_files = serverfiles.listfiles(
                "GO"), serverfiles.listfiles("Taxonomy")
            calls = []
            pb, finish = (gui.ProgressBar(self, 0),
                          True) if pb is None else (pb, False)
            count = 0
            if not tax_files:
                calls.append(("Taxonomy", "ncbi_taxnomy.tar.gz"))
                count += 1
            org = self.annotationCodes[min(self.annotationIndex,
                                           len(self.annotationCodes) - 1)]
            if org != self.loadedAnnotationCode:
                count += 1
                if self.annotationFiles[org] not in go_files:
                    calls.append(("GO", self.annotationFiles[org]))
                    count += 1

            if "gene_ontology_edit.obo.tar.gz" not in go_files:
                calls.append(("GO", "gene_ontology_edit.obo.tar.gz"))
                count += 1
            if not self.ontology:
                count += 1
            pb.iter += count * 100

            for args in calls:
                serverfiles.localpath_download(*args,
                                               **dict(callback=pb.advance))

            i = len(calls)
            if not self.ontology:
                self.ontology = go.Ontology(
                    progress_callback=lambda value: pb.advance())
                i += 1

            if org != self.loadedAnnotationCode:
                self.annotations = None
                gc.collect()  # Force run garbage collection
                code = self.annotationFiles[org].split(".")[-3]
                self.annotations = go.Annotations(
                    code,
                    genematcher=gene.GMDirect(),
                    progress_callback=lambda value: pb.advance())
                i += 1
                self.loadedAnnotationCode = org
                count = defaultdict(int)
                geneSets = defaultdict(set)

                for anno in self.annotations.annotations:
                    count[anno.evidence] += 1
                    geneSets[anno.evidence].add(anno.geneName)
                for etype in go.evidenceTypesOrdered:
                    ecb = self.evidenceCheckBoxDict[etype]
                    ecb.setEnabled(bool(count[etype]))
                    ecb.setText(etype + ": %i annots(%i genes)" %
                                (count[etype], len(geneSets[etype])))
            if finish:
                pb.finish()

    def SetGeneMatcher(self):
        if self.annotations:
            taxid = self.annotations.taxid
            matchers = []
            for matcher, use in zip(
                [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy],
                    self.geneMatcherSettings):
                if use:
                    try:
                        if taxid == "352472":
                            matchers.extend([
                                matcher(taxid),
                                gene.GMDicty(),
                                [matcher(taxid),
                                 gene.GMDicty()]
                            ])
                            # The reason machers are duplicated is that we want `matcher` or `GMDicty` to
                            # match genes by them self if possible. Only use the joint matcher if they fail.
                        else:
                            matchers.append(matcher(taxid))
                    except Exception as ex:
                        print(ex)
            self.annotations.genematcher = gene.matcher(matchers)
            self.annotations.genematcher.set_targets(
                self.annotations.gene_names)

    def Enrichment(self, pb=None):
        assert self.clusterDataset is not None

        pb = gui.ProgressBar(self, 100) if pb is None else pb
        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        if isinstance(self.annotations.genematcher, gene.GMDirect):
            self.SetGeneMatcher()
        self.error(1)
        self.warning([0, 1])

        if self.useAttrNames:
            clusterGenes = [
                v.name for v in self.clusterDataset.domain.attributes
            ]
            self.information(0)
        elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
            geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
            clusterGenes = [
                str(ex[geneAttr]) for ex in self.clusterDataset
                if not numpy.isnan(ex[geneAttr])
            ]
            if any("," in gene for gene in clusterGenes):
                self.information(
                    0,
                    "Separators detected in cluster gene names. Assuming multiple genes per example."
                )
                clusterGenes = reduce(operator.iadd,
                                      (genes.split(",")
                                       for genes in clusterGenes), [])
            else:
                self.information(0)
        else:
            self.error(1, "Failed to extract gene names from input dataset!")
            return {}

        genesSetCount = len(set(clusterGenes))

        self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator(
            clusterGenes).values()

        self.infoLabel.setText(
            "%i unique genes on input\n%i (%.1f%%) genes with known annotations"
            % (genesSetCount, len(clusterGenes), 100.0 * len(clusterGenes) /
               genesSetCount if genesSetCount else 0.0))

        referenceGenes = None
        if not self.useReferenceDataset or self.referenceDataset is None:
            self.information(2)
            self.information(1)
            referenceGenes = self.annotations.gene_names

        elif self.referenceDataset is not None:
            if self.useAttrNames:
                referenceGenes = [
                    v.name for v in self.referenceDataset.domain.attributes
                ]
                self.information(1)
            elif geneAttr in (self.referenceDataset.domain.variables +
                              self.referenceDataset.domain.metas):
                referenceGenes = [
                    str(ex[geneAttr]) for ex in self.referenceDataset
                    if not numpy.isnan(ex[geneAttr])
                ]
                if any("," in gene for gene in clusterGenes):
                    self.information(
                        1,
                        "Separators detected in reference gene names. Assuming multiple genes per example."
                    )
                    referenceGenes = reduce(operator.iadd,
                                            (genes.split(",")
                                             for genes in referenceGenes), [])
                else:
                    self.information(1)
            else:
                self.information(1)
                referenceGenes = None

            if referenceGenes is None:
                referenceGenes = list(self.annotations.gene_names)
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(
                    2,
                    "Unable to extract gene names from reference dataset. Using entire genome for reference"
                )
                self.useReferenceDataset = 0
            else:
                refc = len(referenceGenes)
                referenceGenes = self.annotations.get_gene_names_translator(
                    referenceGenes).values()
                self.referenceRadioBox.buttons[1].setText(
                    "Reference set (%i genes, %i matched)" %
                    (refc, len(referenceGenes)))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0

        if not referenceGenes:
            self.error(1, "No valid reference set")
            return {}

        self.referenceGenes = referenceGenes
        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)
        aspect = ["P", "C", "F"][self.aspectIndex]

        if clusterGenes:
            self.terms = terms = self.annotations.get_enriched_terms(
                clusterGenes,
                referenceGenes,
                evidences,
                aspect=aspect,
                prob=self.probFunctions[self.probFunc],
                use_fdr=False,
                progress_callback=lambda value: pb.advance())
            ids = []
            pvals = []
            for i, d in self.terms.items():
                ids.append(i)
                pvals.append(d[1])
            for i, fdr in zip(ids, stats.FDR(
                    pvals)):  # save FDR as the last part of the tuple
                terms[i] = tuple(list(terms[i]) + [fdr])

        else:
            self.terms = terms = {}
        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        pb.finish()
        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for id in ids:
            parents[id] = set([term for _, term in self.ontology[id].related])

        children = {}
        for term in self.terms:
            children[term] = set([id for id in ids if term in parents[id]])

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term],
                                                 children[term])
            if not self.ontology[term].related and not getattr(
                    self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term
        return terms

    def FilterGraph(self, graph):
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue:  #FDR
            graph = dict(
                filter(lambda item: item[1][3] <= self.maxPValue,
                       graph.items()))
        if self.filterByNumOfInstances:
            graph = dict(
                filter(lambda item: len(item[1][0]) >= self.minNumOfInstances,
                       graph.items()))
        return graph

    def FilterAndDisplayGraph(self):
        if self.clusterDataset:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        self.listView.clear()
        self.listViewItems = []
        self.sigTerms.clear()

    def DisplayGraph(self):
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []
        enrichment = lambda t: len(t[0]) / t[2] * (len(self.referenceGenes) /
                                                   len(self.clusterGenes))
        maxFoldEnrichment = max(
            [enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term],
                                               self.graph[term],
                                               len(self.clusterGenes),
                                               len(self.referenceGenes),
                                               maxFoldEnrichment,
                                               parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                if term in self.termListViewItemDict:
                    self.termListViewItemDict[term].append(displayNode)
                else:
                    self.termListViewItemDict[term] = [displayNode]
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        if self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = self.graph.items()
        terms = sorted(terms, key=lambda item: item[1][1])
        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for i, (t_id, (genes, p_value, refCount, fdr)) in enumerate(terms):
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.clusterGenes),
                                    len(self.referenceGenes),
                                    maxFoldEnrichment, self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [],
            [],
            # All is meta!
            [
                Orange.data.StringVariable("GO Term Id"),
                Orange.data.StringVariable("GO Term Name"),
                Orange.data.ContinuousVariable("Cluster Frequency"),
                Orange.data.ContinuousVariable("Genes in Cluster",
                                               number_of_decimals=0),
                Orange.data.ContinuousVariable("Reference Frequency"),
                Orange.data.ContinuousVariable("Genes in Reference",
                                               number_of_decimals=0),
                Orange.data.ContinuousVariable("p-value"),
                Orange.data.ContinuousVariable("FDR"),
                Orange.data.ContinuousVariable("Enrichment"),
                Orange.data.StringVariable("Genes")
            ])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.clusterGenes),
                  len(genes),
                  r_count / len(self.referenceGenes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.clusterGenes) * \
                  len(self.referenceGenes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X, metas=M)
        else:
            termsTable = Orange.data.Table(termsDomain)
        self.send("Enrichment Report", termsTable)

    def ViewSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selected = self.listView.selectedItems()
        self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
        self.ExampleSelection()
        self.selectionChanging = 0

    def TableSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selectedIds = set([
            self.sigTerms.itemFromIndex(index).goId
            for index in self.sigTerms.selectedIndexes()
        ])

        for i in range(self.sigTerms.topLevelItemCount()):
            item = self.sigTerms.topLevelItem(i)
            selected = item.goId in selectedIds
            term = item.goId

            if selected:
                self.selectedTerms.append(term)

            for lvi in self.termListViewItemDict[term]:
                try:
                    lvi.setSelected(selected)
                    if selected:
                        lvi.setExpanded(True)
                except RuntimeError:  # Underlying C/C++ object deleted
                    pass

        self.ExampleSelection()
        self.selectionChanging = 0

    def UpdateAddClassButton(self):
        self.addClassCB.setEnabled(self.selectionDisjoint == 1)

    def ExampleSelection(self):
        self.commit()

    def commit(self):
        if self.clusterDataset is None:
            return

        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms), set())

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                #             if getattr(self, "useEvidence" + etype):
                evidences.append(etype)
        allTerms = self.annotations.get_annotated_terms(
            genes,
            direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            ccount = 1 if self.selectionDisjoint == 1 else len(
                self.selectedTerms)
            selectedGenes = [
                gene for gene, c in count.items()
                if c == ccount and gene in genes
            ]
        else:
            selectedGenes = reduce(operator.ior,
                                   (set(allTerms.get(term, []))
                                    for term in self.selectedTerms), set())

        if self.useAttrNames:
            vars = [
                self.clusterDataset.domain[gene] for gene in set(selectedGenes)
            ]
            domain = Orange.data.Domain(vars,
                                        self.clusterDataset.domain.class_vars,
                                        self.clusterDataset.domain.metas)
            newdata = self.clusterDataset.from_table(domain,
                                                     self.clusterDataset)

            self.send("Data on Selected Genes", newdata)
            self.send("Data on Unselected Genes", None)
        elif self.candidateGeneAttrs:
            selectedExamples = []
            unselectedExamples = []

            geneAttr = self.candidateGeneAttrs[min(
                self.geneAttrIndex,
                len(self.candidateGeneAttrs) - 1)]

            if self.selectionDisjoint == 1:
                goVar = Orange.data.DiscreteVariable("GO Term",
                                                     values=list(
                                                         self.selectedTerms))
                newDomain = Orange.data.Domain(
                    self.clusterDataset.domain.variables, goVar,
                    self.clusterDataset.domain.metas)
                goColumn = []
            for i, ex in enumerate(self.clusterDataset):
                if not numpy.isnan(ex[geneAttr]) and any(
                        gene in selectedGenes
                        for gene in str(ex[geneAttr]).split(",")):
                    if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                        terms = filter(
                            lambda term: any(gene in self.graph[term][0]
                                             for gene in str(ex[geneAttr]).
                                             split(",")), self.selectedTerms)
                        term = sorted(terms)[0]
                        goColumn.append(goVar.values.index(term))
                    selectedExamples.append(i)
                else:
                    unselectedExamples.append(i)

            if selectedExamples:
                selectedExamples = self.clusterDataset[selectedExamples]
                if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                    selectedExamples = Orange.data.Table.from_table(
                        newDomain, selectedExamples)
                    view, issparse = selectedExamples.get_column_view(goVar)
                    assert not issparse
                    view[:] = goColumn
            else:
                selectedExamples = None

            if unselectedExamples:
                unselectedExamples = self.clusterDataset[unselectedExamples]
            else:
                unselectedExamples = None

            self.send("Data on Selected Genes", selectedExamples)
            self.send("Data on Unselected Genes", unselectedExamples)

    def ShowInfo(self):
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText(
            "Ontology:\n" +
            self.ontology.header if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" +
                      self.annotations.header.replace("!", "") if self.
                      annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
Пример #6
0
class OWGenExpress(widget.OWWidget):
    name = "GenExpress"
    description = "Expression data from GenExpress."
    icon = "../widgets/icons/GenCloud.svg"
    priority = 36

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("anonymous")
    password = settings.Setting("")
    log2 = settings.Setting(False)
    transpose = settings.Setting(False)
    rtypei = settings.Setting(0)
    projecti = settings.Setting(0)
    serveri = settings.Setting(0)
    exnamei = settings.Setting(6)

    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    currentSelection = settings.Setting(None)

    experimentsHeaderState = settings.Setting(
        {name: False
         for _, name in HEADER[:ID_INDEX + 1]})

    storedSortOrder = settings.Setting([])
    storedSelections = settings.Setting([])

    def __init__(self, parent=None):
        super().__init__(parent)

        self.servers = [
            ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'),
            ('https://cloud.genialis.com/', 'Genialis'),
        ]

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.items = []

        self.result_types = []

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        box = gui.widgetBox(self.controlArea, 'Project')
        self.projectCB = gui.comboBox(box,
                                      self,
                                      "projecti",
                                      items=[],
                                      callback=self.ProjectChosen)

        self.projects = []

        b = gui.widgetBox(self.controlArea, "Selection bookmarks")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred,
                                               QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)

        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred,
                                                QSizePolicy.Maximum)

        box = gui.widgetBox(self.controlArea, 'Experiment name')
        self.experimentNameCB = gui.comboBox(box,
                                             self,
                                             "exnamei",
                                             items=SORTING_MODEL_LIST)

        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)
        self.columnsSortingWidget.sortingOrder = self.storedSortOrder

        def store_sort_order():
            self.storedSortOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(box,
                                              self,
                                              "rtypei",
                                              items=[],
                                              callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        gui.checkBox(self.controlArea, self, "transpose", "Genes as columns")

        self.commit_button = gui.button(self.controlArea,
                                        self,
                                        "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Server')
        gui.comboBox(box,
                     self,
                     "serveri",
                     items=[title for url, title in self.servers],
                     callback=self.ServerChosen)

        gui.lineEdit(box,
                     self,
                     "username",
                     "Username:"******"password",
                                  "Password:"******"Clear cache",
                   callback=self.clear_cache)

        gui.lineEdit(self.mainArea,
                     self,
                     "searchString",
                     "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())
        self.selectionSetsWidget.setSelections(self.storedSelections)

        self.mainArea.layout().addWidget(self.experimentsWidget)

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.ConnectAndUpdate)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        if self.dbc:

            def get_data_count(project_id):
                # XXX: is there a better way?
                # Note: limit 0 would return all objects
                return self.dbc.gen.api.data.get(
                    case_ids__contains=project_id,
                    type__startswith='data:expression:',
                    limit=1)['meta']['total_count']

            self.projects = sorted([
                p for p in self.dbc.projects().items()
                if get_data_count(p[0]) > 0
            ],
                                   key=lambda x: x[1])
            self.UpdateProjects()
            self.ProjectChosen()
            self.UpdateExperimentTypes()

    def Connect(self):
        self.error(1)
        self.warning(1)

        username = '******'
        password = '******'
        url = self.servers[self.serveri][0]

        if self.username:
            username = self.username
            password = self.password

        if username.lower() in ['*****@*****.**', 'anonymous']:
            username = '******'
            password = '******'

        self.dbc = None
        self.projects = []
        self.result_types = []

        try:
            self.dbc = Genesis(address=url,
                               username=username,
                               password=password,
                               cache=self.buffer)
        except requests.exceptions.ConnectionError:
            self.dbc = Genesis(address=url,
                               username=username,
                               password=password,
                               connect=False,
                               cache=self.buffer)
            self.warning(1, "Could not connect to server, working from cache.")
        except Exception:
            self.error(1, "Wrong username or password.")

        self.UpdateProjects()
        self.UpdateExperimentTypes()  # clear lists

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei]
        else:
            return None

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [self.result_types_labels[desc] for desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        #do not update anything if the list is empty
        if len(self.result_types):
            self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateProjects(self):
        self.projectCB.clear()
        items = [desc for pid, desc in self.projects]
        self.projectCB.addItems(items)
        #do not update anything if the list if empty
        if len(self.projects) > 0:
            self.projecti = max(0, min(self.projecti, len(self.projects) - 1))

    def UpdateExperiments(self, reload=False):

        self.experimentsWidget.clear()

        if not self.dbc or not self.dbc.projectid:  # the connection did not succeed
            return

        self.items = []

        self.progressBarInit()

        result_types = []
        result_types_labels = []

        sucind = False  # success indicator for database index

        try:
            result_types, result_types_labels = self.dbc.result_types(
                reload=reload)
            sucind = True
        except Exception:
            try:
                result_types, result_types_labels = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.result_types = result_types
        self.result_types_labels = result_types_labels

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def ProjectChosen(self, reload=False):
        if self.projects:
            self.dbc.projectid = self.projects[self.projecti][0]
        else:
            self.dbc.projectid = None

        self.UpdateExperiments(reload=reload)

    def ServerChosen(self):
        self.ConnectAndUpdate()

    def UpdateResultsList(self, reload=False):
        results_list = self.dbc.results_list(self.rtype(), reload=reload)
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list

        #softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(ID_INDEX))
            items_shown[c] = i

        items_to_show = set(id_ for id_ in self.results_list)

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(ID_INDEX)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [
            it for i, it in enumerate(self.items) if i not in delete_ind
        ]

        for r_annot in add_items:
            d = defaultdict(lambda: "?", self.results_list[r_annot])
            row_items = [""] + [
                to_text(d.get(key, "?")) for key, _ in HEADER[1:]
            ]
            row_items[ID_INDEX] = r_annot

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)
            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        self.wantbufver = lambda x: self.results_list[x]["date_modified"]

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:

            for item in self.items:
                id = str(item.text(ID_INDEX))
                version = self.dbc._in_buffer(id + "|||" + self.rtype())
                value = " " if version == self.wantbufver(id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item
                                   for s in self.searchString.split()))

    def Commit(self):

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(ID_INDEX))
            ids.append(unique_id)

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = {name: name for key, name in HEADER}
        reverse_header_dict["ID"] = "id"

        allowed_labels = None

        def namefn(a):
            name = SORTING_MODEL_LIST[self.exnamei]
            name = reverse_header_dict.get(name, "id")
            return dict(a)[name]

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels,
                namefn=namefn)

            if self.joinreplicates:
                table = dicty.join_replicates(
                    table,
                    ignorenames=self.dbc.IGNORE_REPLICATE,
                    namefn="name",
                    avg=dicty.median,
                    fnshow=lambda x: " | ".join(map(str, x)))

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([
                    optfloat(atts.get(reverse_header_dict[name], ""), name)
                    for name in sortOrder
                ])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_vars,
                                        table.domain.metas)

            table = Orange.data.Table.from_table(domain, table)
            table = Orange.data.Table(domain, table)

            if self.transpose:
                experiments = [at for at in table.domain.variables]
                attr = [
                    compat.ContinuousVariable.make(ex['DDB'].value)
                    for ex in table
                ]
                metavars = sorted(table.domain.variables[0].attributes.keys())
                metavars = [
                    compat.StringVariable.make(name) for name in metavars
                ]
                domain = compat.create_domain(attr, None, metavars)
                metavars = compat.get_metas(domain)
                metas = [[exp.attributes[var.name] for var in metavars]
                         for exp in experiments]
                table = compat.create_table(domain, table.X.transpose(), None,
                                            metas)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(ID_INDEX,))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
class OWGOBrowser(widget.OWWidget):
    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/OWGOBrowser.svg"
    priority = 7

    inputs = [("Cluster Data", Orange.data.Table,
               "setDataset", widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table,
               "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True for et in go.evidenceTypesOrdered})

    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)

    class Error(widget.OWWidget.Error):
        serverfiles_unavailable = widget.Msg('Can not locate annotation files, '
                                             'please check your connection and try again.')
        missing_annotation = widget.Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = widget.Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = widget.Msg(ERROR_ON_MISSING_TAX_ID)

    def __init__(self, parent=None):
        super().__init__(self, parent)

        self.input_data = None
        self.ref_data = None
        self.ontology = None
        self.annotations = None
        self.loaded_annotation_code = None
        self.treeStructRootKey = None
        self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = State.Ready
        self.__scheduletimer = QTimer(self, singleShot=True)
        self.__scheduletimer.timeout.connect(self.__update)

        #############
        # GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        # Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self.__invalidate)

        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self.__invalidate)

        # Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        # use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self.__invalidate)  # TODO: only update the p values
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        # Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms",
                      "Outputs genes common to all selected GO terms"],
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}
        self.originalGraph = None

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        class AnnotationSlot(SimpleNamespace):
            taxid = ...  # type: str
            name = ...   # type: str
            filename = ...  # type:str

            @staticmethod
            def parse_tax_id(f_name):
                return f_name.split('.')[1]

        try:
            remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
        except (ConnectTimeout, RequestException, ConnectionError):
            # TODO: Warn user about failed connection to the remote server
            remote_files = []

        self.available_annotations = [
            AnnotationSlot(
                taxid=AnnotationSlot.parse_tax_id(annotation_file),
                name=taxonomy.common_taxid_to_name(AnnotationSlot.parse_tax_id(annotation_file)),
                filename=FILENAME_ANNOTATION.format(AnnotationSlot.parse_tax_id(annotation_file))
            )
            for _, annotation_file in set(remote_files + serverfiles.listfiles(DOMAIN))
            if annotation_file != FILENAME_ONTOLOGY

        ]
        self._executor = ThreadExecutor()

    def sizeHint(self):
        return QSize(1000, 700)

    def __on_evidenceChanged(self):
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self.__invalidate()

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        self.closeContext()
        self.clear()
        self.Error.clear()
        if data:
            self.input_data = data
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.input_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.input_data.attributes.get(GENE_ID_COLUMN, None)
            self.annotation_index = None

            if not(self.use_attr_names is not None
                   and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            _c2i = {a.taxid: i for i, a in enumerate(self.available_annotations)}
            try:
                self.annotation_index = _c2i[self.tax_id]
            except KeyError:
                self.Error.serverfiles_unavailable()
                # raise ValueError('Taxonomy {} not supported.'.format(self.tax_id))
                return

            self.__invalidate()

    def setReferenceDataset(self, data=None):
        self.Error.clear()
        if data:
            self.ref_data = data
            self.ref_tax_id = str(self.ref_data.attributes.get(TAX_ID, None))
            self.ref_use_attr_names = self.ref_data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.ref_gene_id_attribute = self.ref_data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.ref_gene_id_column = self.ref_data.attributes.get(GENE_ID_COLUMN, None)

            if not (self.ref_use_attr_names is not None
                    and ((self.ref_gene_id_attribute is None) ^ (self.ref_gene_id_column is None))):

                if self.ref_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.ref_tax_id is None:
                self.Error.missing_tax_id()
                return

        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.input_data is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            self.__invalidate()

    @Slot()
    def __invalidate(self):
        # Invalidate the current results or pending task and schedule an
        # update.
        self.__scheduletimer.start()
        if self.__state != State.Ready:
            self.__state |= State.Stale

        self.SetGraph({})
        self.ref_genes = None
        self.input_genes = None

    def __invalidateAnnotations(self):
        self.annotations = None
        self.loaded_annotation_code = None
        if self.input_data:
            self.infoLabel.setText("...\n")
        self.__invalidate()

    @Slot()
    def __update(self):
        self.__scheduletimer.stop()
        if self.input_data is None:
            return

        if self.__state & State.Running:
            self.__state |= State.Stale
        elif self.__state & State.Downloading:
            self.__state |= State.Stale
        elif self.__state & State.Ready:
            if self.__ensure_data():
                self.Load()
                self.Enrichment()
            else:
                assert self.__state & State.Downloading
                assert self.isBlocking()

    def __get_ref_genes(self):
        self.ref_genes = []

        if self.ref_use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.ref_genes.append(str(variable.attributes.get(self.ref_gene_id_attribute, '?')))
        else:
            genes, _ = self.ref_data.get_column_view(self.ref_gene_id_column)
            self.ref_genes = [str(g) for g in genes]

    def __get_input_genes(self):
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes .append(str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def FilterAnnotatedGenes(self, genes):
        matchedgenes = self.annotations.get_gene_names_translator(genes).values()
        return matchedgenes, [gene for gene in genes if gene not in matchedgenes]

    def __start_download(self, files_list):
        # type: (List[Tuple[str, str]]) -> None
        task = EnsureDownloaded(files_list)
        task.progress.connect(self._progressBarSet)

        f = self._executor.submit(task)
        fw = FutureWatcher(f, self)
        fw.finished.connect(self.__download_finish)
        fw.finished.connect(fw.deleteLater)
        fw.resultReady.connect(self.__invalidate)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Downloading")
        self.__state = State.Downloading

    @Slot(Future)
    def __download_finish(self, result):
        # type: (Future[None]) -> None
        assert QThread.currentThread() is self.thread()
        assert result.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.progressBarFinished(processEvents=False)
        try:
            result.result()
        except ConnectTimeout:
            logging.getLogger(__name__).error("Error:")
            self.error(2, "Internet connection error, unable to load data. " +
                       "Check connection and create a new GO Browser widget.")
        except RequestException as err:
            logging.getLogger(__name__).error("Error:")
            self.error(2, "Internet error:\n" + str(err))
        except BaseException as err:
            logging.getLogger(__name__).error("Error:")
            self.error(2, "Error:\n" + str(err))
            raise
        else:
            self.error(2)
        finally:
            self.__state = State.Ready

    def __ensure_data(self):
        # Ensure that all required database (ontology and annotations for
        # the current selected organism are present. If not start a download in
        # the background. Return True if all dbs are present and false
        # otherwise
        assert self.__state == State.Ready
        annotation = self.available_annotations[self.annotation_index]
        go_files = [fname for domain, fname in serverfiles.listfiles(DOMAIN)]
        files = []

        if annotation.filename not in go_files:
            files.append(("GO", annotation.filename))

        if FILENAME_ONTOLOGY not in go_files:
            files.append((DOMAIN, FILENAME_ONTOLOGY))
        if files:
            self.__start_download(files)
            assert self.__state == State.Downloading
            return False
        else:
            return True

    def Load(self):
        a = self.available_annotations[self.annotation_index]

        if self.ontology is None:
            self.ontology = go.Ontology()

        if a.taxid != self.loaded_annotation_code:
            self.annotations = None
            gc.collect()  # Force run garbage collection
            self.annotations = go.Annotations(a.taxid)
            self.loaded_annotation_code = a.taxid
            count = defaultdict(int)
            geneSets = defaultdict(set)

            for anno in self.annotations.annotations:
                count[anno.evidence] += 1
                geneSets[anno.evidence].add(anno.gene_id)
            for etype in go.evidenceTypesOrdered:
                ecb = self.evidenceCheckBoxDict[etype]
                ecb.setEnabled(bool(count[etype]))
                ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype])))

    def Enrichment(self):
        assert self.input_data is not None
        assert self.__state == State.Ready

        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        self.error(1)
        self.warning([0, 1])

        self.__get_input_genes()
        self.input_genes = set(self.input_genes)
        self.known_input_genes = self.annotations.get_genes_with_known_annotation(self.input_genes)

        # self.clusterGenes = clusterGenes = self.annotations.map_to_ncbi_id(self.input_genes).values()

        self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" %
                               (len(self.input_genes), len(self.known_input_genes),
                                100.0*len(self.known_input_genes)/len(self.input_genes)
                                if len(self.input_genes) else 0.0))

        if not self.useReferenceDataset or self.ref_data is None:
            self.information(2)
            self.information(1)
            self.ref_genes = self.annotations.genes()
            self.ref_genes = set(self.ref_genes)

        elif self.ref_data is not None:
            self.__get_ref_genes()
            self.ref_genes = set(self.ref_genes)

            ref_count = len(self.ref_genes)
            if ref_count == 0:
                self.ref_genes = self.annotations.genes()
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(2, "Unable to extract gene names from reference dataset. "
                                    "Using entire genome for reference")
                self.useReferenceDataset = 0
            else:
                self.referenceRadioBox.buttons[1].setText("Reference set ({} genes)".format(ref_count))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0
            self.ref_genes = []

        if not self.ref_genes:
            self.error(1, "No valid reference set")
            return {}

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)
        aspect = ['Process', 'Component', 'Function'][self.aspectIndex]

        self.progressBarInit(processEvents=False)
        self.setBlocking(True)
        self.__state = State.Running

        if self.input_genes:
            f = self._executor.submit(
                self.annotations.get_enriched_terms,
                self.input_genes, self.ref_genes, evidences, aspect=aspect,
                prob=self.probFunctions[self.probFunc], use_fdr=False,

                progress_callback=methodinvoke(
                    self, "_progressBarSet", (float,))
            )
            fw = FutureWatcher(f, parent=self)
            fw.done.connect(self.__on_enrichment_done)
            fw.done.connect(fw.deleteLater)
            return
        else:
            f = Future()
            f.set_result({})
            self.__on_enrichment_done(f)

    def __on_enrichment_done(self, results):
        # type: (Future[Dict[str, tuple]]) -> None
        self.progressBarFinished(processEvents=False)
        self.setBlocking(False)
        self.setStatusMessage("")
        if self.__state & State.Stale:
            self.__state = State.Ready
            self.__invalidate()
            return

        self.__state = State.Ready
        try:
            results = results.result()  # type: Dict[str, tuple]
        except Exception as ex:
            results = {}
            error = str(ex)
            self.error(1, error)

        if results:
            terms = list(results.items())
            fdr_vals = statistics.FDR([d[1] for _, d in terms])
            terms = [(key, d + (fdr,))
                     for (key, d), fdr in zip(terms, fdr_vals)]
            terms = dict(terms)

        else:
            terms = {}

        self.terms = terms

        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for id in ids:
            parents[id] = set([term for _, term in self.ontology[id].related])

        children = {}
        for term in self.terms:
            children[term] = set([id for id in ids if term in parents[id]])

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term], children[term])
            if not self.ontology[term].related and not getattr(self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term

        self.SetGraph(terms)
        self._updateEnrichmentReportOutput()
        self.commit()

    def _updateEnrichmentReportOutput(self):
        terms = sorted(self.terms.items(), key=lambda item: item[1][1])
        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [], [],
            # All is meta!
            [Orange.data.StringVariable("GO Term Id"),
             Orange.data.StringVariable("GO Term Name"),
             Orange.data.ContinuousVariable("Cluster Frequency"),
             Orange.data.ContinuousVariable("Genes in Cluster",
                                            number_of_decimals=0),
             Orange.data.ContinuousVariable("Reference Frequency"),
             Orange.data.ContinuousVariable("Genes in Reference",
                                            number_of_decimals=0),
             Orange.data.ContinuousVariable("p-value"),
             Orange.data.ContinuousVariable("FDR"),
             Orange.data.ContinuousVariable("Enrichment"),
             Orange.data.StringVariable("Genes")])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.input_genes),
                  len(genes),
                  r_count / len(self.ref_genes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.input_genes) * \
                  len(self.ref_genes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms
                 if genes and r_count]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X,
                                                      metas=M)
        else:
            termsTable = None
        self.send("Enrichment Report", termsTable)

    @Slot(float)
    def _progressBarSet(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot()
    def _progressBarFinish(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarFinished(processEvents=None)

    def FilterGraph(self, graph):
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue:  # FDR
            graph = dict(filter(lambda item: item[1][3] <= self.maxPValue, graph.items()))
        if self.filterByNumOfInstances:
            graph = dict(filter(lambda item: len(item[1][0]) >= self.minNumOfInstances, graph.items()))
        return graph

    def FilterAndDisplayGraph(self):
        if self.input_data and self.originalGraph is not None:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        self.listView.clear()
        self.listViewItems=[]
        self.sigTerms.clear()

    def DisplayGraph(self):
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []

        def enrichment(t):
            try:
                return len(t[0]) / t[2] * (len(self.ref_genes) / len(self.input_genes))
            except ZeroDivisionError:
                # TODO: find out why this happens
                return 0

        maxFoldEnrichment = max([enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term], len(self.input_genes),
                                               len(self.ref_genes), maxFoldEnrichment, parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                if term in self.termListViewItemDict:
                    self.termListViewItemDict[term].append(displayNode)
                else:
                    self.termListViewItemDict[term] = [displayNode]
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        if self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = self.graph.items()
        terms = sorted(terms, key=lambda item: item[1][1])
        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for i, (t_id, (genes, p_value, refCount, fdr)) in enumerate(terms):
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.input_genes),
                                    len(self.ref_genes),
                                    maxFoldEnrichment,
                                    self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

    def ViewSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selected = self.listView.selectedItems()
        self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
        self.ExampleSelection()
        self.selectionChanging = 0

    def TableSelectionChanged(self):
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selectedIds = set([self.sigTerms.itemFromIndex(index).goId for index in self.sigTerms.selectedIndexes()])

        for i in range(self.sigTerms.topLevelItemCount()):
            item = self.sigTerms.topLevelItem(i)
            selected = item.goId in selectedIds
            term = item.goId

            if selected:
                self.selectedTerms.append(term)

            for lvi in self.termListViewItemDict[term]:
                try:
                    lvi.setSelected(selected)
                    if selected:
                        lvi.setExpanded(True)
                except RuntimeError:  # Underlying C/C++ object deleted
                    pass
        self.selectionChanging = 0
        self.ExampleSelection()

    def ExampleSelection(self):
        self.commit()

    def commit(self):
        if self.input_data is None or self.originalGraph is None or \
                self.annotations is None:
            return
        if self.__state & State.Stale:
            return

        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms), set())

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
                evidences.append(etype)

        allTerms = self.annotations.get_annotated_terms(
            genes, direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            ccount = 1 if self.selectionDisjoint == 1 else len(self.selectedTerms)
            selected_genes = [gene for gene, c in count.items() if c == ccount and gene in genes]
        else:
            selected_genes = reduce(
                operator.ior,
                (set(allTerms.get(term, [])) for term in self.selectedTerms), set())

        if self.use_attr_names:
            selected = [column for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes and
                        str(column.attributes[self.gene_id_attribute]) in set(selected_genes)]

            domain = Orange.data.Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas)
            new_data = self.input_data.from_table(domain, self.input_data)
            self.send("Data on Selected Genes", new_data)

        else:
            selected_rows = []
            for row_index, row in enumerate(self.input_data):
                gene_in_row = str(row[self.gene_id_column])
                if gene_in_row in self.input_genes and gene_in_row in selected_genes:
                    selected_rows.append(row_index)

                if selected_rows:
                    selected = self.input_data[selected_rows]
                else:
                    selected = None

                self.send("Data on Selected Genes", selected)

    def ShowInfo(self):
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText("Ontology:\n" + self.ontology.header
                      if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" + self.annotations.header.replace("!", "")
                      if self.annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
Пример #8
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive
        )
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
#             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0),  " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [bytes(sp.saveState()) for sp in self.splitters]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']), ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data", [("Samples", self.currentGds['sample_count']),
                                   ("Features", self.currentGds['feature_count']),
                                   ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append((subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
Пример #9
0
class OWSetEnrichment(widget.OWWidget):
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea,
            self,
            "speciesIndex",
            "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(box,
                                             self,
                                             "geneattr",
                                             "Entity feature",
                                             sendSelectedValue=0,
                                             callback=self.updateAnnotations)

        cb = gui.checkBox(box,
                          self,
                          "genesinrows",
                          "Use feature names",
                          callback=self.updateAnnotations,
                          disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

        #         gui.button(box, self, "Gene matcher settings",
        #                    callback=self.updateGeneMatcherSettings,
        #                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self,
            "useReferenceData", ["All entities", "Reference set (input)"],
            tooltips=[
                "Use entire genome (for gene set enrichment) or all " +
                "available entities for reference",
                "Use entities from Reference Examples input signal " +
                "as reference"
            ],
            box="Reference",
            callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget,
                 self,
                 "minClusterCount",
                 0,
                 100,
                 label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(pvalfilterbox,
                          self,
                          "useMaxPValFilter",
                          "p-value",
                          callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox,
            self,
            "maxPValue",
            0.0,
            1.0,
            0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(fdrfilterbox,
                          self,
                          "useMaxFDRFilter",
                          "FDR",
                          callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox,
            self,
            "maxFDR",
            0.0,
            1.0,
            0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QLineEdit(self, placeholderText="Filter ...")

        self.filterCompleter = QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(
            contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN,
                                  taxonomy.Taxonomy.FILENAME),
                                 (geneset.sfdomain, "index.pck")])

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(parent=self,
                                        threadPool=QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the the widget's initialization (preferably after
        # ensuring all required databases have been downloaded.

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        self._clearView()

        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [
                var for var in data.domain.variables + data.domain.metas
                if isinstance(var, Orange.data.StringVariable)
            ]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(data, "genesinrows",
                                                   self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        self.__invalidated = True

    def genesFromTable(self, table):
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        def recursive_dict():
            return defaultdict(recursive_dict)

        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key, )
                item = QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable
                              | Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                checked = self.categoriesCheckState.get((full_cat, org),
                                                        Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        self.setHierarchy(*self.getHierarchy(
            taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [
            key for key, check in self.getHierarchyCheckState().items()
            if check == Qt.Checked
        ]

    def getHierarchyCheckState(self):
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name, )
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [
            self.groupsWidget.topLevelItem(i)
            for i in range(self.groupsWidget.topLevelItemCount())
        ]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        raise NotImplementedError

        from .OWGOEnrichmentAnalysis import GeneMatcherDialog
        dialog = GeneMatcherDialog(self,
                                   defaults=self.geneMatcherSettings,
                                   enabled=[True] * 4,
                                   modal=True)
        if dialog.exec_():
            self.geneMatcherSettings = [
                getattr(dialog, item[0]) for item in dialog.items
            ]
            self._invalidateGeneMatcher()
            if self.data is not None:
                self.updateAnnotations()

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [
            m for m, use in zip(matchers, self.geneMatcherSettings) if use
        ]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.error(0, "Input data contains no columns with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(
                filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes == None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float, ))
        info = methodinvoke(self, "_setRunInfo", (str, ))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results), default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels([
            "Category", "Term", "Count", "Reference count", "p-value", "FDR",
            "Enrichment"
        ])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped,
                     user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped,
                     user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(
                ),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit)

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(
            gsname(geneset) for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames), set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max(
                (e.enrichment_score
                 for _, e in results if np.isfinite(e.enrichment_score)),
                default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score)))

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView))

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))


#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        state = self.state
        if state is None:
            self.error(0, )
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) /
                            state.query_count if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat)
                         for cat, _ in self.selectedCategories())

        #         filterString = str(self.filterLineEdit.text()).lower()

        model = self.annotationsChartView.model()

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [
            model.item(i, 4).data(Qt.UserRole)
            for i, hidden in enumerate(hidemask) if not hidden
        ]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(index, QModelIndex(),
                                                   hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        rows = self.annotationsChartView.selectionModel().selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior, (set(item.enrichment.query_mapped)
                                       for item in selected), set())
        assert self.state.namematcher.done()
        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [
                attr for attr in self.data.domain.attributes
                if matcher.umatch(attr.name) in mapped
            ]

            newdomain = Orange.data.Domain(mapped, self.data.domain.class_vars,
                                           self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [
                i for i, ex in enumerate(self.data)
                if matcher.umatch(str(ex[geneattr])) in mapped
            ]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
Пример #10
0
class OWGenExpress(widget.OWWidget):
    name = "GenExpress"
    description = "Expression data from GenExpress."
    icon = "../widgets/icons/GenCloud.svg"
    priority = 36

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("anonymous")
    password = settings.Setting("")
    log2 = settings.Setting(False)
    transpose = settings.Setting(False)
    rtypei = settings.Setting(0)
    projecti = settings.Setting(0)
    serveri = settings.Setting(0)
    exnamei = settings.Setting(6)

    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    currentSelection = settings.Setting(None)

    experimentsHeaderState = settings.Setting({
        name: False for _, name in HEADER[:ID_INDEX + 1]}
    )

    storedSortOrder = settings.Setting([])
    storedSelections = settings.Setting([])

    def __init__(self, parent=None):
        super().__init__(parent)

        self.servers = [
            ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'),
            ('https://cloud.genialis.com/', 'Genialis'),
        ]

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.items = []

        self.result_types = []

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        box = gui.widgetBox(self.controlArea, 'Project')
        self.projectCB = gui.comboBox(
            box, self, "projecti", items=[], callback=self.ProjectChosen)

        self.projects = []

        b = gui.widgetBox(self.controlArea, "Selection bookmarks")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections
        self.selectionSetsWidget.selectionModified.connect(store_selections)

        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        box = gui.widgetBox(self.controlArea, 'Experiment name')
        self.experimentNameCB = gui.comboBox(
            box, self, "exnamei", items=SORTING_MODEL_LIST)

        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)
        self.columnsSortingWidget.sortingOrder = self.storedSortOrder

        def store_sort_order():
            self.storedSortOrder = self.columnsSortingWidget.sortingOrder
        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)

        gui.separator(self.controlArea)


        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        gui.checkBox(self.controlArea, self, "transpose",
                     "Genes as columns")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Server')
        gui.comboBox(box, self, "serveri",
                     items=[title for url, title in self.servers],
                     callback=self.ServerChosen)

        gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Clear cache",
                   callback=self.clear_cache)

        gui.lineEdit(self.mainArea, self, "searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())
        self.selectionSetsWidget.setSelections(self.storedSelections)

        self.mainArea.layout().addWidget(self.experimentsWidget)

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.ConnectAndUpdate)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        if self.dbc:
            def get_data_count(project_id):
                # XXX: is there a better way?
                # Note: limit 0 would return all objects
                return self.dbc.gen.api.data.get(case_ids__contains=project_id,
                                                 type__startswith='data:expression:',
                                                 limit=1)['meta']['total_count']

            self.projects = sorted([p for p in self.dbc.projects().items() if
                                    get_data_count(p[0]) > 0], key=lambda x: x[1])
            self.UpdateProjects()
            self.ProjectChosen()
            self.UpdateExperimentTypes()

    def Connect(self):
        self.error(1)
        self.warning(1)

        username = '******'
        password = '******'
        url = self.servers[self.serveri][0]

        if self.username:
            username = self.username
            password = self.password

        if username.lower() in ['*****@*****.**', 'anonymous']:
            username = '******'
            password = '******'

        self.dbc = None
        self.projects = []
        self.result_types = []

        try:
            self.dbc = Genesis(
                address=url, username=username, password=password,
                cache=self.buffer)
        except requests.exceptions.ConnectionError:
            self.dbc = Genesis(
                address=url, username=username, password=password,
                connect=False, cache=self.buffer)
            self.warning(1, "Could not connect to server, working from cache.")
        except Exception:
            self.error(1, "Wrong username or password.")

        self.UpdateProjects()
        self.UpdateExperimentTypes()  # clear lists

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei]
        else:
            return None

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [self.result_types_labels[desc] for desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        #do not update anything if the list is empty
        if len(self.result_types):
            self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateProjects(self):
        self.projectCB.clear()
        items = [desc for pid, desc in self.projects]
        self.projectCB.addItems(items)
        #do not update anything if the list if empty
        if len(self.projects) > 0:
            self.projecti = max(0, min(self.projecti, len(self.projects) - 1))

    def UpdateExperiments(self, reload=False):

        self.experimentsWidget.clear()

        if not self.dbc or not self.dbc.projectid:  # the connection did not succeed
            return

        self.items = []

        self.progressBarInit()

        result_types = []
        result_types_labels = []

        sucind = False  # success indicator for database index

        try:
            result_types, result_types_labels = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception:
            try:
                result_types, result_types_labels = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.result_types = result_types
        self.result_types_labels = result_types_labels

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def ProjectChosen(self, reload=False):
        if self.projects:
            self.dbc.projectid = self.projects[self.projecti][0]
        else:
            self.dbc.projectid = None

        self.UpdateExperiments(reload=reload)

    def ServerChosen(self):
        self.ConnectAndUpdate()

    def UpdateResultsList(self, reload=False):
        results_list = self.dbc.results_list(self.rtype(), reload=reload)
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list

        #softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(ID_INDEX))
            items_shown[c] = i

        items_to_show = set(id_ for id_ in self.results_list)

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(ID_INDEX)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [it for i, it in enumerate(self.items)
                      if i not in delete_ind]

        for r_annot in add_items:
            d = defaultdict(lambda: "?", self.results_list[r_annot])
            row_items = [""] + [to_text(d.get(key, "?"))
                                for key, _ in HEADER[1:]]
            row_items[ID_INDEX] = r_annot

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)
            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        self.wantbufver = lambda x: self.results_list[x]["date_modified"]

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:

            for item in self.items:
                id = str(item.text(ID_INDEX))
                version = self.dbc._in_buffer(id + "|||" + self.rtype())
                value = " " if version == self.wantbufver(id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(
                not all(s in item for s in self.searchString.split()))

    def Commit(self):

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(ID_INDEX))
            ids.append(unique_id)

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = {name: name for key, name in HEADER}
        reverse_header_dict["ID"] = "id"

        allowed_labels = None

        def namefn(a):
            name = SORTING_MODEL_LIST[self.exnamei]
            name = reverse_header_dict.get(name, "id")
            return dict(a)[name]

        if len(ids):
            table = self.dbc.get_data(
                ids=ids, result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels,
                namefn=namefn)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                    ignorenames=self.dbc.IGNORE_REPLICATE,
                    namefn="name",
                    avg=dicty.median,
                    fnshow=lambda x: " | ".join(map(str, x)))

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name)
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(
                attributes, table.domain.class_vars, table.domain.metas)

            table = Orange.data.Table.from_table(domain, table)
            table = Orange.data.Table(domain, table)

            if self.transpose:
                experiments = [at for at in table.domain.variables]
                attr = [compat.ContinuousVariable.make(ex['DDB'].value) for ex in table]
                metavars = sorted(table.domain.variables[0].attributes.keys())
                metavars = [compat.StringVariable.make(name) for name in metavars]
                domain = compat.create_domain(attr, None, metavars)
                metas = [[exp.attributes[var.name] for var in metavars] for exp in experiments]
                table = compat.create_table(domain, table.X.transpose(), None, metas)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(ID_INDEX,))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
Пример #11
0
class OWSetEnrichment(widget.OWWidget):
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []

        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

#         gui.button(box, self, "Gene matcher settings",
#                    callback=self.updateGeneMatcherSettings,
#                    tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea,
            self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 600)

    def __initialize_finish(self):
        # Finalize the the widget's initialization (preferably after
        # ensuring all required databases have been downloaded.

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """Set the current organism `taxid`."""
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid"""
        return self.taxid

    def __on_speciesIndexChanged(self):
        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None

        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment

        self._clearView()

        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names"""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        self.__invalidated = True

    def genesFromTable(self, table):
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        raise NotImplementedError

        from .OWGOEnrichmentAnalysis import GeneMatcherDialog
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True)
        if dialog.exec_():
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            self._invalidateGeneMatcher()
            if self.data is not None:
                self.updateAnnotations()

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and len(self.geneAttrs) == 0:
            self.error(0, "Input data contains no columns with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes == None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s  (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s  (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
#             self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        state = self.state
        if state is None:
            self.error(0,)
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat)
                         for cat, _ in self.selectedCategories())

#         filterString = str(self.filterLineEdit.text()).lower()

        model = self.annotationsChartView.model()

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [model.item(i, 4).data(Qt.UserRole)
                 for i, hidden in enumerate(hidemask) if not hidden]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(
                index, QModelIndex(), hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        self.setStatusMessage(text)

    def commit(self):
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        rows = self.annotationsChartView.selectionModel().selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (set(item.enrichment.query_mapped)
                         for item in selected),
                        set())
        assert self.state.namematcher.done()
        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
Пример #12
0
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    selected_gds = Setting(None)
    gdsSelectionStates = Setting({})
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box,
                          self,
                          'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea,
                    self,
                    'auto_commit',
                    '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        self._annotations_updating = True
        self.annotations_widget.clear()

        annotations = defaultdict(set)
        subsets_count = {}

        for desc in gds['subsets']:
            annotations[desc['type']].add(desc['description'])
            subsets_count[desc['description']] = str(len(desc['sample_id']))

        for _type, subsets in annotations.items():
            key = (gds["name"], _type)
            parent = QTreeWidgetItem(self.annotations_widget, [_type])
            parent.key = key
            for subset in subsets:
                key = (gds['name'], _type, subset)
                item = QTreeWidgetItem(
                    parent, [subset, subsets_count.get(subset, '')])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key

        self._annotations_updating = False
        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        if self.selected_gds is not None:
            index = self.table_model.get_row_index(
                self.selected_gds.get('name'))
            if index is not None:
                self.table_view.selectionModel().blockSignals(True)
                self.table_view.selectRow(index)
                self._handle_selection_changed()
                self.table_view.selectionModel().blockSignals(False)

    def _handle_selection_changed(self):
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data(
            ) if len(selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if 'key' in item.__dict__:
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if 'key' in child.__dict__:
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        TODO: this could probably be simplified.

        """
        def childiter(item):
            """ Iterate over the children of an QTreeWidgetItem instance.
            """
            for i in range(item.childCount()):
                yield item.child(i)

        samples = []
        unused_types = []
        used_types = []

        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        _samples = defaultdict(list)
        for sample, sample_type in samples:
            _samples[sample].append(sample_type)
        return _samples

    def commit(self):
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        pass

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        self.report_items(
            "GEO Dataset",
            [
                ("ID", self.selected_gds['name']),
                ("Title", self.selected_gds['title']),
                ("Organism", self.selected_gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", self.selected_gds['sample_count']),
                ("Features", self.selected_gds['variables']),
                ("Genes", self.selected_gds['genes']),
            ],
        )
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.selected_gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for _type in subsets:
            self.report_html += "<b>" + _type + ":</b></br>"
            for desc, count in subsets[_type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"
Пример #13
0
class OWPIPAx(widget.OWWidget):
    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)
    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])
    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    experimentsHeaderState = settings.Setting(
        {name: False
         for _, name in HEADER[:ID_INDEX + 1]})

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload", callback=self.Reload)
        gui.button(self.controlArea,
                   self,
                   "Clear cache",
                   callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred,
                                               QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred,
                                                QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(box,
                                              self,
                                              "rtypei",
                                              items=[],
                                              callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea,
                                        self,
                                        "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        gui.lineEdit(box,
                     self,
                     "username",
                     "Username:"******"password",
                                  "Password:"******"searchString",
                     "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        self.error(1)
        self.warning(1)

        def en(x):
            return x if len(x) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=en(self.username),
                               password=self.password)

        # check password
        if en(self.username) != None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(
                        1, "Can not access database - using cached data.")
                except Exception as ex:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        mappings = {}
        result_types = []
        sucind = False  # success indicator for database index

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception as ex:
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception as ex:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):

        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception as ex:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception as ex:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key) \
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        elements = []

        # softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(10))
            items_shown[c] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [
            it for i, it in enumerate(self.items) if i not in delete_ind
        ]

        for r_annot in [items_to_show[i] for i in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(
                    time_dict["fullYearUTC"],
                    time_dict["monthUTC"] + 1,  # Why is month 0 based?
                    time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)
            elements.append(row_items)

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)

            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version?
        self.wantbufver = \
            lambda x, ad=self.results_list: \
            defaultdict(lambda: "?", ad[x])["date"]

        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:
            fn = self.dbc.download_key_function()
            result_id_key = dict(((m["data_id"], m["mappings_id"]), key) \
                                 for key, m in self.results_list.items())

            for item in self.items:
                c = str(item.text(10))
                mapping = self.mappings[c]
                data_id, mappings_id = mapping["data_id"], mapping["id"]
                r_id = result_id_key[data_id, mappings_id]
                # Get the buffered version
                buffered = self.dbc.inBuffer(fn(r_id))
                value = " " if buffered == self.wantbufver(r_id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item \
                                   for s in self.searchString.split())
                           )

    def Commit(self):
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(10))
            annots = self.mappings[unique_id]
            ids.append((annots["data_id"], annots["id"]))

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = dict((name, key) for key, name in HEADER)

        hview = self.experimentsWidget.header()
        shownHeaders = [label for i, label in \
                        list(enumerate(self.headerLabels))[1:] \
                        if not hview.isSectionHidden(i)
                        ]

        allowed_labels = [reverse_header_dict.get(label, label) \
                          for label in shownHeaders]

        if self.joinreplicates and "id" not in allowed_labels:
            # need 'id' labels in join_replicates for attribute names
            allowed_labels.append("id")

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                #                          bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                                              ignorenames=[
                                                  "replicate", "data_id",
                                                  "mappings_id", "data_name",
                                                  "id", "unique_id"
                                              ],
                                              namefn=None,
                                              avg=dicty.median)

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) \
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_var,
                                        table.domain.metas)
            table = table.from_table(domain, table)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
Пример #14
0
class OWWordEnrichment(OWWidget, ConcurrentWidgetMixin):
    # Basic widget info
    name = "Word Enrichment"
    description = "Word enrichment analysis for selected documents."
    icon = "icons/SetEnrichment.svg"
    priority = 600

    # Input/output
    class Inputs:
        selected_data = Input("Selected Data", Table)
        data = Input("Data", Table)

    want_main_area = True

    class Error(OWWidget.Error):
        no_bow_features = Msg('No bag-of-words features!')
        no_words_overlap = Msg('No words overlap!')
        empty_selection = Msg('Selected data is empty!')
        all_selected = Msg('All examples can not be selected!')

    # Settings
    filter_by_p: bool = Setting(False)
    filter_p_value: float = Setting(0.01)
    filter_by_fdr: bool = Setting(True)
    filter_fdr_value: float = Setting(0.2)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # Init data
        self.data = None
        self.selected_data = None
        # used for transforming the 'selected data' into the 'data' domain
        self.selected_data_transformed = None

        self.results = Result()

        # info box
        fbox = gui.widgetBox(self.controlArea, "Info")
        self.info_fil = gui.label(fbox, self, 'Words displayed: 0')

        # Filtering settings
        fbox = gui.widgetBox(self.controlArea, "Filter")
        hbox = gui.widgetBox(fbox, orientation=0)

        self.chb_p = gui.checkBox(hbox,
                                  self,
                                  "filter_by_p",
                                  "p-value",
                                  callback=self.filter_and_display,
                                  tooltip="Filter by word p-value")
        self.spin_p = gui.doubleSpin(hbox,
                                     self,
                                     'filter_p_value',
                                     1e-4,
                                     1,
                                     step=1e-4,
                                     labelWidth=15,
                                     callback=self.filter_and_display,
                                     tooltip="Max p-value for word")
        self.spin_p.setEnabled(self.filter_by_p)

        hbox = gui.widgetBox(fbox, orientation=0)
        self.chb_fdr = gui.checkBox(hbox,
                                    self,
                                    "filter_by_fdr",
                                    "FDR",
                                    callback=self.filter_and_display,
                                    tooltip="Filter by word FDR")
        self.spin_fdr = gui.doubleSpin(hbox,
                                       self,
                                       'filter_fdr_value',
                                       1e-4,
                                       1,
                                       step=1e-4,
                                       labelWidth=15,
                                       callback=self.filter_and_display,
                                       tooltip="Max p-value for word")
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        gui.rubber(self.controlArea)

        # Word's list view
        self.cols = ['Word', 'p-value', 'FDR']
        self.sig_words = QTreeWidget()
        self.sig_words.setColumnCount(len(self.cols))
        self.sig_words.setHeaderLabels(self.cols)
        self.sig_words.setSortingEnabled(True)
        self.sig_words.setSelectionMode(QTreeView.NoSelection)
        self.sig_words.sortByColumn(2, 0)  # 0 is ascending order
        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)
        self.mainArea.layout().addWidget(self.sig_words)

    def sizeHint(self):
        return QSize(450, 240)

    @Inputs.data
    def set_data(self, data=None):
        self.data = data
        # selected data transformed depends on data domain
        self.selected_data_transformed = None

    @Inputs.selected_data
    def set_data_selected(self, data=None):
        self.selected_data = data

    def handleNewSignals(self):
        self.check_data()

    def get_bow_domain(self):
        domain = self.data.domain
        return Domain(attributes=[
            a for a in domain.attributes
            if a.attributes.get('bow-feature', False)
        ],
                      class_vars=domain.class_vars,
                      metas=domain.metas,
                      source=domain)

    def check_data(self):
        self.Error.clear()
        if isinstance(self.data, Table) and \
                isinstance(self.selected_data, Table):
            if len(self.selected_data) == 0:
                self.Error.empty_selection()
                self.clear()
                return

            # keep only BoW features
            bow_domain = self.get_bow_domain()
            if len(bow_domain.attributes) == 0:
                self.Error.no_bow_features()
                self.clear()
                return
            self.data = Corpus.from_table(bow_domain, self.data)
            self.selected_data_transformed = Corpus.from_table(
                bow_domain, self.selected_data)

            if np_sp_sum(self.selected_data_transformed.X) == 0:
                self.Error.no_words_overlap()
                self.clear()
            elif len(self.data) == len(self.selected_data):
                self.Error.all_selected()
                self.clear()
            else:
                self.set_input_info()
                self.apply()
        else:
            self.clear()

    def clear(self):
        self.sig_words.clear()
        self.info.set_input_summary(self.info.NoInput)
        self.set_displayed_info(0)

    def filter_enabled(self, b):
        self.chb_p.setEnabled(b)
        self.chb_fdr.setEnabled(b)
        self.spin_p.setEnabled(b)
        self.spin_fdr.setEnabled(b)

    def filter_and_display(self):
        self.spin_p.setEnabled(self.filter_by_p)
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        self.sig_words.clear()

        if self.selected_data_transformed is None:  # do nothing when no Data
            return

        if self.results.words:
            count = self.build_tree()
        else:
            count = 0

        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)
        self.set_displayed_info(count)

    def build_tree(self) -> int:
        count = 0
        for word, pval, fval in zip(self.results.words, self.results.p_values,
                                    self.results.fdr_values):
            if ((not self.filter_by_p or pval <= self.filter_p_value) and
                (not self.filter_by_fdr or fval <= self.filter_fdr_value)):
                it = EATreeWidgetItem(word, pval, fval, self.sig_words)
                self.sig_words.addTopLevelItem(it)
                count += 1
        return count

    def set_input_info(self) -> None:
        cluster_words = len(self.selected_data_transformed.domain.attributes)
        selected_words = np.count_nonzero(
            np_sp_sum(self.selected_data_transformed.X, axis=0))

        self.info.set_input_summary(
            f"{cluster_words}|{selected_words}",
            f"Total words: {cluster_words}\n"
            f"Words in subset: {selected_words}")

    def set_displayed_info(self, count: int) -> None:
        self.info_fil.setText(f"Words displayed: {count}")

    def apply(self):
        self.sig_words.clear()
        self.filter_enabled(False)
        self.start(Runner.run, self.selected_data_transformed, self.data,
                   self.results)

    def on_done(self, result: Result) -> None:
        self.filter_and_display()
        self.filter_enabled(True)

    def on_exception(self, ex: Exception) -> None:
        self.filter_enabled(True)

    def tree_to_table(self):
        view = [self.cols]
        items = self.sig_words.topLevelItemCount()
        for i in range(items):
            line = []
            for j in range(3):
                line.append(self.sig_words.topLevelItem(i).text(j))
            view.append(line)
        return view

    def send_report(self):
        if self.results.words:
            self.report_table("Enriched words", self.tree_to_table())