def __init__(self):
    """Reset the widget's cached state and build its GUI.

    The control area receives an "Info" box with three labels and a
    "Filter" box holding a p-value threshold and an FDR threshold (each a
    check box plus a spin box). The main area receives the three-column
    word table.
    """
    super().__init__()

    # Init data
    self.data = None
    self.selected_data = None
    # used for transforming the 'selected data' into the 'data' domain
    self.selected_data_transformed = None

    self.words = []
    self.p_values = []
    self.fdr_values = []

    # Info section
    fbox = gui.widgetBox(self.controlArea, "Info")
    self.info_all = gui.label(fbox, self, 'Cluster words:')
    self.info_sel = gui.label(fbox, self, 'Selected words:')
    self.info_fil = gui.label(fbox, self, 'After filtering:')

    # Filtering settings
    fbox = gui.widgetBox(self.controlArea, "Filter")

    # p-value row: check box toggles the filter, spin box sets the threshold
    hbox = gui.widgetBox(fbox, orientation=0)
    self.chb_p = gui.checkBox(hbox, self, "filter_by_p", "p-value",
                              callback=self.filter_and_display,
                              tooltip="Filter by word p-value")
    self.spin_p = gui.doubleSpin(hbox, self, 'filter_p_value',
                                 1e-4, 1, step=1e-4, labelWidth=15,
                                 callback=self.filter_and_display,
                                 callbackOnReturn=True,
                                 tooltip="Max p-value for word")
    self.spin_p.setEnabled(self.filter_by_p)

    # FDR row: same layout as the p-value row
    hbox = gui.widgetBox(fbox, orientation=0)
    self.chb_fdr = gui.checkBox(hbox, self, "filter_by_fdr", "FDR",
                                callback=self.filter_and_display,
                                tooltip="Filter by word FDR")
    # The tooltip used to be a copy of the p-value one; this spin box
    # edits the FDR threshold.
    self.spin_fdr = gui.doubleSpin(hbox, self, 'filter_fdr_value',
                                   1e-4, 1, step=1e-4, labelWidth=15,
                                   callback=self.filter_and_display,
                                   callbackOnReturn=True,
                                   tooltip="Max FDR for word")
    self.spin_fdr.setEnabled(self.filter_by_fdr)
    gui.rubber(self.controlArea)

    # Word's list view
    self.cols = ['Word', 'p-value', 'FDR']
    self.sig_words = QTreeWidget()
    self.sig_words.setColumnCount(len(self.cols))
    self.sig_words.setHeaderLabels(self.cols)
    self.sig_words.setSortingEnabled(True)
    self.sig_words.setSelectionMode(QTreeView.ExtendedSelection)
    self.sig_words.sortByColumn(2, 0)  # column 2 is 'FDR'; 0 is ascending order
    for column in range(len(self.cols)):
        self.sig_words.resizeColumnToContents(column)
    self.mainArea.layout().addWidget(self.sig_words)
def __init__(self):
    """Initialise both base classes, reset cached state and build the GUI.

    The control area receives an "Info" label, a "Filter" box holding a
    p-value threshold and an FDR threshold (each a check box plus a spin
    box), and the auto-send control in the buttons area. The main area
    receives the three-column word table.
    """
    OWWidget.__init__(self)
    ConcurrentWidgetMixin.__init__(self)

    # Init data
    self.data = None
    self.selected_data = None
    # used for transforming the 'selected data' into the 'data' domain
    self.selected_data_transformed = None

    self.results = Result()

    # info box
    fbox = gui.widgetBox(self.controlArea, "Info")
    self.info_fil = gui.label(fbox, self, 'Words displayed: 0')

    # Filtering settings
    fbox = gui.widgetBox(self.controlArea, "Filter")

    # p-value row: check box toggles the filter, spin box sets the threshold
    hbox = gui.widgetBox(fbox, orientation=0)
    self.chb_p = gui.checkBox(hbox, self, "filter_by_p", "p-value",
                              callback=self.filter_and_display,
                              tooltip="Filter by word p-value")
    self.spin_p = gui.doubleSpin(hbox, self, 'filter_p_value',
                                 1e-4, 1, step=1e-4, labelWidth=15,
                                 callback=self.filter_and_display,
                                 tooltip="Max p-value for word")
    self.spin_p.setEnabled(self.filter_by_p)

    # FDR row: same layout as the p-value row
    hbox = gui.widgetBox(fbox, orientation=0)
    self.chb_fdr = gui.checkBox(hbox, self, "filter_by_fdr", "FDR",
                                callback=self.filter_and_display,
                                tooltip="Filter by word FDR")
    # The tooltip used to be a copy of the p-value one; this spin box
    # edits the FDR threshold.
    self.spin_fdr = gui.doubleSpin(hbox, self, 'filter_fdr_value',
                                   1e-4, 1, step=1e-4, labelWidth=15,
                                   callback=self.filter_and_display,
                                   tooltip="Max FDR for word")
    self.spin_fdr.setEnabled(self.filter_by_fdr)
    gui.rubber(self.controlArea)

    gui.auto_send(self.buttonsArea, self, "auto_apply")

    # Word's list view
    self.cols = ['Word', 'p-value', 'FDR']
    self.sig_words = QTreeWidget()
    self.sig_words.setColumnCount(len(self.cols))
    self.sig_words.setHeaderLabels(self.cols)
    self.sig_words.setSortingEnabled(True)
    self.sig_words.setSelectionMode(QTreeView.NoSelection)
    self.sig_words.sortByColumn(1, 0)  # column 1 is 'p-value'; 0 is ascending order
    for column in range(len(self.cols)):
        self.sig_words.resizeColumnToContents(column)
    self.mainArea.layout().addWidget(self.sig_words)
def __init__(self, box, parent, settings_var, **kwargs):
    # type: (Union[QGroupBox, QWidget], QWidget, str) -> None
    """Create the hierarchy tree widget and add it to ``box``'s layout.

    :param box: container whose layout receives the tree widget
    :param parent: stored on ``self.parent``
    :param settings_var: stored on ``self.stored_selection``
    :param kwargs: forwarded unchanged to the base-class initialiser
    """
    super().__init__(**kwargs)

    self.parent = parent
    self.stored_selection = settings_var

    # gene sets object
    self.gs_object = GeneSets()  # type: GeneSets

    # Header-less, read-only tree placed inside the supplied container.
    tree = self.hierarchy_tree_widget = QTreeWidget(self)
    tree.setHeaderHidden(True)
    tree.setEditTriggers(QTreeView.NoEditTriggers)
    box.layout().addWidget(tree)

    self.custom_set_hier = None
    self.default_selection = [
        ('GO', aspect)
        for aspect in ('molecular_function',
                       'biological_process',
                       'cellular_component')
    ]
def setup_gui(self):
    """Build the control area (info label, hierarchy tree, commit button)
    and the main area (filter line edit plus the sortable data view).
    """
    # control area
    info_box = vBox(self.controlArea, 'Input info')
    self.input_info = widgetLabel(info_box)

    hierarchy_box = widgetBox(self.controlArea, "Entity Sets")
    self.hierarchy_widget = QTreeWidget(self)
    self.hierarchy_widget.setEditTriggers(QTreeView.NoEditTriggers)
    self.hierarchy_widget.setHeaderLabels(HIERARCHY_HEADER_LABELS)
    self.hierarchy_widget.itemClicked.connect(self.display_gene_sets)
    hierarchy_box.layout().addWidget(self.hierarchy_widget)

    self.commit_button = auto_commit(self.controlArea, self, "auto_commit",
                                     "&Commit", box=False)

    # main area
    # Create the view first: the proxy model takes it as its Qt parent, and
    # previously `self.data_view` was read here before it had been assigned.
    self.data_view = QTreeView()

    self.filter_proxy_model = QSortFilterProxyModel(self.data_view)
    self.filter_proxy_model.setFilterKeyColumn(3)

    self.data_view.setModel(self.filter_proxy_model)
    self.data_view.setAlternatingRowColors(True)
    self.data_view.sortByColumn(2, Qt.DescendingOrder)
    self.data_view.setSortingEnabled(True)
    self.data_view.setSelectionMode(QTreeView.ExtendedSelection)
    self.data_view.setEditTriggers(QTreeView.NoEditTriggers)
    self.data_view.viewport().setMouseTracking(True)
    self.data_view.setItemDelegateForColumn(
        TERM, LinkStyledItemDelegate(self.data_view))

    # Every selection change in the view triggers a commit.
    self.data_view.selectionModel().selectionChanged.connect(self.commit)

    # Typing in the line edit updates the proxy model's filter pattern.
    self.lineEdit_filter = lineEdit(self.mainArea, self, 'search_pattern',
                                    'Filter gene sets:')
    self.lineEdit_filter.setPlaceholderText('search pattern ...')
    self.lineEdit_filter.textChanged.connect(
        self.filter_proxy_model.setFilterRegExp)

    self.mainArea.layout().addWidget(self.data_view)
def setup_control_area(self):
    """Populate the control area.

    Adds, top to bottom: the info label, the custom gene-set label combo
    box (initially disabled), the reference radio buttons (initially
    disabled), the gene-set category tree and the commit button.
    """
    self.input_info = widgetLabel(vBox(self.controlArea, 'Info'))

    custom_sets_box = vBox(self.controlArea, "Custom Gene Sets")
    self.gs_label_combobox = comboBox(
        custom_sets_box,
        self,
        "gene_set_label",
        sendSelectedValue=True,
        model=self.feature_model,
        callback=self.invalidate,
    )
    self.gs_label_combobox.setDisabled(True)

    reference_labels = ["Entire genome", "Reference gene set (input)"]
    reference_tooltips = [
        "Use entire genome (for gene set enrichment)",
        "Use reference set of genes",
    ]
    self.reference_radio_box = radioButtonsInBox(
        self.controlArea,
        self,
        "use_reference_data",
        reference_labels,
        tooltips=reference_tooltips,
        box="Reference",
        callback=self.invalidate,
    )
    self.reference_radio_box.setEnabled(False)

    categories_box = widgetBox(self.controlArea, "Gene Set Categories")
    tree = self.hierarchy_widget = QTreeWidget(self)
    tree.setEditTriggers(QTreeView.NoEditTriggers)
    tree.setHeaderLabels([' '])
    tree.itemClicked.connect(self.update_tree_view)
    categories_box.layout().addWidget(tree)

    self.commit_button = auto_commit(
        self.controlArea, self, "auto_commit", "&Commit", box=False)
def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
    """Build the GEO data sets widget and start loading the GDS model.

    Lays out the control area (info, output options, commit controls) and
    the main area (filter line edit, data set tree, description and sample
    annotation panes), then blocks and disables the widget and submits the
    model-loading task to a thread executor.
    """
    OWWidget.__init__(self, parent, signalManager, name)

    self.selectionChanged = False
    self.filterString = ""
    self.datasetName = ""

    ## GUI
    info_box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
    self.infoBox = gui.widgetLabel(info_box, "Initializing\n\n")

    output_box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
    gui.radioButtonsInBox(output_box, self, "outputRows",
                          ["Genes in rows", "Samples in rows"], "Rows",
                          callback=self.commitIf)
    gui.checkBox(output_box, self, "mergeSpots", "Merge spots of same gene",
                 callback=self.commitIf)
    gui.separator(output_box)

    self.nameEdit = gui.lineEdit(
        output_box, self, "datasetName", "Data set name",
        tooltip="Override the default output data set name",
        callback=self.onNameEdited)
    self.nameEdit.setPlaceholderText("")

    # Commit controls differ between the Python 2 and Python 3 code paths.
    if sys.version_info >= (3, ):
        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                        box="Commit")
        self.commitIf = self.commit
    else:
        commit_box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
        self.commitButton = gui.button(commit_box, self, "Commit",
                                       callback=self.commit)
        auto_commit_cb = gui.checkBox(commit_box, self, "autoCommit",
                                      "Commit on any change")
        gui.setStopper(self, self.commitButton, auto_commit_cb,
                       "selectionChanged", self.commit)

    gui.rubber(self.controlArea)

    # Filter line edit with a token completer.
    gui.widgetLabel(self.mainArea, "Filter")
    self.filterLineEdit = QLineEdit(textChanged=self.filter)
    self.completer = TokenListCompleter(self,
                                        caseSensitivity=Qt.CaseInsensitive)
    self.filterLineEdit.setCompleter(self.completer)
    self.mainArea.layout().addWidget(self.filterLineEdit)

    # Vertical splitter: data set tree on top, details underneath.
    vertical_splitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(vertical_splitter)

    tree_view = self.treeWidget = QTreeView(vertical_splitter)
    tree_view.setSelectionMode(QTreeView.SingleSelection)
    tree_view.setRootIsDecorated(False)
    tree_view.setSortingEnabled(True)
    tree_view.setAlternatingRowColors(True)
    tree_view.setUniformRowHeights(True)
    tree_view.setEditTriggers(QTreeView.NoEditTriggers)

    # Columns 1 and 8 share one link delegate; column 0 gets an indicator.
    link_delegate = LinkStyledItemDelegate(tree_view)
    for link_column in (1, 8):
        tree_view.setItemDelegateForColumn(link_column, link_delegate)
    tree_view.setItemDelegateForColumn(
        0, gui.IndicatorItemDelegate(tree_view, role=Qt.DisplayRole))

    tree_view.setModel(MySortFilterProxyModel(tree_view))
    tree_view.selectionModel().selectionChanged.connect(
        self.updateSelection)
    tree_view.viewport().setMouseTracking(True)

    # Horizontal splitter: description on the left, annotations on the right.
    horizontal_splitter = QSplitter(Qt.Horizontal, vertical_splitter)

    description_box = gui.widgetBox(horizontal_splitter, "Description")
    self.infoGDS = gui.widgetLabel(description_box, "")
    self.infoGDS.setWordWrap(True)
    gui.rubber(description_box)

    annotations_box = gui.widgetBox(horizontal_splitter, "Sample Annotations")
    self.annotationsTree = QTreeWidget(annotations_box)
    self.annotationsTree.setHeaderLabels(
        ["Type (Sample annotations)", "Sample count"])
    self.annotationsTree.setRootIsDecorated(True)
    annotations_box.layout().addWidget(self.annotationsTree)
    self.annotationsTree.itemChanged.connect(
        self.annotationSelectionChanged)
    self._annotationsUpdating = False

    # Restore each splitter's saved state and track further moves.
    self.splitters = (vertical_splitter, horizontal_splitter)
    for current_splitter, saved_state in zip(self.splitters,
                                             self.splitterSettings):
        current_splitter.splitterMoved.connect(self.splitterMoved)
        current_splitter.restoreState(saved_state)

    self.searchKeys = [
        "dataset_id", "title", "platform_organism", "description"
    ]

    self.gds = []
    self.gds_info = None

    self.resize(1000, 600)

    # Keep the widget blocked and disabled while the init task is running.
    self.setBlocking(True)
    self.setEnabled(False)
    self.progressBarInit()

    self._executor = ThreadExecutor()

    progress_callback = methodinvoke(self, "_setProgress", (float, ))
    self._inittask = Task(function=partial(get_gds_model, progress_callback))
    self._inittask.finished.connect(self._initializemodel)
    self._executor.submit(self._inittask)

    self._datatask = None
class OWGOEnrichmentAnalysis(widget.OWWidget): name = "GO Browser" description = "Enrichment analysis for Gene Ontology terms." icon = "../widgets/icons/GOBrowser.svg" priority = 2020 inputs = [("Cluster Data", Orange.data.Table, "setDataset", widget.Single + widget.Default), ("Reference Data", Orange.data.Table, "setReferenceDataset")] outputs = [("Data on Selected Genes", Orange.data.Table), ("Data on Unselected Genes", Orange.data.Table), ("Data on Unknown Genes", Orange.data.Table), ("Enrichment Report", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() annotationIndex = settings.ContextSetting(0) geneAttrIndex = settings.ContextSetting(0) useAttrNames = settings.ContextSetting(False) geneMatcherSettings = settings.Setting([True, False, False, False]) useReferenceDataset = settings.Setting(False) aspectIndex = settings.Setting(0) useEvidenceType = settings.Setting( {et: True for et in go.evidenceTypesOrdered}) filterByNumOfInstances = settings.Setting(False) minNumOfInstances = settings.Setting(1) filterByPValue = settings.Setting(True) maxPValue = settings.Setting(0.2) filterByPValue_nofdr = settings.Setting(False) maxPValue_nofdr = settings.Setting(0.01) probFunc = settings.Setting(0) selectionDirectAnnotation = settings.Setting(0) selectionDisjoint = settings.Setting(0) selectionAddTermAsClass = settings.Setting(0) Ready, Initializing, Running = 0, 1, 2 def __init__(self, parent=None): super().__init__(self, parent) self.clusterDataset = None self.referenceDataset = None self.ontology = None self.annotations = None self.loadedAnnotationCode = "---" self.treeStructRootKey = None self.probFunctions = [stats.Binomial(), stats.Hypergeometric()] self.selectedTerms = [] self.selectionChanging = 0 self.__state = OWGOEnrichmentAnalysis.Initializing self.annotationCodes = [] ############# ## GUI ############# self.tabs = gui.tabWidget(self.controlArea) ## Input tab self.inputTab = gui.createTabPage(self.tabs, "Input") box = 
gui.widgetBox(self.inputTab, "Info") self.infoLabel = gui.widgetLabel(box, "No data on input\n") gui.button( box, self, "Ontology/Annotation Info", callback=self.ShowInfo, tooltip="Show information on loaded ontology and annotations") box = gui.widgetBox(self.inputTab, "Organism") self.annotationComboBox = gui.comboBox(box, self, "annotationIndex", items=self.annotationCodes, callback=self._updateEnrichment, tooltip="Select organism") genebox = gui.widgetBox(self.inputTab, "Gene Names") self.geneAttrIndexCombo = gui.comboBox( genebox, self, "geneAttrIndex", callback=self._updateEnrichment, tooltip="Use this attribute to extract gene names from input data") self.geneAttrIndexCombo.setDisabled(self.useAttrNames) cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names", tooltip="Use column names for gene names", callback=self._updateEnrichment) cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled) gui.button(genebox, self, "Gene matcher settings", callback=self.UpdateGeneMatcher, tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.inputTab, self, "useReferenceDataset", ["Entire genome", "Reference set (input)"], tooltips=[ "Use entire genome for reference", "Use genes from Referece Examples input signal as reference" ], box="Reference", callback=self._updateEnrichment) self.referenceRadioBox.buttons[1].setDisabled(True) gui.radioButtonsInBox( self.inputTab, self, "aspectIndex", ["Biological process", "Cellular component", "Molecular function"], box="Aspect", callback=self._updateEnrichment) ## Filter tab self.filterTab = gui.createTabPage(self.tabs, "Filter") box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes") gui.checkBox( box, self, "filterByNumOfInstances", "Genes", callback=self.FilterAndDisplayGraph, tooltip="Filter by number of input genes mapped to a term") ibox = gui.indentedBox(box) gui.spin(ibox, self, 'minNumOfInstances', 1, 100, step=1, label='#:', labelWidth=15, 
callback=self.FilterAndDisplayGraph, callbackOnReturn=True, tooltip="Min. number of input genes mapped to a term") gui.checkBox(box, self, "filterByPValue_nofdr", "p-value", callback=self.FilterAndDisplayGraph, tooltip="Filter by term p-value") gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1, step=1e-8, label='p:', labelWidth=15, callback=self.FilterAndDisplayGraph, callbackOnReturn=True, tooltip="Max term p-value") #use filterByPValue for FDR, as it was the default in prior versions gui.checkBox(box, self, "filterByPValue", "FDR", callback=self.FilterAndDisplayGraph, tooltip="Filter by term FDR") gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1, step=1e-8, label='p:', labelWidth=15, callback=self.FilterAndDisplayGraph, callbackOnReturn=True, tooltip="Max term p-value") box = gui.widgetBox(box, "Significance test") gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"], tooltips=[ "Use binomial distribution test", "Use hypergeometric distribution test" ], callback=self._updateEnrichment) box = gui.widgetBox(self.filterTab, "Evidence codes in annotation", addSpace=True) self.evidenceCheckBoxDict = {} for etype in go.evidenceTypesOrdered: ecb = QCheckBox(etype, toolTip=go.evidenceTypes[etype], checked=self.useEvidenceType[etype]) ecb.toggled.connect(self.__on_evidenceChanged) box.layout().addWidget(ecb) self.evidenceCheckBoxDict[etype] = ecb ## Select tab self.selectTab = gui.createTabPage(self.tabs, "Select") box = gui.radioButtonsInBox(self.selectTab, self, "selectionDirectAnnotation", ["Directly or Indirectly", "Directly"], box="Annotated genes", callback=self.ExampleSelection) box = gui.widgetBox(self.selectTab, "Output", addSpace=True) gui.radioButtonsInBox( box, self, "selectionDisjoint", btnLabels=[ "All selected genes", "Term-specific genes", "Common term genes" ], tooltips=[ "Outputs genes annotated to all selected GO terms", "Outputs genes that appear in only one of selected GO terms", "Outputs genes 
common to all selected GO terms" ], callback=[self.ExampleSelection, self.UpdateAddClassButton]) self.addClassCB = gui.checkBox(box, self, "selectionAddTermAsClass", "Add GO Term as class", callback=self.ExampleSelection) # ListView for DAG, and table for significant GOIDs self.DAGcolumns = [ 'GO term', 'Cluster', 'Reference', 'p-value', 'FDR', 'Genes', 'Enrichment' ] self.splitter = QSplitter(Qt.Vertical, self.mainArea) self.mainArea.layout().addWidget(self.splitter) # list view self.listView = GOTreeWidget(self.splitter) self.listView.setSelectionMode(QTreeView.ExtendedSelection) self.listView.setAllColumnsShowFocus(1) self.listView.setColumnCount(len(self.DAGcolumns)) self.listView.setHeaderLabels(self.DAGcolumns) self.listView.header().setSectionsClickable(True) self.listView.header().setSortIndicatorShown(True) self.listView.setSortingEnabled(True) self.listView.setItemDelegateForColumn( 6, EnrichmentColumnItemDelegate(self)) self.listView.setRootIsDecorated(True) self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged) # table of significant GO terms self.sigTerms = QTreeWidget(self.splitter) self.sigTerms.setColumnCount(len(self.DAGcolumns)) self.sigTerms.setHeaderLabels(self.DAGcolumns) self.sigTerms.setSortingEnabled(True) self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection) self.sigTerms.setItemDelegateForColumn( 6, EnrichmentColumnItemDelegate(self)) self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged) self.sigTableTermsSorted = [] self.graph = {} self.inputTab.layout().addStretch(1) self.filterTab.layout().addStretch(1) self.selectTab.layout().addStretch(1) self.setBlocking(True) self._executor = ThreadExecutor() self._init = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME), ("GO", "taxonomy.pickle")]) self._init.finished.connect(self.__initialize_finish) self._executor.submit(self._init) def sizeHint(self): return QSize(1000, 700) def __initialize_finish(self): self.setBlocking(False) try: 
self.annotationFiles = listAvailable() except ConnectTimeout: self.error(2, "Internet connection error, unable to load data. " + \ "Check connection and create a new GO Browser widget.") self.filterTab.setEnabled(False) self.inputTab.setEnabled(False) self.selectTab.setEnabled(False) self.listView.setEnabled(False) self.sigTerms.setEnabled(False) else: self.annotationCodes = sorted(self.annotationFiles.keys()) self.annotationComboBox.clear() self.annotationComboBox.addItems(self.annotationCodes) self.annotationComboBox.setCurrentIndex(self.annotationIndex) self.__state = OWGOEnrichmentAnalysis.Ready def __on_evidenceChanged(self): for etype, cb in self.evidenceCheckBoxDict.items(): self.useEvidenceType[etype] = cb.isChecked() self._updateEnrichment() def UpdateGeneMatcher(self): """Open the Gene matcher settings dialog.""" dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, modal=True) if dialog.exec_() != QDialog.Rejected: self.geneMatcherSettings = [ getattr(dialog, item[0]) for item in dialog.items ] if self.annotations: self.SetGeneMatcher() self._updateEnrichment() def clear(self): self.infoLabel.setText("No data on input\n") self.warning(0) self.warning(1) self.geneAttrIndexCombo.clear() self.ClearGraph() self.send("Data on Selected Genes", None) self.send("Data on Unselected Genes", None) self.send("Data on Unknown Genes", None) self.send("Enrichment Report", None) def setDataset(self, data=None): if self.__state == OWGOEnrichmentAnalysis.Initializing: self.__initialize_finish() self.closeContext() self.clear() self.clusterDataset = data if data is not None: domain = data.domain allvars = domain.variables + domain.metas self.candidateGeneAttrs = [var for var in allvars if isstring(var)] self.geneAttrIndexCombo.clear() for var in self.candidateGeneAttrs: self.geneAttrIndexCombo.addItem(*gui.attributeItem(var)) taxid = data_hints.get_hint(data, "taxid", "") code = None try: code = go.from_taxid(taxid) except KeyError: pass except Exception as 
ex: print(ex) if code is not None: filename = "gene_association.%s.tar.gz" % code if filename in self.annotationFiles.values(): self.annotationIndex = \ [i for i, name in enumerate(self.annotationCodes) \ if self.annotationFiles[name] == filename].pop() self.useAttrNames = data_hints.get_hint(data, "genesinrows", self.useAttrNames) self.openContext(data) self.geneAttrIndex = min(self.geneAttrIndex, len(self.candidateGeneAttrs) - 1) if len(self.candidateGeneAttrs) == 0: self.useAttrNames = True self.geneAttrIndex = -1 elif self.geneAttrIndex < len(self.candidateGeneAttrs): self.geneAttrIndex = len(self.candidateGeneAttrs) - 1 self._updateEnrichment() def setReferenceDataset(self, data=None): self.referenceDataset = data self.referenceRadioBox.buttons[1].setDisabled(not bool(data)) self.referenceRadioBox.buttons[1].setText("Reference set") if self.clusterDataset is not None and self.useReferenceDataset: self.useReferenceDataset = 0 if not data else 1 graph = self.Enrichment() self.SetGraph(graph) elif self.clusterDataset: self.__updateReferenceSetButton() def handleNewSignals(self): super().handleNewSignals() def _updateEnrichment(self): if self.clusterDataset is not None and \ self.__state == OWGOEnrichmentAnalysis.Ready: pb = gui.ProgressBar(self, 100) self.Load(pb=pb) graph = self.Enrichment(pb=pb) self.FilterUnknownGenes() self.SetGraph(graph) def __updateReferenceSetButton(self): allgenes, refgenes = None, None if self.referenceDataset: try: allgenes = self.genesFromTable(self.referenceDataset) except Exception: allgenes = [] refgenes, unknown = self.FilterAnnotatedGenes(allgenes) self.referenceRadioBox.buttons[1].setDisabled(not bool(allgenes)) self.referenceRadioBox.buttons[1].setText("Reference set " + ( "(%i genes, %i matched)" % (len(allgenes), len(refgenes)) if allgenes and refgenes else "")) def genesFromTable(self, data): if self.useAttrNames: genes = [v.name for v in data.domain.variables] else: attr = self.candidateGeneAttrs[min( self.geneAttrIndex, 
len(self.candidateGeneAttrs) - 1)] genes = [str(ex[attr]) for ex in data if not numpy.isnan(ex[attr])] if any("," in gene for gene in genes): self.information( 0, "Separators detected in gene names. Assuming multiple genes per example." ) genes = reduce(operator.iadd, (genes.split(",") for genes in genes), []) return genes def FilterAnnotatedGenes(self, genes): matchedgenes = self.annotations.get_gene_names_translator( genes).values() return matchedgenes, [ gene for gene in genes if gene not in matchedgenes ] def FilterUnknownGenes(self): if not self.useAttrNames and self.candidateGeneAttrs: geneAttr = self.candidateGeneAttrs[min( self.geneAttrIndex, len(self.candidateGeneAttrs) - 1)] indices = [] for i, ex in enumerate(self.clusterDataset): if not any( self.annotations.genematcher.match(n.strip()) for n in str(ex[geneAttr]).split(",")): indices.append(i) if indices: data = self.clusterDataset[indices] else: data = None self.send("Data on Unknown Genes", data) else: self.send("Data on Unknown Genes", None) def Load(self, pb=None): if self.__state == OWGOEnrichmentAnalysis.Ready: go_files, tax_files = serverfiles.listfiles( "GO"), serverfiles.listfiles("Taxonomy") calls = [] pb, finish = (gui.ProgressBar(self, 0), True) if pb is None else (pb, False) count = 0 if not tax_files: calls.append(("Taxonomy", "ncbi_taxnomy.tar.gz")) count += 1 org = self.annotationCodes[min(self.annotationIndex, len(self.annotationCodes) - 1)] if org != self.loadedAnnotationCode: count += 1 if self.annotationFiles[org] not in go_files: calls.append(("GO", self.annotationFiles[org])) count += 1 if "gene_ontology_edit.obo.tar.gz" not in go_files: calls.append(("GO", "gene_ontology_edit.obo.tar.gz")) count += 1 if not self.ontology: count += 1 pb.iter += count * 100 for args in calls: serverfiles.localpath_download(*args, **dict(callback=pb.advance)) i = len(calls) if not self.ontology: self.ontology = go.Ontology( progress_callback=lambda value: pb.advance()) i += 1 if org != 
self.loadedAnnotationCode: self.annotations = None gc.collect() # Force run garbage collection code = self.annotationFiles[org].split(".")[-3] self.annotations = go.Annotations( code, genematcher=gene.GMDirect(), progress_callback=lambda value: pb.advance()) i += 1 self.loadedAnnotationCode = org count = defaultdict(int) geneSets = defaultdict(set) for anno in self.annotations.annotations: count[anno.evidence] += 1 geneSets[anno.evidence].add(anno.geneName) for etype in go.evidenceTypesOrdered: ecb = self.evidenceCheckBoxDict[etype] ecb.setEnabled(bool(count[etype])) ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype]))) if finish: pb.finish() def SetGeneMatcher(self): if self.annotations: taxid = self.annotations.taxid matchers = [] for matcher, use in zip( [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy], self.geneMatcherSettings): if use: try: if taxid == "352472": matchers.extend([ matcher(taxid), gene.GMDicty(), [matcher(taxid), gene.GMDicty()] ]) # The reason machers are duplicated is that we want `matcher` or `GMDicty` to # match genes by them self if possible. Only use the joint matcher if they fail. 
else: matchers.append(matcher(taxid)) except Exception as ex: print(ex) self.annotations.genematcher = gene.matcher(matchers) self.annotations.genematcher.set_targets( self.annotations.gene_names) def Enrichment(self, pb=None): assert self.clusterDataset is not None pb = gui.ProgressBar(self, 100) if pb is None else pb if not self.annotations.ontology: self.annotations.ontology = self.ontology if isinstance(self.annotations.genematcher, gene.GMDirect): self.SetGeneMatcher() self.error(1) self.warning([0, 1]) if self.useAttrNames: clusterGenes = [ v.name for v in self.clusterDataset.domain.attributes ] self.information(0) elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs): geneAttr = self.candidateGeneAttrs[self.geneAttrIndex] clusterGenes = [ str(ex[geneAttr]) for ex in self.clusterDataset if not numpy.isnan(ex[geneAttr]) ] if any("," in gene for gene in clusterGenes): self.information( 0, "Separators detected in cluster gene names. Assuming multiple genes per example." ) clusterGenes = reduce(operator.iadd, (genes.split(",") for genes in clusterGenes), []) else: self.information(0) else: self.error(1, "Failed to extract gene names from input dataset!") return {} genesSetCount = len(set(clusterGenes)) self.clusterGenes = clusterGenes = self.annotations.get_gene_names_translator( clusterGenes).values() self.infoLabel.setText( "%i unique genes on input\n%i (%.1f%%) genes with known annotations" % (genesSetCount, len(clusterGenes), 100.0 * len(clusterGenes) / genesSetCount if genesSetCount else 0.0)) referenceGenes = None if not self.useReferenceDataset or self.referenceDataset is None: self.information(2) self.information(1) referenceGenes = self.annotations.gene_names elif self.referenceDataset is not None: if self.useAttrNames: referenceGenes = [ v.name for v in self.referenceDataset.domain.attributes ] self.information(1) elif geneAttr in (self.referenceDataset.domain.variables + self.referenceDataset.domain.metas): referenceGenes = [ str(ex[geneAttr]) 
for ex in self.referenceDataset if not numpy.isnan(ex[geneAttr]) ] if any("," in gene for gene in clusterGenes): self.information( 1, "Separators detected in reference gene names. Assuming multiple genes per example." ) referenceGenes = reduce(operator.iadd, (genes.split(",") for genes in referenceGenes), []) else: self.information(1) else: self.information(1) referenceGenes = None if referenceGenes is None: referenceGenes = list(self.annotations.gene_names) self.referenceRadioBox.buttons[1].setText("Reference set") self.referenceRadioBox.buttons[1].setDisabled(True) self.information( 2, "Unable to extract gene names from reference dataset. Using entire genome for reference" ) self.useReferenceDataset = 0 else: refc = len(referenceGenes) referenceGenes = self.annotations.get_gene_names_translator( referenceGenes).values() self.referenceRadioBox.buttons[1].setText( "Reference set (%i genes, %i matched)" % (refc, len(referenceGenes))) self.referenceRadioBox.buttons[1].setDisabled(False) self.information(2) else: self.useReferenceDataset = 0 if not referenceGenes: self.error(1, "No valid reference set") return {} self.referenceGenes = referenceGenes evidences = [] for etype in go.evidenceTypesOrdered: if self.useEvidenceType[etype]: evidences.append(etype) aspect = ["P", "C", "F"][self.aspectIndex] if clusterGenes: self.terms = terms = self.annotations.get_enriched_terms( clusterGenes, referenceGenes, evidences, aspect=aspect, prob=self.probFunctions[self.probFunc], use_fdr=False, progress_callback=lambda value: pb.advance()) ids = [] pvals = [] for i, d in self.terms.items(): ids.append(i) pvals.append(d[1]) for i, fdr in zip(ids, stats.FDR( pvals)): # save FDR as the last part of the tuple terms[i] = tuple(list(terms[i]) + [fdr]) else: self.terms = terms = {} if not self.terms: self.warning(0, "No enriched terms found.") else: self.warning(0) pb.finish() self.treeStructDict = {} ids = self.terms.keys() self.treeStructRootKey = None parents = {} for id in ids: 
parents[id] = set([term for _, term in self.ontology[id].related]) children = {} for term in self.terms: children[term] = set([id for id in ids if term in parents[id]]) for term in self.terms: self.treeStructDict[term] = TreeNode(self.terms[term], children[term]) if not self.ontology[term].related and not getattr( self.ontology[term], "is_obsolete", False): self.treeStructRootKey = term return terms def FilterGraph(self, graph): if self.filterByPValue_nofdr: graph = go.filterByPValue(graph, self.maxPValue_nofdr) if self.filterByPValue: #FDR graph = dict( filter(lambda item: item[1][3] <= self.maxPValue, graph.items())) if self.filterByNumOfInstances: graph = dict( filter(lambda item: len(item[1][0]) >= self.minNumOfInstances, graph.items())) return graph def FilterAndDisplayGraph(self): if self.clusterDataset: self.graph = self.FilterGraph(self.originalGraph) if self.originalGraph and not self.graph: self.warning(1, "All found terms were filtered out.") else: self.warning(1) self.ClearGraph() self.DisplayGraph() def SetGraph(self, graph=None): self.originalGraph = graph if graph: self.FilterAndDisplayGraph() else: self.graph = {} self.ClearGraph() def ClearGraph(self): self.listView.clear() self.listViewItems = [] self.sigTerms.clear() def DisplayGraph(self): fromParentDict = {} self.termListViewItemDict = {} self.listViewItems = [] enrichment = lambda t: len(t[0]) / t[2] * (len(self.referenceGenes) / len(self.clusterGenes)) maxFoldEnrichment = max( [enrichment(term) for term in self.graph.values()] or [1]) def addNode(term, parent, parentDisplayNode): if (parent, term) in fromParentDict: return if term in self.graph: displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term], len(self.clusterGenes), len(self.referenceGenes), maxFoldEnrichment, parentDisplayNode) displayNode.goId = term self.listViewItems.append(displayNode) if term in self.termListViewItemDict: self.termListViewItemDict[term].append(displayNode) else: self.termListViewItemDict[term] = 
[displayNode] fromParentDict[(parent, term)] = True parent = term else: displayNode = parentDisplayNode for c in self.treeStructDict[term].children: addNode(c, parent, displayNode) if self.treeStructDict: addNode(self.treeStructRootKey, None, self.listView) terms = self.graph.items() terms = sorted(terms, key=lambda item: item[1][1]) self.sigTableTermsSorted = [t[0] for t in terms] self.sigTerms.clear() for i, (t_id, (genes, p_value, refCount, fdr)) in enumerate(terms): item = GOTreeWidgetItem(self.ontology[t_id], (genes, p_value, refCount, fdr), len(self.clusterGenes), len(self.referenceGenes), maxFoldEnrichment, self.sigTerms) item.goId = t_id self.listView.expandAll() for i in range(5): self.listView.resizeColumnToContents(i) self.sigTerms.resizeColumnToContents(i) self.sigTerms.resizeColumnToContents(6) width = min(self.listView.columnWidth(0), 350) self.listView.setColumnWidth(0, width) self.sigTerms.setColumnWidth(0, width) # Create and send the enrichemnt report table. termsDomain = Orange.data.Domain( [], [], # All is meta! 
[ Orange.data.StringVariable("GO Term Id"), Orange.data.StringVariable("GO Term Name"), Orange.data.ContinuousVariable("Cluster Frequency"), Orange.data.ContinuousVariable("Genes in Cluster", number_of_decimals=0), Orange.data.ContinuousVariable("Reference Frequency"), Orange.data.ContinuousVariable("Genes in Reference", number_of_decimals=0), Orange.data.ContinuousVariable("p-value"), Orange.data.ContinuousVariable("FDR"), Orange.data.ContinuousVariable("Enrichment"), Orange.data.StringVariable("Genes") ]) terms = [[t_id, self.ontology[t_id].name, len(genes) / len(self.clusterGenes), len(genes), r_count / len(self.referenceGenes), r_count, p_value, fdr, len(genes) / len(self.clusterGenes) * \ len(self.referenceGenes) / r_count, ",".join(genes) ] for t_id, (genes, p_value, r_count, fdr) in terms] if terms: X = numpy.empty((len(terms), 0)) M = numpy.array(terms, dtype=object) termsTable = Orange.data.Table.from_numpy(termsDomain, X, metas=M) else: termsTable = Orange.data.Table(termsDomain) self.send("Enrichment Report", termsTable) def ViewSelectionChanged(self): if self.selectionChanging: return self.selectionChanging = 1 self.selectedTerms = [] selected = self.listView.selectedItems() self.selectedTerms = list(set([lvi.term.id for lvi in selected])) self.ExampleSelection() self.selectionChanging = 0 def TableSelectionChanged(self): if self.selectionChanging: return self.selectionChanging = 1 self.selectedTerms = [] selectedIds = set([ self.sigTerms.itemFromIndex(index).goId for index in self.sigTerms.selectedIndexes() ]) for i in range(self.sigTerms.topLevelItemCount()): item = self.sigTerms.topLevelItem(i) selected = item.goId in selectedIds term = item.goId if selected: self.selectedTerms.append(term) for lvi in self.termListViewItemDict[term]: try: lvi.setSelected(selected) if selected: lvi.setExpanded(True) except RuntimeError: # Underlying C/C++ object deleted pass self.ExampleSelection() self.selectionChanging = 0 def UpdateAddClassButton(self): 
self.addClassCB.setEnabled(self.selectionDisjoint == 1) def ExampleSelection(self): self.commit() def commit(self): if self.clusterDataset is None: return terms = set(self.selectedTerms) genes = reduce(operator.ior, (set(self.graph[term][0]) for term in terms), set()) evidences = [] for etype in go.evidenceTypesOrdered: if self.useEvidenceType[etype]: # if getattr(self, "useEvidence" + etype): evidences.append(etype) allTerms = self.annotations.get_annotated_terms( genes, direct_annotation_only=self.selectionDirectAnnotation, evidence_codes=evidences) if self.selectionDisjoint > 0: count = defaultdict(int) for term in self.selectedTerms: for g in allTerms.get(term, []): count[g] += 1 ccount = 1 if self.selectionDisjoint == 1 else len( self.selectedTerms) selectedGenes = [ gene for gene, c in count.items() if c == ccount and gene in genes ] else: selectedGenes = reduce(operator.ior, (set(allTerms.get(term, [])) for term in self.selectedTerms), set()) if self.useAttrNames: vars = [ self.clusterDataset.domain[gene] for gene in set(selectedGenes) ] domain = Orange.data.Domain(vars, self.clusterDataset.domain.class_vars, self.clusterDataset.domain.metas) newdata = self.clusterDataset.from_table(domain, self.clusterDataset) self.send("Data on Selected Genes", newdata) self.send("Data on Unselected Genes", None) elif self.candidateGeneAttrs: selectedExamples = [] unselectedExamples = [] geneAttr = self.candidateGeneAttrs[min( self.geneAttrIndex, len(self.candidateGeneAttrs) - 1)] if self.selectionDisjoint == 1: goVar = Orange.data.DiscreteVariable("GO Term", values=list( self.selectedTerms)) newDomain = Orange.data.Domain( self.clusterDataset.domain.variables, goVar, self.clusterDataset.domain.metas) goColumn = [] for i, ex in enumerate(self.clusterDataset): if not numpy.isnan(ex[geneAttr]) and any( gene in selectedGenes for gene in str(ex[geneAttr]).split(",")): if self.selectionDisjoint == 1 and self.selectionAddTermAsClass: terms = filter( lambda term: any(gene in 
self.graph[term][0] for gene in str(ex[geneAttr]). split(",")), self.selectedTerms) term = sorted(terms)[0] goColumn.append(goVar.values.index(term)) selectedExamples.append(i) else: unselectedExamples.append(i) if selectedExamples: selectedExamples = self.clusterDataset[selectedExamples] if self.selectionDisjoint == 1 and self.selectionAddTermAsClass: selectedExamples = Orange.data.Table.from_table( newDomain, selectedExamples) view, issparse = selectedExamples.get_column_view(goVar) assert not issparse view[:] = goColumn else: selectedExamples = None if unselectedExamples: unselectedExamples = self.clusterDataset[unselectedExamples] else: unselectedExamples = None self.send("Data on Selected Genes", selectedExamples) self.send("Data on Unselected Genes", unselectedExamples) def ShowInfo(self): dialog = QDialog(self) dialog.setModal(False) dialog.setLayout(QVBoxLayout()) label = QLabel(dialog) label.setText( "Ontology:\n" + self.ontology.header if self.ontology else "Ontology not loaded!") dialog.layout().addWidget(label) label = QLabel(dialog) label.setText("Annotations:\n" + self.annotations.header.replace("!", "") if self. annotations else "Annotations not loaded!") dialog.layout().addWidget(label) dialog.show() def onDeleteWidget(self): """Called before the widget is removed from the canvas. """ self.annotations = None self.ontology = None gc.collect() # Force collection
def __init__(self, parent=None):
    """Build the GO browser widget: state, the three control tabs and both views.

    The control area holds the "Input", "Filter" and "Select" tabs; the main
    area holds a vertical splitter with the GO DAG tree on top and the flat
    table of significant terms below.  The list of available annotation files
    is assembled last from the remote server listing (when reachable) and the
    locally cached files.

    :param parent: optional parent object, forwarded to the base class.
    """
    # `self` is bound by super() already; passing it again shifted `parent`
    # into the wrong positional slot.
    super().__init__(parent)

    self.input_data = None
    self.ref_data = None
    self.ontology = None
    self.annotations = None
    self.loaded_annotation_code = None
    self.treeStructRootKey = None
    self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
    self.selectedTerms = []
    # Re-entrancy guard shared by the two selection-changed handlers.
    self.selectionChanging = 0
    self.__state = State.Ready
    # Single-shot timer: fires self.__update once per start().
    self.__scheduletimer = QTimer(self, singleShot=True)
    self.__scheduletimer.timeout.connect(self.__update)

    #############
    # GUI
    #############
    self.tabs = gui.tabWidget(self.controlArea)

    # Input tab
    self.inputTab = gui.createTabPage(self.tabs, "Input")
    box = gui.widgetBox(self.inputTab, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")
    gui.button(
        box, self, "Ontology/Annotation Info",
        callback=self.ShowInfo,
        tooltip="Show information on loaded ontology and annotations")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.inputTab, self, "useReferenceDataset",
        ["Entire genome", "Reference set (input)"],
        tooltips=["Use entire genome for reference",
                  "Use genes from Referece Examples input signal as reference"],
        box="Reference",
        callback=self.__invalidate)
    # The "Reference set (input)" option starts out disabled.
    self.referenceRadioBox.buttons[1].setDisabled(True)

    gui.radioButtonsInBox(
        self.inputTab, self, "aspectIndex",
        ["Biological process", "Cellular component", "Molecular function"],
        box="Aspect",
        callback=self.__invalidate)

    # Filter tab
    self.filterTab = gui.createTabPage(self.tabs, "Filter")
    box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")

    gui.checkBox(
        box, self, "filterByNumOfInstances", "Genes",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by number of input genes mapped to a term")
    ibox = gui.indentedBox(box)
    gui.spin(
        ibox, self, 'minNumOfInstances', 1, 100,
        step=1, label='#:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Min. number of input genes mapped to a term")

    gui.checkBox(
        box, self, "filterByPValue_nofdr", "p-value",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by term p-value")
    gui.doubleSpin(
        gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
        step=1e-8, label='p:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Max term p-value")

    # use filterByPValue for FDR, as it was the default in prior versions
    gui.checkBox(
        box, self, "filterByPValue", "FDR",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by term FDR")
    gui.doubleSpin(
        gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
        step=1e-8, label='p:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Max term p-value")

    box = gui.widgetBox(box, "Significance test")
    gui.radioButtonsInBox(
        box, self, "probFunc", ["Binomial", "Hypergeometric"],
        tooltips=["Use binomial distribution test",
                  "Use hypergeometric distribution test"],
        callback=self.__invalidate)  # TODO: only update the p values

    box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                        addSpace=True)
    # One check box per evidence code, keyed by the code itself.
    self.evidenceCheckBoxDict = {}
    for etype in go.evidenceTypesOrdered:
        ecb = QCheckBox(
            etype,
            toolTip=go.evidenceTypes[etype],
            checked=self.useEvidenceType[etype])
        ecb.toggled.connect(self.__on_evidenceChanged)
        box.layout().addWidget(ecb)
        self.evidenceCheckBoxDict[etype] = ecb

    # Select tab
    self.selectTab = gui.createTabPage(self.tabs, "Select")
    gui.radioButtonsInBox(
        self.selectTab, self, "selectionDirectAnnotation",
        ["Directly or Indirectly", "Directly"],
        box="Annotated genes",
        callback=self.ExampleSelection)

    box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
    gui.radioButtonsInBox(
        box, self, "selectionDisjoint",
        btnLabels=["All selected genes",
                   "Term-specific genes",
                   "Common term genes"],
        tooltips=["Outputs genes annotated to all selected GO terms",
                  "Outputs genes that appear in only one of selected GO terms",
                  "Outputs genes common to all selected GO terms"],
        callback=self.ExampleSelection)

    # ListView for DAG, and table for significant GOIDs
    self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                       'FDR', 'Genes', 'Enrichment']

    self.splitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(self.splitter)

    # list view
    self.listView = GOTreeWidget(self.splitter)
    self.listView.setSelectionMode(QTreeView.ExtendedSelection)
    self.listView.setAllColumnsShowFocus(1)
    self.listView.setColumnCount(len(self.DAGcolumns))
    self.listView.setHeaderLabels(self.DAGcolumns)
    self.listView.header().setSectionsClickable(True)
    self.listView.header().setSortIndicatorShown(True)
    self.listView.header().setSortIndicator(
        self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
    self.listView.setSortingEnabled(True)
    # Column 6 is 'Enrichment'; it is drawn by a dedicated delegate.
    self.listView.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))
    self.listView.setRootIsDecorated(True)
    self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

    # table of significant GO terms
    self.sigTerms = QTreeWidget(self.splitter)
    self.sigTerms.setColumnCount(len(self.DAGcolumns))
    self.sigTerms.setHeaderLabels(self.DAGcolumns)
    self.sigTerms.setSortingEnabled(True)
    self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
    self.sigTerms.header().setSortIndicator(
        self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
    self.sigTerms.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))
    self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

    self.sigTableTermsSorted = []
    self.graph = {}
    self.originalGraph = None

    # Push the controls of every tab to the top.
    self.inputTab.layout().addStretch(1)
    self.filterTab.layout().addStretch(1)
    self.selectTab.layout().addStretch(1)

    class AnnotationSlot(SimpleNamespace):
        taxid = ...  # type: str
        name = ...  # type: str
        filename = ...  # type:str

        @staticmethod
        def parse_tax_id(f_name):
            # The taxonomy id is the second dot-separated field of the name.
            return f_name.split('.')[1]

    try:
        remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
    except (ConnectTimeout, RequestException, ConnectionError):
        # TODO: Warn user about failed connection to the remote server
        # Best effort: fall back to whatever is available locally.
        remote_files = []

    # Union of remote and local listings, de-duplicated.
    known_files = set(remote_files + serverfiles.listfiles(DOMAIN))
    self.available_annotations = []
    for _, annotation_file in known_files:
        if annotation_file == FILENAME_ONTOLOGY:
            continue
        # Parse once per file instead of once per field.
        tax_id = AnnotationSlot.parse_tax_id(annotation_file)
        self.available_annotations.append(
            AnnotationSlot(
                taxid=tax_id,
                name=taxonomy.common_taxid_to_name(tax_id),
                filename=FILENAME_ANNOTATION.format(tax_id),
            )
        )

    self._executor = ThreadExecutor()
def __init__(self, parent=None):
    """Build the widget: server list, cache, control-area options and the
    experiments tree, then schedule the first connection attempt.

    :param parent: optional parent object, forwarded to the base class.
    """
    super().__init__(parent)
    # (url, title) pairs; the titles populate the "Server" combo box below.
    self.servers = [
        ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'),
        ('https://cloud.genialis.com/', 'Genialis'),
    ]
    self.selectedExperiments = []
    self.buffer = dicty.CacheSQLite(bufferfile)
    self.searchString = ""
    self.items = []
    self.result_types = []
    # Equal minimum and maximum pin the control area width to 250.
    self.controlArea.setMaximumWidth(250)
    self.controlArea.setMinimumWidth(250)
    box = gui.widgetBox(self.controlArea, 'Project')
    self.projectCB = gui.comboBox(box, self, "projecti", items=[], callback=self.ProjectChosen)
    self.projects = []
    b = gui.widgetBox(self.controlArea, "Selection bookmarks")
    self.selectionSetsWidget = SelectionSetsWidget(self)
    self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)

    def store_selections(modified):
        # Copy the selections into the widget attribute only when the
        # signal reports "not modified".
        if not modified:
            self.storedSelections = self.selectionSetsWidget.selections

    self.selectionSetsWidget.selectionModified.connect(store_selections)
    b.layout().addWidget(self.selectionSetsWidget)
    gui.separator(self.controlArea)
    b = gui.widgetBox(self.controlArea, "Sort output columns")
    self.columnsSortingWidget = SortedListWidget(self)
    self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)
    box = gui.widgetBox(self.controlArea, 'Experiment name')
    self.experimentNameCB = gui.comboBox(box, self, "exnamei", items=SORTING_MODEL_LIST)
    b.layout().addWidget(self.columnsSortingWidget)
    sorting_model = QStringListModel(SORTING_MODEL_LIST)
    self.columnsSortingWidget.setModel(sorting_model)
    # Restore the previously stored order before listening for changes.
    self.columnsSortingWidget.sortingOrder = self.storedSortOrder

    def store_sort_order():
        # Mirror the sorting widget's current order into the widget attribute.
        self.storedSortOrder = self.columnsSortingWidget.sortingOrder

    self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
    gui.separator(self.controlArea)
    box = gui.widgetBox(self.controlArea, 'Expression Type')
    self.expressionTypesCB = gui.comboBox(box, self, "rtypei", items=[], callback=self.UpdateResultsList)
    gui.checkBox(self.controlArea, self, "excludeconstant", "Exclude labels with constant values")
    gui.checkBox(self.controlArea, self, "joinreplicates", "Average replicates (use median)")
    gui.checkBox(self.controlArea, self, "log2", "Logarithmic (base 2) transformation")
    gui.checkBox(self.controlArea, self, "transpose", "Genes as columns")
    self.commit_button = gui.button(self.controlArea, self, "&Commit", callback=self.Commit)
    # Commit starts out disabled.
    self.commit_button.setDisabled(True)
    gui.rubber(self.controlArea)
    box = gui.widgetBox(self.controlArea, 'Server')
    gui.comboBox(box, self, "serveri", items=[title for url, title in self.servers], callback=self.ServerChosen)
    # NOTE(review): the token run `"Username:"******"password", "Password:"******"Clear cache"`
    # on the next line is not valid Python and looks like text lost to an
    # automated redaction. Presumably it covered the rest of the username
    # field, a password field and a "Clear cache" button -- restore the
    # original statements from version control; do not guess them.
    gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Clear cache", callback=self.clear_cache)
    # Search box: callbackOnType=True is passed together with SearchUpdate.
    gui.lineEdit(self.mainArea, self, "searchString", "Search", callbackOnType=True, callback=self.SearchUpdate)
    # Column titles are the second element of every HEADER entry.
    self.headerLabels = [t[1] for t in HEADER]
    self.experimentsWidget = QTreeWidget()
    self.experimentsWidget.setHeaderLabels(self.headerLabels)
    self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
    self.experimentsWidget.setRootIsDecorated(False)
    self.experimentsWidget.setSortingEnabled(True)
    contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
        self.experimentsWidget, self.experimentsWidget)
    self.experimentsWidget.header().installEventFilter(contextEventFilter)
    self.experimentsWidget.setItemDelegateForColumn(
        0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))
    self.experimentsWidget.setAlternatingRowColors(True)
    self.experimentsWidget.selectionModel().selectionChanged.connect(
        self.onSelectionChanged)
    # The bookmarks widget operates on the tree's own selection model.
    self.selectionSetsWidget.setSelectionModel(
        self.experimentsWidget.selectionModel())
    self.selectionSetsWidget.setSelections(self.storedSelections)
    self.mainArea.layout().addWidget(self.experimentsWidget)
    # Restore the header state first, then start saving it on geometry changes.
    self.restoreHeaderState()
    self.experimentsWidget.header().geometriesChanged.connect(
        self.saveHeaderState)
    self.dbc = None
    self.AuthSet()
    # Call ConnectAndUpdate from the event loop after a 100 ms delay.
    QTimer.singleShot(100, self.ConnectAndUpdate)
def __init__(self, parent=None, signalManager=None, name="Databases update"):
    """Build the "Databases update" widget and start fetching the file list.

    The control area gets a filter line edit, the files tree, a row of
    action buttons and an info label.  The file list is retrieved by a
    background ``Task`` started at the very end of construction.

    :param parent: optional parent object, forwarded to ``OWWidget``.
    :param signalManager: forwarded to ``OWWidget``.
    :param name: widget title, forwarded to ``OWWidget``.
    """
    OWWidget.__init__(self, parent, signalManager, name, wantMainArea=False)
    self.searchString = ""

    # Bookkeeping state is set up first so that nothing triggered by the
    # background task below can observe a half-constructed widget.
    self.updateItems = []
    self._tasks = []
    self._haveProgress = False

    fbox = gui.widgetBox(self.controlArea, "Filter")
    self.completer = TokenListCompleter(
        self, caseSensitivity=Qt.CaseInsensitive)
    self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
    self.lineEditFilter.setCompleter(self.completer)
    fbox.layout().addWidget(self.lineEditFilter)

    box = gui.widgetBox(self.controlArea, "Files")
    self.filesView = QTreeWidget(self)
    self.filesView.setHeaderLabels(
        ["", "Data Source", "Update", "Last Updated", "Size"])
    self.filesView.setRootIsDecorated(False)
    self.filesView.setUniformRowHeights(True)
    self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
    self.filesView.setSortingEnabled(True)
    # Initial sort: column 1 ("Data Source"), ascending.
    self.filesView.sortItems(1, Qt.AscendingOrder)
    self.filesView.setItemDelegateForColumn(
        0, UpdateOptionsItemDelegate(self.filesView))
    # Run SearchUpdate again whenever the model's layout changes.
    self.filesView.model().layoutChanged.connect(self.SearchUpdate)
    box.layout().addWidget(self.filesView)

    box = gui.widgetBox(self.controlArea, orientation="horizontal")
    self.updateButton = gui.button(
        box, self, "Update all",
        callback=self.UpdateAll,
        tooltip="Update all updatable files",
    )
    self.downloadButton = gui.button(
        box, self, "Download all",
        callback=self.DownloadFiltered,
        tooltip="Download all filtered files shown"
    )
    self.cancelButton = gui.button(
        box, self, "Cancel",
        callback=self.Cancel,
        tooltip="Cancel scheduled downloads/updates."
    )
    # "Reconnect" stays hidden until something explicitly shows it.
    self.retryButton = gui.button(
        box, self, "Reconnect", callback=self.RetrieveFilesList
    )
    self.retryButton.hide()
    gui.rubber(box)

    self.warning(0)

    box = gui.widgetBox(self.controlArea, orientation="horizontal")
    gui.rubber(box)

    self.infoLabel = QLabel()
    self.infoLabel.setAlignment(Qt.AlignCenter)
    self.controlArea.layout().addWidget(self.infoLabel)
    self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

    self.resize(800, 600)

    self.progress = ProgressState(self, maximum=3)
    self.progress.valueChanged.connect(self._updateProgress)
    self.progress.rangeChanged.connect(self._updateProgress)

    self.executor = ThreadExecutor(
        threadPool=QThreadPool(maxThreadCount=2)
    )

    # Kick off the initial listing last, once every attribute exists.
    task = Task(self, function=self.RetrieveFilesList)
    task.exceptionReady.connect(self.HandleError)
    task.start()
class GeneSetsSelection(QWidget):
    """Checkable tree of gene-set hierarchies backed by a ``GeneSets`` object.

    Every tree item carries a ``hierarchy`` attribute.  Items that only group
    sub-categories keep the plain string key; selectable items carry a tuple.
    The methods below therefore skip items whose ``hierarchy`` is a ``str``.
    """

    def __init__(self, box, parent, settings_var, **kwargs):
        # type: (Union[QGroupBox, QWidget], QWidget, str) -> None
        """Create the tree widget and add it to ``box``'s layout.

        :param box: container whose layout receives the tree widget.
        :param parent: object holding the stored selection.
        :param settings_var: name of the attribute on ``parent`` that holds
            the stored selection.
        :param kwargs: forwarded to the ``QWidget`` constructor.
        """
        super().__init__(**kwargs)
        # NOTE(review): this instance attribute hides QWidget.parent() on the
        # object; kept because external code may read `.parent`.
        self.parent = parent
        self.stored_selection = settings_var

        # gene sets object
        self.gs_object = GeneSets()  # type: GeneSets

        self.hierarchy_tree_widget = QTreeWidget(self)
        self.hierarchy_tree_widget.setHeaderHidden(True)
        self.hierarchy_tree_widget.setEditTriggers(QTreeView.NoEditTriggers)
        box.layout().addWidget(self.hierarchy_tree_widget)

        self.custom_set_hier = None
        # Checked when the stored selection leaves nothing checked.
        self.default_selection = [('GO', 'molecular_function'),
                                  ('GO', 'biological_process'),
                                  ('GO', 'cellular_component')]

    def _iter_items(self, flags=None):
        """Yield the items of the hierarchy tree in iterator order.

        :param flags: optional ``QTreeWidgetItemIterator`` flags; when omitted
            the iterator is built without flags.
        """
        if flags is None:
            iterator = QTreeWidgetItemIterator(self.hierarchy_tree_widget)
        else:
            iterator = QTreeWidgetItemIterator(self.hierarchy_tree_widget, flags)
        while iterator.value():
            yield iterator.value()
            iterator += 1

    def clear_custom_sets(self):
        """Delete the sets stored under the current custom hierarchy."""
        # delete any custom sets if they exists
        self.gs_object.delete_sets_by_hierarchy(self.custom_set_hier)

    def add_custom_sets(self, gene_sets_names, gene_names, hierarchy_title=None,
                        select_customs_flag=False):
        # type: (np.ndarray, np.ndarray) -> None
        """Group genes by set name and register them as custom gene sets.

        :param gene_sets_names: set name for every entry of ``gene_names``.
        :param gene_names: gene names, parallel to ``gene_sets_names``.
        :param hierarchy_title: hierarchy under which the sets are stored.
        :param select_customs_flag: when true, check only the custom
            hierarchy afterwards; otherwise restore the stored selection.
        """
        # NOTE(review): the new title is assigned before clearing, so sets
        # stored under a previous, different title are not removed here.
        # Confirm that this is intended.
        self.custom_set_hier = hierarchy_title
        self.clear_custom_sets()

        genes_by_set = defaultdict(list)
        for set_name, gene_name in zip(gene_sets_names, gene_names):
            genes_by_set[set_name].append(gene_name)

        g_sets = [
            GeneSet(gs_id=set_name,
                    hierarchy=self.custom_set_hier,
                    organism=self.gs_object.common_org(),
                    name=set_name,
                    genes=set(genes))
            for set_name, genes in genes_by_set.items()
        ]

        self.gs_object.update(g_sets)
        self.update_gs_hierarchy(select_customs_flag=select_customs_flag)

    def load_gene_sets(self, tax_id):
        # type: (str) -> None
        """Replace the gene sets with those listed for ``tax_id``.

        Rebuilds the tree from the listed hierarchies, loads each of them
        and then restores the stored selection.
        """
        self.gs_object = GeneSets()
        self.clear()

        gene_sets = list_all(organism=tax_id)
        self.set_hierarchy_model(self.hierarchy_tree_widget,
                                 self.hierarchy_tree(gene_sets))

        for gene_set in gene_sets:
            # Resolves to the module-level loader, not to this method.
            g_sets = load_gene_sets(gene_set, tax_id)
            self.gs_object.update(list(g_sets))

        self.set_selected_hierarchies()

    def clear_gene_sets(self):
        """Replace the gene-sets object with an empty one."""
        self.gs_object = GeneSets()

    def clear(self):
        """Remove every item from the hierarchy tree."""
        # reset hierarchy widget state
        self.hierarchy_tree_widget.clear()

    def update_gs_hierarchy(self, select_customs_flag=False):
        """Rebuild the tree from the current gene sets and restore check states.

        :param select_customs_flag: when true, check only the custom
            hierarchy; otherwise restore the stored selection.
        """
        self.clear()
        self.set_hierarchy_model(
            self.hierarchy_tree_widget,
            self.hierarchy_tree(self.gs_object.hierarchies()))

        if select_customs_flag:
            self.set_custom_sets()
        else:
            self.set_selected_hierarchies()

    def set_hierarchy_model(self, tree_widget, sets):
        """Recursively populate ``tree_widget`` from the nested mapping ``sets``.

        :param tree_widget: tree widget or parent item to receive new items.
        :param sets: mapping of key -> nested mapping, as returned by
            :meth:`hierarchy_tree`.
        """

        def beautify_displayed_text(text):
            # 'biological_process' -> 'Biological Process'; text without an
            # underscore is shown unchanged.
            return text.replace('_', ' ').title() if '_' in text else text

        # TODO: maybe optimize this code?
        for key, value in sets.items():
            item = QTreeWidgetItem(tree_widget, [beautify_displayed_text(key)])
            # Mask the flags down to checkable / selectable / enabled.
            item.setFlags(item.flags()
                          & (Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled))
            item.setExpanded(True)
            item.hierarchy = key

            if value:
                # Has sub-categories: make it tristate and recurse.
                item.setFlags(item.flags() | Qt.ItemIsTristate)
                self.set_hierarchy_model(item, value)
            elif item.parent():
                # Leaf below a category: (parent key, own key).
                item.hierarchy = (item.parent().hierarchy, key)

            if not item.childCount() and not item.parent():
                # Top-level leaf: one-element tuple.
                item.hierarchy = (key, )

    def get_hierarchies(self, **kwargs):
        """Return the hierarchy tuples held by the tree.

        :param only_selected: keyword argument; when truthy, only checked
            items that are not disabled are returned.
        :return: list of ``hierarchy`` values of the matching items.
        """
        only_selected = kwargs.get('only_selected', None)

        if only_selected:
            items = self._iter_items(QTreeWidgetItemIterator.Checked)
        else:
            items = self._iter_items()

        sets_to_display = []
        for item in items:
            # A plain-string hierarchy marks a top-level item that only holds
            # sub-categories; those are never reported.
            if isinstance(item.hierarchy, str):
                continue
            if not only_selected or not item.isDisabled():
                sets_to_display.append(item.hierarchy)

        return sets_to_display

    def set_selected_hierarchies(self):
        """Check the items found in the stored selection, uncheck the rest.

        When nothing ends up checked, the items listed in
        ``default_selection`` are checked instead.
        """
        defaults = []

        for item in self._iter_items(QTreeWidgetItemIterator.All):
            # A plain-string hierarchy marks a top-level item that only holds
            # sub-categories; its check state is left alone.
            if not isinstance(item.hierarchy, str):
                # Looked up per item: changing a check state may notify the
                # parent, which could replace the stored selection.
                if item.hierarchy in getattr(self.parent, self.stored_selection):
                    item.setCheckState(0, Qt.Checked)
                else:
                    item.setCheckState(0, Qt.Unchecked)

            # if no items are checked, set defaults
            if item.hierarchy in self.default_selection:
                defaults.append(item)

        if not self.get_hierarchies(only_selected=True):
            for item in defaults:
                item.setCheckState(0, Qt.Checked)

    def set_custom_sets(self):
        """Check the custom hierarchy item and uncheck every other one."""
        for item in self._iter_items(QTreeWidgetItemIterator.All):
            # A plain-string hierarchy marks a top-level item that only holds
            # sub-categories; its check state is left alone.
            if isinstance(item.hierarchy, str):
                continue
            if item.hierarchy == self.custom_set_hier:
                item.setCheckState(0, Qt.Checked)
            else:
                item.setCheckState(0, Qt.Unchecked)

    @staticmethod
    def hierarchy_tree(gene_sets):
        """Fold hierarchy sequences into a nested ``defaultdict`` tree.

        :param gene_sets: iterable of hierarchy sequences, for example
            ``('GO', 'biological_process')``.
        :return: nested ``defaultdict`` keyed by each hierarchy level.
        """

        def tree():
            return defaultdict(tree)

        collection = tree()

        def collect(col, set_hierarchy):
            # Indexing the defaultdict creates the node for this level.
            if set_hierarchy:
                collect(col[set_hierarchy[0]], set_hierarchy[1:])

        for hierarchy in gene_sets:
            collect(collection, hierarchy)

        return collection
def __init__(self, parent=None, signalManager=None, name="Databases update"):
    """Assemble the "Databases update" widget and populate the files view.

    Creates the filter line edit, the files tree and a row of action
    buttons ("Update all", "Download all", "Cancel", a stretch, "Add ..."),
    sets up the thread pool bookkeeping and finally calls
    ``initialize_files_view``.

    :param parent: optional parent object, forwarded to ``OWWidget``.
    :param signalManager: forwarded to ``OWWidget``.
    :param name: widget title, forwarded to ``OWWidget``.
    """
    OWWidget.__init__(self, parent, signalManager, name, wantMainArea=False)
    self.searchString = ""

    # --- filter -----------------------------------------------------------
    filter_box = gui.widgetBox(self.controlArea, "Filter")
    self.completer = TokenListCompleter(self, caseSensitivity=Qt.CaseInsensitive)
    self.lineEditFilter = QLineEdit(textChanged=self.search_update)
    self.lineEditFilter.setCompleter(self.completer)
    filter_box.layout().addWidget(self.lineEditFilter)

    # --- files tree -------------------------------------------------------
    files_box = gui.widgetBox(self.controlArea, "Files")
    self.filesView = QTreeWidget(self)
    view = self.filesView
    view.setHeaderLabels(header_labels)
    view.setRootIsDecorated(False)
    view.setUniformRowHeights(True)
    view.setSelectionMode(QAbstractItemView.NoSelection)
    view.setSortingEnabled(True)
    view.sortItems(header.Title, Qt.AscendingOrder)
    view.setItemDelegateForColumn(0, UpdateOptionsItemDelegate(view))
    view.model().layoutChanged.connect(self.search_update)
    files_box.layout().addWidget(view)

    # --- action buttons ---------------------------------------------------
    # The buttons are created against the "Files" box and then placed into
    # the dedicated horizontal row below.
    button_row = QHBoxLayout()
    gui.widgetBox(self.controlArea, margin=0, orientation=button_row)

    self.updateButton = gui.button(
        files_box, self, "Update all",
        callback=self.update_all,
        tooltip="Update all updatable files",
    )
    self.downloadButton = gui.button(
        files_box, self, "Download all",
        callback=self.download_filtered,
        tooltip="Download all filtered files shown",
    )
    self.cancelButton = gui.button(
        files_box, self, "Cancel",
        callback=self.cancel_active_threads,
        tooltip="Cancel scheduled downloads/updates.",
    )
    self.addButton = gui.button(
        files_box, self, "Add ...",
        callback=self.__handle_dialog,
        tooltip="Add files for personal use.",
    )

    for left_button in (self.updateButton, self.downloadButton, self.cancelButton):
        button_row.addWidget(left_button)
    # The stretch pushes "Add ..." to the far end of the row.
    button_row.addStretch()
    button_row.addWidget(self.addButton)

    # Enable retryButton once connection is established
    # self.retryButton = gui.button(
    #     box, self, "Reconnect", callback=self.initialize_files_view
    # )
    # self.retryButton.hide()

    self.resize(800, 600)

    self.update_items = []
    self._dialog = None
    self.progress_bar = None

    # threads
    self.threadpool = QThreadPool(self)
    # self.threadpool.setMaxThreadCount(1)
    self.workers = []

    self.initialize_files_view()
def __init__(self, parent=None, signalManager=None, name="Databases update"):
    """Build the "Databases update" widget and populate the files view.

    The control area gets a filter line edit, the files tree, one row of
    action buttons and an info label; ``initialize_files_view`` is called
    once everything is in place.

    :param parent: optional parent object, forwarded to ``OWWidget``.
    :param signalManager: forwarded to ``OWWidget``.
    :param name: widget title, forwarded to ``OWWidget``.
    """
    OWWidget.__init__(self, parent, signalManager, name, wantMainArea=False)
    self.searchString = ""

    fbox = gui.widgetBox(self.controlArea, "Filter")
    self.completer = TokenListCompleter(self, caseSensitivity=Qt.CaseInsensitive)
    self.lineEditFilter = QLineEdit(textChanged=self.search_update)
    self.lineEditFilter.setCompleter(self.completer)
    fbox.layout().addWidget(self.lineEditFilter)

    box = gui.widgetBox(self.controlArea, "Files")
    self.filesView = QTreeWidget(self)
    self.filesView.setHeaderLabels(header_labels)
    self.filesView.setRootIsDecorated(False)
    self.filesView.setUniformRowHeights(True)
    self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
    self.filesView.setSortingEnabled(True)
    self.filesView.sortItems(header.Title, Qt.AscendingOrder)
    self.filesView.setItemDelegateForColumn(
        0, UpdateOptionsItemDelegate(self.filesView))
    # Run search_update again whenever the model's layout changes.
    self.filesView.model().layoutChanged.connect(self.search_update)
    box.layout().addWidget(self.filesView)

    box = gui.widgetBox(self.controlArea, orientation="horizontal")
    self.updateButton = gui.button(
        box, self, "Update all",
        callback=self.update_all,
        tooltip="Update all updatable files",
    )
    self.downloadButton = gui.button(
        box, self, "Download all",
        callback=self.download_filtered,
        tooltip="Download all filtered files shown")
    self.cancelButton = gui.button(
        box, self, "Cancel",
        callback=self.cancel_active_threads,
        tooltip="Cancel scheduled downloads/updates.")

    # Separate the buttons above from the ones below with a stretch.  The
    # previous empty QLabel was added with Qt.AlignRight in the *stretch*
    # position of addWidget(widget, stretch, alignment), so it only worked
    # because the flag happened to convert to a non-zero stretch factor.
    box.layout().addStretch(1)

    self.addButton = gui.button(
        box, self, "Add ...",
        callback=self.__handle_dialog,
        tooltip="Add files for personal use.")

    # "Reconnect" stays hidden until something explicitly shows it.
    self.retryButton = gui.button(
        box, self, "Reconnect", callback=self.initialize_files_view)
    self.retryButton.hide()

    self.warning(0)

    box = gui.widgetBox(self.controlArea, orientation="horizontal")
    gui.rubber(box)

    self.infoLabel = QLabel()
    self.infoLabel.setAlignment(Qt.AlignCenter)
    self.controlArea.layout().addWidget(self.infoLabel)
    self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

    self.resize(800, 600)

    self.update_items = []
    self._dialog = None
    self.progress_bar = None

    # threads
    self.threadpool = QThreadPool(self)
    self.workers = []

    self.initialize_files_view()
def __init__(self, parent=None):
    """Set up the KEGG pathway browser and start fetching organism data.

    Builds the control area (info, organism, gene attribute, reference,
    orthology, image and cache options), the main area (pathway view above
    the pathway list), then disables and blocks the widget while a
    background task pre-caches the organism entries.

    :param parent: optional parent object, forwarded to the base class.
    """
    super().__init__(parent)

    self.organismCodes = []
    self._changedFlag = False
    self.__invalidated = False
    self.__runstate = OWKEGGPathwayBrowser.Initializing
    self.__in_setProgress = False

    control_area = self.controlArea
    control_area.setMaximumWidth(250)

    info_box = gui.widgetBox(control_area, "Info")
    self.infoLabel = gui.widgetLabel(info_box, "No data on input\n")

    # Organism selection.
    organism_box = gui.widgetBox(control_area, "Organism")
    self.organismComboBox = gui.comboBox(
        organism_box, self, "organismIndex",
        items=[],
        callback=self.Update,
        addSpace=True,
        tooltip="Select the organism of the input genes",
    )

    # Selection of genes attribute
    gene_box = gui.widgetBox(control_area, "Gene attribute")
    self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
    self.geneAttrCombo = gui.comboBox(
        gene_box, self, "geneAttrIndex", callback=self.Update)
    self.geneAttrCombo.setModel(self.geneAttrCandidates)

    gui.checkBox(
        gene_box, self, "useAttrNames", "Use variable names",
        disables=[(-1, self.geneAttrCombo)],
        callback=self.Update,
    )
    # Bring the combo's enabled state in line with the current setting.
    self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

    gui.separator(control_area)

    gui.checkBox(
        control_area, self, "useReference", "From signal",
        box="Reference", callback=self.Update)

    gui.separator(control_area)

    gui.checkBox(
        control_area, self, "showOrthology",
        "Show pathways in full orthology",
        box="Orthology",
        callback=self.UpdateListView,
    )

    gui.checkBox(
        control_area, self, "autoResize", "Resize to fit",
        box="Image",
        callback=self.UpdatePathwayViewTransform,
    )

    cache_box = gui.widgetBox(control_area, "Cache Control")
    gui.button(
        cache_box, self, "Clear cache",
        callback=self.ClearCache,
        tooltip="Clear all locally cached KEGG data.",
        default=False,
        autoDefault=False,
    )

    gui.separator(control_area)

    gui.auto_commit(control_area, self, "autoCommit", "Commit")

    gui.rubber(control_area)

    # Main area: pathway picture on top, pathway list underneath.
    splitter = QSplitter(Qt.Vertical, self.mainArea)

    self.pathwayView = PathwayView(self, splitter)
    self.pathwayView.scene().selectionChanged.connect(self._onSelectionChanged)
    self.mainArea.layout().addWidget(splitter)

    self.listView = QTreeWidget(
        allColumnsShowFocus=True,
        selectionMode=QTreeWidget.SingleSelection,
        sortingEnabled=True,
        maximumHeight=200,
    )
    splitter.addWidget(self.listView)

    self.listView.setColumnCount(4)
    self.listView.setHeaderLabels(["Pathway", "P value", "Genes", "Reference"])
    self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

    select_all_action = QAction(
        "Select All", self, shortcut=QKeySequence.SelectAll)
    select_all_action.triggered.connect(self.selectAll)
    self.addAction(select_all_action)

    self.data = None
    self.refData = None

    self._executor = concurrent.ThreadExecutor()
    # Disabled and blocking from here until __initialize_finish runs.
    self.setEnabled(False)
    self.setBlocking(True)

    report_progress = concurrent.methodinvoke(self, "setProgress", (float,))

    def get_genome():
        """Return a KEGGGenome with the common org entries precached."""
        genome = kegg.KEGGGenome()

        essential = genome.essential_organisms()
        # Remove duplicates of essential from common.
        # (essential + common list as defined here will be used in the
        # GUI.)
        common = [
            code for code in genome.common_organisms()
            if code not in essential
        ]

        # TODO: Add option to specify additional organisms not
        # in the common list.

        keys = [
            genome.org_code_to_entry_key(code) for code in essential + common
        ]

        genome.pre_cache(keys, progress_callback=report_progress)
        return (keys, genome)

    genome_task = concurrent.Task(function=get_genome)
    self._genomeTask = genome_task
    genome_task.finished.connect(self.__initialize_finish)

    self.progressBarInit()
    self.infoLabel.setText("Fetching organism definitions\n")
    self._executor.submit(genome_task)
class OWKEGGPathwayBrowser(widget.OWWidget):
    """Widget for browsing KEGG pathways enriched in an input set of genes.

    Gene names are read from the "Data" input (optionally compared against
    a "Reference" input), pathway enrichment is computed on a background
    executor, and the rows/columns matching genes selected in the pathway
    view are sent on the "Selected Data" / "Unselected Data" outputs.
    """

    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/KEGGPathways.svg"
    priority = 2030

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    # Widget run states (see ``__runstate``).
    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        """Build the GUI and start fetching the organism definitions."""
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        # Analysis state.  Initialised here (and not only in ``Clear`` /
        # ``_onEnrichTaskFinished``) so that callbacks firing before the
        # first enrichment has finished never hit a missing attribute.
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.bestPValueItem = None

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes",
        )

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(
            box, self, "useAttrNames", "Use variable names",
            disables=[(-1, self.geneAttrCombo)],
            callback=self.Update,
        )
        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference", "From signal",
                     box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(
            self.controlArea, self, "showOrthology",
            "Show pathways in full orthology",
            box="Orthology",
            callback=self.UpdateListView,
        )

        gui.checkBox(
            self.controlArea, self, "autoResize", "Resize to fit",
            box="Image", callback=self.UpdatePathwayViewTransform,
        )

        box = gui.widgetBox(self.controlArea, "Cache Control")
        gui.button(
            box, self, "Clear cache",
            callback=self.ClearCache,
            tooltip="Clear all locally cached KEGG data.",
            default=False,
            autoDefault=False,
        )

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged)
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(
            allColumnsShowFocus=True,
            selectionMode=QTreeWidget.SingleSelection,
            sortingEnabled=True,
            maximumHeight=200,
        )
        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        # Stay disabled/blocking until the organism list is available.
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        """Populate the organism combo box from the finished genome task.

        Runs at most once: it is invoked both by the task's ``finished``
        signal and (eagerly) by ``SetData``; whichever comes second returns
        immediately because the run state has already moved to ``Ready``.
        """
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

        # Initialization is complete; without this transition every later
        # SetData call would redo the work above and reset the combo box.
        self.__runstate = OWKEGGPathwayBrowser.Ready

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        """Handle the "Data" input signal.

        Collects the string variables of `data` as gene attribute
        candidates, guesses the gene name variable and the organism (from
        the "taxid" data hint), restores context settings and marks the
        widget as invalidated so ``handleNewSignals`` recomputes.
        """
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            candidates = data.domain.variables + data.domain.metas
            string_vars = [var for var in candidates
                           if isinstance(var, Orange.data.StringVariable)]
            self.geneAttrCandidates[:] = string_vars

            # Try to guess the gene name variable
            if string_vars:
                names_lower = [v.name.lower() for v in string_vars]
                # Exact match "gene" ranks above a name merely containing it.
                scores = [(name == "gene", "gene" in name)
                          for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                imax = -1

            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    # Best effort only: keep the current organism selection.
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(
                data, "genesinrows", self.useAttrNames)

            self.openContext(data)

            if len(self.geneAttrCandidates) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            else:
                # A restored context may hold an index past the model's end.
                self.geneAttrIndex = min(self.geneAttrIndex,
                                         len(self.geneAttrCandidates) - 1)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        """Handle the "Reference" input signal."""
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        """Recompute the enrichment if any input invalidated the state."""
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        """Rebuild the pathway list (flat, or as the KO orthology tree)."""
        self.bestPValueItem = None
        self.listView.clear()
        # No data, or no enrichment result available yet.
        if not self.data or self.org is None:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                """Collect entries that are, or contain, enriched pathways."""
                num = koEntry.title[:5] if koEntry.title else None
                collected = []
                for child in koEntry.entries:
                    collected.extend(_walkCollect(child))
                if num in path_ids:
                    return [koEntry] + collected
                # Keep a non-pathway entry only if some descendant was kept.
                return collected + [koEntry] if collected else collected

            allClasses = []
            for top_entry in self.koOrthology:
                allClasses.extend(_walkCollect(top_entry))

            def _walkCreate(koEntry, lvItem):
                """Create the tree item for `koEntry` and its kept children."""
                item = QTreeWidgetItem(lvItem)
                pathway_id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(pathway_id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[pathway_id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = pathway_id if p is not None else None
                else:
                    if pathway_id in allPathways:
                        text = kegg_pathways.get_entry(pathway_id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if pathway_id in allPathways:
                        item.pathway_id = pathway_id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            # Ascending p-value, so the first item is the best one.
            pathways = sorted(self.pathways.items(),
                              key=lambda item: item[1][1])

            for pathway_id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                entry = kegg_pathways.get_entry(pathway_id)
                # The genesets may still reference obsolete entries (same
                # situation the orthology branch above guards against).
                item.setText(0, entry.name if entry is not None else pathway_id)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = pathway_id if entry is not None else None
                items.append(item)

        self.bestPValueItem = items[0] if items else None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect)

    def UpdatePathwayView(self):
        """Load and show the pathway of the selected (or best) list item."""
        items = self.listView.selectedItems()
        item = items[0] if len(items) > 0 else None

        self.commit()

        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id))
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        """Show the pathway fetched by the background pathway task."""
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        # First element of the enrichment triple is the matched gene list.
        self.pathwayView.SetPathway(
            self.pathway, self.pathways.get(pathway_id, [[]])[0])

    def UpdatePathwayViewTransform(self):
        """Re-apply the view transform (callback of "Resize to fit")."""
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even allow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            # Flatten in a single pass (the reduce(add, ...) form re-copies
            # the accumulated list for every input value).
            genes = [name for gene in genes
                     for name in split_and_strip(gene, ",")]
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")

        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = [name for gene in reference
                             for name in split_and_strip(gene, ",")]
                self.information(
                    1,
                    "Separators detected in reference gene "
                    "names. Assuming multiple genes per "
                    "instance."
                )

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            """Compute pathway enrichment; runs on the executor thread."""
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections(
                (("KEGG", "pathways"), taxid))

            pathways = pathway_enrichment(
                kegg_gs_collections, unique_genes.keys(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress)

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        self._enrichTask = concurrent.Task(
            function=lambda:
                run_enrichment(org_code, genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        """Store the enrichment results and refresh the list view.

        Any exception raised by the enrichment task propagates to the
        caller; the progress bar is finished in either case.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        finally:
            # Do not leave the progress bar running when the task failed.
            self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = {
            val: key for key, val in self.uniqueGenesDict.items()}
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        """Set the progress bar to `value`, ignoring re-entrant calls."""
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.

        Names come from the attribute (column) names when `useAttrNames`
        is set, otherwise from the selected gene attribute's values.
        Raises ValueError when neither source is available.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            genes = [str(e[geneAttr]) for e in data
                     if not numpy.isnan(e[geneAttr])]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        # Suppress per-item notifications; emit a single one at the end.
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        """Commit after the item selection in the pathway scene changed."""
        self.commit()

    def commit(self):
        """Send the data matching the genes selected in the pathway view."""
        if not self.data:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)
            return

        selectedItems = self.pathwayView.scene().selectedItems()
        selectedGenes = set().union(
            *(item.marked_objects for item in selectedItems))

        if self.useAttrNames:
            # Genes are columns: output the selected variables only.
            # NOTE(review): "Unselected Data" is not updated in this mode -
            # confirm whether that is intended.
            selected = [self.data.domain[self.uniqueGenesDict[gene]]
                        for gene in selectedGenes]
            data = self.data[:, selected]
            self.send("Selected Data", data)
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            selectedIndices = []
            otherIndices = []
            for i, ex in enumerate(self.data):
                names = [self.revUniqueGenesDict.get(name, None)
                         for name in split_and_strip(str(ex[geneAttr]), ",")]
                if any(name and name in selectedGenes for name in names):
                    selectedIndices.append(i)
                else:
                    otherIndices.append(i)

            selected = self.data[selectedIndices] if selectedIndices else None
            other = self.data[otherIndices] if otherIndices else None

            self.send("Selected Data", selected)
            self.send("Unselected Data", other)

    def ClearCache(self):
        """Clear the local KEGG cache (callback of "Clear cache")."""
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()
        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        """Return the preferred initial widget size."""
        return QSize(1024, 720)
class OWKEGGPathwayBrowser(widget.OWWidget):
    """Widget for browsing KEGG pathways enriched in an input set of genes.

    Input gene names are mapped to NCBI gene ids with ``GeneMatcher``,
    pathway enrichment is computed on a background executor, and the
    rows/columns matching genes selected in the pathway view are sent on
    the "Selected Data" / "Unselected Data" outputs.
    """

    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/OWKEGGPathwayBrowser.svg"
    priority = 8

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    # Widget run states (see ``__runstate``).
    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        """Build the GUI and start fetching the organism definitions."""
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        # Analysis state.  Initialised here (and not only in ``Clear`` /
        # ``_onEnrichTaskFinished``) so that callbacks firing before the
        # first enrichment has finished never hit a missing attribute.
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.bestPValueItem = None

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(box, self, "useAttrNames", "Use variable names",
                     disables=[(-1, self.geneAttrCombo)],
                     callback=self.Update)
        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference", "From signal",
                     box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "showOrthology",
                     "Show pathways in full orthology", box="Orthology",
                     callback=self.UpdateListView)

        gui.checkBox(self.controlArea, self, "autoResize", "Resize to fit",
                     box="Image", callback=self.UpdatePathwayViewTransform)

        box = gui.widgetBox(self.controlArea, "Cache Control")
        gui.button(box, self, "Clear cache",
                   callback=self.ClearCache,
                   tooltip="Clear all locally cached KEGG data.",
                   default=False, autoDefault=False)

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged
        )
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(
            allColumnsShowFocus=True,
            selectionMode=QTreeWidget.SingleSelection,
            sortingEnabled=True,
            maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction(
            "Select All", self,
            shortcut=QKeySequence.SelectAll
        )
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        # Stay disabled/blocking until the organism list is available.
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        """Populate the organism combo box from the finished genome task.

        Runs at most once: it is invoked both by the task's ``finished``
        signal and (eagerly) by ``SetData``; whichever comes second returns
        immediately because the run state has already moved to ``Ready``.
        """
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

        # Initialization is complete; without this transition every later
        # SetData call would redo the work above and reset the combo box.
        self.__runstate = OWKEGGPathwayBrowser.Ready

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        """Handle the "Data" input signal.

        Collects the string variables of `data` as gene attribute
        candidates, guesses the gene name variable and the organism (from
        the ``TAX_ID`` data hint) and marks the widget as invalidated so
        ``handleNewSignals`` recomputes.
        """
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            candidates = data.domain.variables + data.domain.metas
            string_vars = [var for var in candidates
                           if isinstance(var, Orange.data.StringVariable)]
            self.geneAttrCandidates[:] = string_vars

            # Try to guess the gene name variable
            if string_vars:
                names_lower = [v.name.lower() for v in string_vars]
                # Exact match "gene" ranks above a name merely containing it.
                scores = [(name == "gene", "gene" in name)
                          for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                imax = -1

            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, TAX_ID, None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    # Best effort only: keep the current organism selection.
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(
                data, GENE_NAME, self.useAttrNames)

            if len(self.geneAttrCandidates) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            else:
                self.geneAttrIndex = min(self.geneAttrIndex,
                                         len(self.geneAttrCandidates) - 1)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        """Handle the "Reference" input signal."""
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        """Recompute the enrichment if any input invalidated the state."""
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        """Rebuild the pathway list (flat, or as the KO orthology tree)."""
        self.bestPValueItem = None
        self.listView.clear()
        # No data, or no enrichment result available yet.
        if not self.data or self.org is None:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                """Collect entries that are, or contain, enriched pathways."""
                num = koEntry.title[:5] if koEntry.title else None
                collected = []
                for child in koEntry.entries:
                    collected.extend(_walkCollect(child))
                if num in path_ids:
                    return [koEntry] + collected
                # Keep a non-pathway entry only if some descendant was kept.
                return collected + [koEntry] if collected else collected

            allClasses = []
            for top_entry in self.koOrthology:
                allClasses.extend(_walkCollect(top_entry))

            def _walkCreate(koEntry, lvItem):
                """Create the tree item for `koEntry` and its kept children."""
                item = QTreeWidgetItem(lvItem)
                pathway_id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(pathway_id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[pathway_id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = pathway_id if p is not None else None
                else:
                    if pathway_id in allPathways:
                        text = kegg_pathways.get_entry(pathway_id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if pathway_id in allPathways:
                        item.pathway_id = pathway_id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            # Ascending p-value, so the first item is the best one.
            pathways = sorted(self.pathways.items(),
                              key=lambda item: item[1][1])

            for pathway_id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                entry = kegg_pathways.get_entry(pathway_id)
                # The gene sets may still reference obsolete entries (same
                # situation the orthology branch above guards against).
                item.setText(0, entry.name if entry is not None else pathway_id)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = pathway_id if entry is not None else None
                items.append(item)

        self.bestPValueItem = items[0] if items else None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect
            )

    def UpdatePathwayView(self):
        """Load and show the pathway of the selected (or best) list item."""
        items = self.listView.selectedItems()
        item = items[0] if len(items) > 0 else None

        self.commit()

        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id)
        )
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        """Show the pathway fetched by the background pathway task."""
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        # First element of the enrichment triple is the matched gene list.
        self.pathwayView.SetPathway(
            self.pathway,
            self.pathways.get(pathway_id, [[]])[0]
        )

    def UpdatePathwayViewTransform(self):
        """Re-apply the view transform (callback of "Resize to fit")."""
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even allow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            # Flatten in a single pass (the reduce(add, ...) form re-copies
            # the accumulated list for every input value).
            genes = [name for gene in genes
                     for name in split_and_strip(gene, ",")]
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")

        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = [name for gene in reference
                             for name in split_and_strip(gene, ",")]
                self.information(1,
                                 "Separators detected in reference gene "
                                 "names. Assuming multiple genes per "
                                 "instance.")

        org_code = self.SelectedOrganismCode()

        # Map the input gene names to NCBI gene ids (done on the calling
        # thread, before the enrichment task is submitted).
        from orangecontrib.bioinformatics.ncbi.gene import GeneMatcher
        gm = GeneMatcher(kegg.to_taxid(org_code))
        gm.genes = genes
        gm.run_matcher()
        mapped_genes = {gene: str(ncbi_id) for gene, ncbi_id
                        in gm.map_input_to_ncbi().items()}

        def run_enrichment(org_code, genes, reference=None, progress=None):
            """Compute pathway enrichment; runs on the executor thread.

            `genes` is the mapping {input gene name: NCBI id string}.
            """
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_ncbi_ids()

            # This is here just to keep widget working without any major changes.
            # map not needed, geneMatcher will not work on widget level.
            unique_genes = genes
            unique_ref_genes = {gene: gene for gene in set(reference)}

            # NOTE(review): `taxid` is computed but not used further below
            # in this variant; kept so the lookup's behaviour is unchanged.
            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.
            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(org.org_code, "pathway")
            converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(
                linkmap,
                dict((gene.upper(), ncbi.split(':')[-1])
                     for ncbi, gene in converted_ids))

            kegg_sets = geneset.GeneSets(input=kegg_sets)

            pathways = pathway_enrichment(
                kegg_sets, unique_genes.values(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress
            )

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        self._enrichTask = concurrent.Task(
            function=lambda:
                run_enrichment(org_code, mapped_genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        """Store the enrichment results and refresh the list view.

        Any exception raised by the enrichment task propagates to the
        caller; the progress bar is finished in either case.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        finally:
            # Do not leave the progress bar running when the task failed.
            self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        # NCBI id -> input name, and its inverse (input name -> NCBI id).
        self.uniqueGenesDict = {ncbi_id: input_name
                                for input_name, ncbi_id
                                in unique_genes.items()}
        self.revUniqueGenesDict = {
            val: key for key, val in self.uniqueGenesDict.items()}
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        """Set the progress bar to `value`, ignoring re-entrant calls."""
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.

        Names come from the attribute (column) names when `useAttrNames`
        is set, otherwise from the selected gene attribute's values.
        Raises ValueError when neither source is available.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            genes = [str(e[geneAttr]) for e in data
                     if not numpy.isnan(e[geneAttr])]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        # Suppress per-item notifications; emit a single one at the end.
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        """Commit after the item selection in the pathway scene changed."""
        self.commit()

    def commit(self):
        """Send the data matching the genes selected in the pathway view."""
        if not self.data:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)
            return

        selectedItems = self.pathwayView.scene().selectedItems()
        selectedGenes = set().union(
            *(item.marked_objects for item in selectedItems))

        if self.useAttrNames:
            # Genes are columns: output the selected variables only.
            # NOTE(review): "Unselected Data" is not updated in this mode -
            # confirm whether that is intended.
            selected = [self.data.domain[self.uniqueGenesDict[gene]]
                        for gene in selectedGenes]
            data = self.data[:, selected]
            self.send("Selected Data", data)
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            selectedIndices = []
            otherIndices = []
            for i, ex in enumerate(self.data):
                names = [self.revUniqueGenesDict.get(name, None)
                         for name in split_and_strip(str(ex[geneAttr]), ",")]
                if any(name and name in selectedGenes for name in names):
                    selectedIndices.append(i)
                else:
                    otherIndices.append(i)

            selected = self.data[selectedIndices] if selectedIndices else None
            other = self.data[otherIndices] if otherIndices else None

            self.send("Selected Data", selected)
            self.send("Unselected Data", other)

    def ClearCache(self):
        """Clear the local KEGG cache (callback of "Clear cache")."""
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()
        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        """Return the preferred initial widget size."""
        return QSize(1024, 720)
def __repr__(self):
    """Return the representation produced by ``QTreeWidget.__repr__``.

    The call is made explicitly on ``QTreeWidget`` rather than through
    ``super()``, so any ``__repr__`` defined between this class and
    ``QTreeWidget`` in the MRO is bypassed.
    """
    representation = QTreeWidget.__repr__(self)
    return representation
def __init__(self, parent=None, signalManager=None, name="PIPAx"):
    """
    Build the widget GUI and schedule the initial experiment listing.

    `signalManager` and `name` are accepted but not used in this method.
    """
    super().__init__(parent)

    self.selectedExperiments = []
    self.buffer = dicty.CacheSQLite(bufferfile)

    self.searchString = ""

    self.result_types = []
    self.mappings = {}

    # Fixed-width control area.
    self.controlArea.setMaximumWidth(250)
    self.controlArea.setMinimumWidth(250)

    gui.button(self.controlArea, self, "Reload",
               callback=self.Reload)
    gui.button(self.controlArea, self, "Clear cache",
               callback=self.clear_cache)

    b = gui.widgetBox(self.controlArea, "Experiment Sets")
    self.selectionSetsWidget = SelectionSetsWidget(self)
    self.selectionSetsWidget.setSizePolicy(
        QSizePolicy.Preferred, QSizePolicy.Maximum)

    def store_selections(modified):
        # Mirror the widget's selections into `storedSelections` whenever
        # the signal reports `modified` as false.
        if not modified:
            self.storedSelections = self.selectionSetsWidget.selections

    self.selectionSetsWidget.selectionModified.connect(store_selections)
    b.layout().addWidget(self.selectionSetsWidget)

    gui.separator(self.controlArea)

    b = gui.widgetBox(self.controlArea, "Sort output columns")
    self.columnsSortingWidget = SortedListWidget(self)
    self.columnsSortingWidget.setSizePolicy(
        QSizePolicy.Preferred, QSizePolicy.Maximum)

    def store_sort_order():
        # Mirror the current sorting order into `storedSortingOrder`.
        self.storedSortingOrder = self.columnsSortingWidget.sortingOrder

    self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
    b.layout().addWidget(self.columnsSortingWidget)
    sorting_model = QStringListModel(SORTING_MODEL_LIST)
    self.columnsSortingWidget.setModel(sorting_model)

    gui.separator(self.controlArea)

    box = gui.widgetBox(self.controlArea, 'Expression Type')
    self.expressionTypesCB = gui.comboBox(
        box, self, "rtypei", items=[], callback=self.UpdateResultsList)

    gui.checkBox(self.controlArea, self, "excludeconstant",
                 "Exclude labels with constant values")

    gui.checkBox(self.controlArea, self, "joinreplicates",
                 "Average replicates (use median)")

    gui.checkBox(self.controlArea, self, "log2",
                 "Logarithmic (base 2) transformation")

    # The commit button starts out disabled.
    self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                    callback=self.Commit)
    self.commit_button.setDisabled(True)

    gui.rubber(self.controlArea)

    box = gui.widgetBox(self.controlArea, "Authentication")

    # NOTE(review): the statement below is corrupted in this copy of the
    # file (the text between the string literals was replaced by
    # asterisks) and is not valid Python as it stands.  It presumably was
    # three separate line edits (username, password, search) -- restore it
    # from version control; it is reproduced here unchanged.
    gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"searchString", "Search", callbackOnType=True, callback=self.SearchUpdate)

    self.headerLabels = [t[1] for t in HEADER]

    self.experimentsWidget = QTreeWidget()
    self.experimentsWidget.setHeaderLabels(self.headerLabels)
    self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
    self.experimentsWidget.setRootIsDecorated(False)
    self.experimentsWidget.setSortingEnabled(True)

    contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
        self.experimentsWidget, self.experimentsWidget
    )

    self.experimentsWidget.header().installEventFilter(contextEventFilter)
    self.experimentsWidget.setItemDelegateForColumn(
        0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

    self.experimentsWidget.setAlternatingRowColors(True)

    self.experimentsWidget.selectionModel().selectionChanged.connect(
        self.onSelectionChanged)

    # The selection-sets widget operates on the experiments tree selection.
    self.selectionSetsWidget.setSelectionModel(
        self.experimentsWidget.selectionModel()
    )

    self.mainArea.layout().addWidget(self.experimentsWidget)

    # Restore the selection states from the stored settings
    self.selectionSetsWidget.selections = self.storedSelections
    self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

    self.restoreHeaderState()

    self.experimentsWidget.header().geometriesChanged.connect(
        self.saveHeaderState)

    self.dbc = None

    self.AuthSet()

    # Defer the experiment listing by 100 ms.
    QTimer.singleShot(100, self.UpdateExperiments)
class OWSetEnrichment(widget.OWWidget):
    """
    Widget running a set enrichment analysis on entity (gene) names.

    Query names are read from the "Data" input (from a string column or
    from the feature names), optionally compared against the "Reference"
    input, and tested against the selected set collections in a worker
    thread.  The resulting table can be filtered by count, p-value and
    FDR; the rows/columns matching the selected sets are sent on the
    "Data subset" output.
    """
    name = "Set Enrichment"
    description = ""
    icon = "../widgets/icons/GeneSetEnrichment.svg"
    priority = 5000

    inputs = [("Data", Orange.data.Table, "setData", widget.Default),
              ("Reference", Orange.data.Table, "setReference")]
    outputs = [("Data subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    taxid = settings.ContextSetting(None)
    speciesIndex = settings.ContextSetting(0)
    genesinrows = settings.ContextSetting(False)
    geneattr = settings.ContextSetting(0)
    categoriesCheckState = settings.ContextSetting({})

    useReferenceData = settings.Setting(False)
    useMinCountFilter = settings.Setting(True)
    useMaxPValFilter = settings.Setting(True)
    useMaxFDRFilter = settings.Setting(True)
    minClusterCount = settings.Setting(3)
    maxPValue = settings.Setting(0.01)
    maxFDR = settings.Setting(0.01)
    autocommit = settings.Setting(False)

    # Widget state flags (combined/tested with bit operations).
    Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4

    def __init__(self, parent=None):
        """Build the GUI and start downloading the required databases."""
        super().__init__(parent)

        self.geneMatcherSettings = [False, False, True, False]

        self.data = None
        self.referenceData = None
        self.taxid_list = []
        # String variables of the current input usable as name columns.
        self.geneAttrs = []

        # (taxid, Future[matcher]) pair for the current organism.
        self.__genematcher = (None, fulfill(gene.matcher([])))
        self.__invalidated = False

        self.currentAnnotatedCategories = []
        self.state = None
        self.__state = OWSetEnrichment.Initializing

        box = gui.widgetBox(self.controlArea, "Info")
        self.infoBox = gui.widgetLabel(box, "Info")
        self.infoBox.setText("No data on input.\n")

        self.speciesComboBox = gui.comboBox(
            self.controlArea, self,
            "speciesIndex", "Species",
            callback=self.__on_speciesIndexChanged)

        box = gui.widgetBox(self.controlArea, "Entity names")
        self.geneAttrComboBox = gui.comboBox(
            box, self, "geneattr", "Entity feature", sendSelectedValue=0,
            callback=self.updateAnnotations)

        cb = gui.checkBox(
            box, self, "genesinrows", "Use feature names",
            callback=self.updateAnnotations,
            disables=[(-1, self.geneAttrComboBox)])
        cb.makeConsistent()

        # gui.button(box, self, "Gene matcher settings",
        #            callback=self.updateGeneMatcherSettings,
        #            tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.controlArea, self, "useReferenceData",
            ["All entities", "Reference set (input)"],
            tooltips=["Use entire genome (for gene set enrichment) or all " +
                      "available entities for reference",
                      "Use entities from Reference Examples input signal " +
                      "as reference"],
            box="Reference", callback=self.updateAnnotations)

        box = gui.widgetBox(self.controlArea, "Entity Sets")
        self.groupsWidget = QTreeWidget(self)
        self.groupsWidget.setHeaderLabels(["Category"])
        box.layout().addWidget(self.groupsWidget)

        hLayout = QHBoxLayout()
        hLayout.setSpacing(10)
        hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
        gui.spin(hWidget, self, "minClusterCount",
                 0, 100, label="Entities",
                 tooltip="Minimum entity count",
                 callback=self.filterAnnotationsChartView,
                 callbackOnReturn=True,
                 checked="useMinCountFilter",
                 checkCallback=self.filterAnnotationsChartView)

        pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            pvalfilterbox, self, "useMaxPValFilter", "p-value",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
            tooltip="Maximum p-value",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        # The initial enabled state must follow the p-value check box
        # (it previously followed the FDR setting by mistake).
        sp.setEnabled(self.useMaxPValFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
        cb = gui.checkBox(
            fdrfilterbox, self, "useMaxFDRFilter", "FDR",
            callback=self.filterAnnotationsChartView)

        sp = gui.doubleSpin(
            fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
            tooltip="Maximum False discovery rate",
            callback=self.filterAnnotationsChartView,
            callbackOnReturn=True,
        )
        sp.setEnabled(self.useMaxFDRFilter)
        cb.toggled[bool].connect(sp.setEnabled)

        fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
        fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

        self.filterLineEdit = QLineEdit(
            self, placeholderText="Filter ...")

        self.filterCompleter = QCompleter(self.filterLineEdit)
        self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.filterCompleter)

        hLayout.addWidget(self.filterLineEdit)
        self.mainArea.layout().addWidget(hWidget)

        self.filterLineEdit.textChanged.connect(
            self.filterAnnotationsChartView)

        self.annotationsChartView = QTreeView(
            alternatingRowColors=True,
            sortingEnabled=True,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )
        self.annotationsChartView.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.annotationsChartView)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.annotationsChartView)
        self.annotationsChartView.header().installEventFilter(contextEventFilter)

        self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
        gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

        # Block the widget until the databases have been downloaded;
        # __initialize_finish lifts the block.
        self.setBlocking(True)

        task = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             (geneset.sfdomain, "index.pck")]
        )

        task.finished.connect(self.__initialize_finish)
        self.setStatusMessage("Initializing")
        self._executor = ThreadExecutor(
            parent=self, threadPool=QThreadPool(self))
        self._executor.submit(task)

    def sizeHint(self):
        """Return the preferred widget size (1024 x 600)."""
        return QSize(1024, 600)

    def __initialize_finish(self):
        """Populate the organism list and switch the widget to Ready."""
        # Finalize the widget's initialization (preferably after
        # ensuring all required databases have been downloaded).

        sets = geneset.list_all()
        taxids = set(taxonomy.common_taxids() +
                     list(filter(None, [tid for _, tid, _ in sets])))
        organisms = [(tid, name_or_none(tid)) for tid in taxids]
        organisms = [(tid, name) for tid, name in organisms
                     if name is not None]

        # The first entry stands for "no organism".
        organisms = [(None, "None")] + sorted(organisms)
        taxids = [tid for tid, _ in organisms]
        names = [name for _, name in organisms]
        self.taxid_list = taxids

        self.speciesComboBox.clear()
        self.speciesComboBox.addItems(names)
        self.genesets = sets

        if self.taxid in self.taxid_list:
            taxid = self.taxid
        else:
            taxid = self.taxid_list[0]

        # Reset so that setCurrentOrganism sees a change and refreshes.
        self.taxid = None
        self.setCurrentOrganism(taxid)
        self.setBlocking(False)
        self.__state = OWSetEnrichment.Ready
        self.setStatusMessage("")

    def setCurrentOrganism(self, taxid):
        """
        Set the current organism `taxid`.

        An unknown `taxid` is replaced by the entry at the (clamped)
        `speciesIndex`.  On change the hierarchy is rebuilt and the gene
        matcher and the results are invalidated.
        """
        if taxid not in self.taxid_list:
            taxid = self.taxid_list[min(self.speciesIndex,
                                        len(self.taxid_list) - 1)]
        if self.taxid != taxid:
            self.taxid = taxid
            self.speciesIndex = self.taxid_list.index(taxid)
            self.refreshHierarchy()
            self._invalidateGeneMatcher()
            self._invalidate()

    def currentOrganism(self):
        """Return the current organism taxid."""
        return self.taxid

    def __on_speciesIndexChanged(self):
        """React to the user choosing another species in the combo box."""
        taxid = self.taxid_list[self.speciesIndex]
        # Sentinel value forcing setCurrentOrganism to register a change.
        self.taxid = "< Do not look >"
        self.setCurrentOrganism(taxid)
        if self.__invalidated and self.data is not None:
            self.updateAnnotations()

    def clear(self):
        """Clear/reset the widget state."""
        self._cancelPending()
        self.state = None
        self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment
        # _clearView also empties the results model.
        self._clearView()

        self.geneAttrComboBox.clear()
        self.geneAttrs = []
        self._updatesummary()

    def _cancelPending(self):
        """Cancel pending tasks."""
        if self.state is not None:
            self.state.results.cancel()
            self.state.namematcher.cancel()
            self.state.cancelled = True

    def _clearView(self):
        """Clear the enrichment report view (main area)."""
        if self.annotationsChartView.model() is not None:
            self.annotationsChartView.model().clear()

    def setData(self, data=None):
        """Set the input dataset with query gene names."""
        if self.__state & OWSetEnrichment.Initializing:
            self.__initialize_finish()

        self.error(0)
        self.closeContext()
        self.clear()

        self.groupsWidget.clear()
        self.data = data

        if data is not None:
            varlist = [var for var in data.domain.variables + data.domain.metas
                       if isinstance(var, Orange.data.StringVariable)]

            self.geneAttrs = varlist
            for var in varlist:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            oldtaxid = self.taxid
            self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

            # Hints attached to the data take effect before the context
            # is opened.
            taxid = data_hints.get_hint(data, "taxid", "")
            if taxid in self.taxid_list:
                self.speciesIndex = self.taxid_list.index(taxid)
                self.taxid = taxid

            self.genesinrows = data_hints.get_hint(
                data, "genesinrows", self.genesinrows)

            self.openContext(data)
            if oldtaxid != self.taxid:
                # Sentinel value forcing setCurrentOrganism to refresh.
                self.taxid = "< Do not look >"
                self.setCurrentOrganism(taxid)

            self.refreshHierarchy()
            self._invalidate()

    def setReference(self, data=None):
        """Set the (optional) input dataset with reference gene names."""
        self.referenceData = data
        self.referenceRadioBox.setEnabled(bool(data))
        if self.useReferenceData:
            self._invalidate()

    def handleNewSignals(self):
        """Recompute the enrichment if an input invalidated the results."""
        if self.__invalidated:
            self.updateAnnotations()

    def _invalidateGeneMatcher(self):
        """Cancel the current matcher future and install an empty one."""
        _, f = self.__genematcher
        f.cancel()
        self.__genematcher = (None, fulfill(gene.matcher([])))

    def _invalidate(self):
        """Mark the current results as out of date."""
        self.__invalidated = True

    def genesFromTable(self, table):
        """
        Return the entity names found in `table`.

        The names are the attribute names when `genesinrows` is set,
        otherwise the string values of the selected name column.
        """
        if self.genesinrows:
            genes = [attr.name for attr in table.domain.attributes]
        else:
            geneattr = self.geneAttrs[self.geneattr]
            genes = [str(ex[geneattr]) for ex in table]
        return genes

    def getHierarchy(self, taxid):
        """
        Return the set hierarchies for `taxid` and for no organism.

        The result is a pair ``((taxid, tree), (None, tree))`` where each
        tree is a nested mapping built from the hierarchy tuples in
        `self.genesets`.
        """
        def recursive_dict():
            return defaultdict(recursive_dict)
        collection = recursive_dict()

        def collect(col, hier):
            # Walking the defaultdict creates the nested keys.
            if hier:
                collect(col[hier[0]], hier[1:])

        for hierarchy, t_id, _ in self.genesets:
            collect(collection[t_id], hierarchy)

        return (taxid, collection[taxid]), (None, collection[None])

    def setHierarchy(self, hierarchy, hierarchy_noorg):
        """Fill the category tree from the two ``(organism, tree)`` pairs."""
        self.groupsWidgetItems = {}

        def fill(col, parent, full=(), org=""):
            for key, value in sorted(col.items()):
                full_cat = full + (key,)
                item = QTreeWidgetItem(parent, [key])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                              Qt.ItemIsSelectable | Qt.ItemIsEnabled)
                if value:
                    item.setFlags(item.flags() | Qt.ItemIsTristate)

                # Restore the stored check state (checked by default).
                checked = self.categoriesCheckState.get(
                    (full_cat, org), Qt.Checked)
                item.setData(0, Qt.CheckStateRole, checked)
                item.setExpanded(True)
                item.category = full_cat
                item.organism = org
                self.groupsWidgetItems[full_cat] = item
                fill(value, item, full_cat, org=org)

        self.groupsWidget.clear()
        fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
        fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])

    def refreshHierarchy(self):
        """Rebuild the category tree for the currently indexed organism."""
        self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex]))

    def selectedCategories(self):
        """
        Return a list of currently selected hierarchy keys.

        A key is a tuple of identifiers from the root to the leaf of
        the hierarchy tree.
        """
        return [key for key, check in self.getHierarchyCheckState().items()
                if check == Qt.Checked]

    def getHierarchyCheckState(self):
        """Return a ``{(category path, organism): check state}`` mapping."""
        def collect(item, full=()):
            checked = item.checkState(0)
            name = str(item.data(0, Qt.DisplayRole))
            full_cat = full + (name,)
            result = [((full_cat, item.organism), checked)]
            for i in range(item.childCount()):
                result.extend(collect(item.child(i), full_cat))
            return result

        items = [self.groupsWidget.topLevelItem(i)
                 for i in range(self.groupsWidget.topLevelItemCount())]
        states = itertools.chain(*(collect(item) for item in items))
        return dict(states)

    def subsetSelectionChanged(self, item, column):
        """Handle a click on an item of the category tree."""
        # The selected geneset (hierarchy) subset has been changed by the
        # user. Update the displayed results.
        # Update the stored state (persistent settings)
        self.categoriesCheckState = self.getHierarchyCheckState()
        categories = self.selectedCategories()

        if self.data is not None:
            if self._nogenematching() or \
                    not set(categories) <= set(self.currentAnnotatedCategories):
                self.updateAnnotations()
            else:
                self.filterAnnotationsChartView()

    def updateGeneMatcherSettings(self):
        """
        Open the gene matcher settings dialog (not implemented).

        Always raises `NotImplementedError`; the unreachable dialog code
        that used to follow the `raise` has been removed.
        """
        raise NotImplementedError

    def _genematcher(self):
        """
        Return a Future[gene.SequenceMatcher]
        """
        taxid = self.taxid_list[self.speciesIndex]

        current, matcher_f = self.__genematcher

        # Reuse the existing future if it is for the same organism.
        if taxid == current and \
                not matcher_f.cancelled():
            return matcher_f

        self._invalidateGeneMatcher()

        if taxid is None:
            self.__genematcher = (None, fulfill(gene.matcher([])))
            return self.__genematcher[1]

        matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
        matchers = [m for m, use in zip(matchers, self.geneMatcherSettings)
                    if use]

        def create():
            return gene.matcher([m(taxid) for m in matchers])

        matcher_f = self._executor.submit(create)
        self.__genematcher = (taxid, matcher_f)
        return self.__genematcher[1]

    def _nogenematching(self):
        """Return True if no organism or no matcher source is selected."""
        return self.taxid is None or not any(self.geneMatcherSettings)

    def updateAnnotations(self):
        """Start (in a worker thread) the enrichment on the current input."""
        if self.data is None:
            return

        assert not self.__state & OWSetEnrichment.Initializing
        self._cancelPending()
        self._clearView()

        self.information(0)
        self.warning(0)
        self.error(0)

        if not self.genesinrows and not self.geneAttrs:
            self.error(0, "Input data contains no columns with gene names")
            return

        self.__state = OWSetEnrichment.RunningEnrichment

        taxid = self.taxid_list[self.speciesIndex]
        self.taxid = taxid

        categories = self.selectedCategories()

        clusterGenes = self.genesFromTable(self.data)

        if self.referenceData is not None and self.useReferenceData:
            referenceGenes = self.genesFromTable(self.referenceData)
        else:
            referenceGenes = None

        self.currentAnnotatedCategories = categories

        genematcher = self._genematcher()

        self.progressBarInit()

        ## Load collections in a worker thread
        # TODO: Use cached collections if already loaded and
        # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
        collections = self._executor.submit(geneset.collections, *categories)

        def refset_null():
            """Return the default background reference set"""
            col = collections.result()
            return reduce(operator.ior, (set(g.genes) for g in col), set())

        def refset_ncbi():
            """Return all NCBI gene names"""
            geneinfo = gene.NCBIGeneInfo(taxid)
            return set(geneinfo.keys())

        def namematcher():
            matcher = genematcher.result()
            match = matcher.set_targets(ref_set.result())
            match.umatch = memoize(match.umatch)
            return match

        def map_unames():
            # `namematcher` is looked up at call time, i.e. it is the
            # future the name is rebound to below.
            matcher = namematcher.result()
            query = list(filter(None, map(matcher.umatch, querynames)))
            reference = list(filter(None, map(matcher.umatch, ref_set.result())))
            return query, reference

        if self._nogenematching():
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_null)
            else:
                ref_set = fulfill(referenceGenes)
        else:
            if referenceGenes is None:
                ref_set = self._executor.submit(refset_ncbi)
            else:
                ref_set = fulfill(referenceGenes)

        # From here on `namematcher` names the submitted future.
        namematcher = self._executor.submit(namematcher)
        querynames = clusterGenes

        state = types.SimpleNamespace()
        state.query_set = clusterGenes
        state.reference_set = referenceGenes
        state.namematcher = namematcher
        state.query_count = len(set(clusterGenes))
        state.reference_count = (len(set(referenceGenes))
                                 if referenceGenes is not None else None)

        state.cancelled = False

        progress = methodinvoke(self, "_setProgress", (float,))
        info = methodinvoke(self, "_setRunInfo", (str,))

        @withtraceback
        def run():
            info("Loading data")
            match = namematcher.result()
            query, reference = map_unames()
            gscollections = collections.result()

            results = []
            info("Running enrichment")
            p = 0
            for i, gset in enumerate(gscollections):
                genes = set(filter(None, map(match.umatch, gset.genes)))
                enr = set_enrichment(genes, reference, query)
                results.append((gset, enr))

                if state.cancelled:
                    raise UserInteruptException

                # Only report whole-percent changes.
                pnew = int(100 * i / len(gscollections))
                if pnew != p:
                    progress(pnew)
                    p = pnew
            progress(100)
            info("")
            return query, reference, results

        task = Task(function=run)
        task.resultReady.connect(self.__on_enrichment_finished)
        task.exceptionReady.connect(self.__on_enrichment_failed)
        result = self._executor.submit(task)
        state.results = result

        self.state = state
        self._updatesummary()

    def __on_enrichment_failed(self, exception):
        """Report a failed enrichment task and reset the running state."""
        if not isinstance(exception, UserInteruptException):
            print("ERROR:", exception, file=sys.stderr)
            print(exception._traceback, file=sys.stderr)

        self.progressBarFinished()
        self.setStatusMessage("")
        self.__state &= ~OWSetEnrichment.RunningEnrichment

    def __on_enrichment_finished(self, results):
        """Populate the results view from ``(query, reference, results)``."""
        assert QThread.currentThread() is self.thread()
        self.__state &= ~OWSetEnrichment.RunningEnrichment

        query, reference, results = results

        if self.annotationsChartView.model():
            self.annotationsChartView.model().clear()

        nquery = len(query)
        nref = len(reference)
        maxcount = max((len(e.query_mapped) for _, e in results),
                       default=1)
        maxrefcount = max((len(e.reference_mapped) for _, e in results),
                          default=1)
        # Column widths derived from the number of decimal digits; both
        # use log10 (the reference width used the natural log before).
        nspaces = int(math.ceil(math.log10(maxcount or 1)))
        refspaces = int(math.ceil(math.log10(maxrefcount or 1)))
        query_fmt = "%" + str(nspaces) + "s (%.2f%%)"
        ref_fmt = "%" + str(refspaces) + "s (%.2f%%)"

        def fmt_count(fmt, count, total):
            return fmt % (count, 100.0 * count / (total or 1))

        fmt_query_count = partial(fmt_count, query_fmt)
        fmt_ref_count = partial(fmt_count, ref_fmt)

        linkFont = QFont(self.annotationsChartView.viewOptions().font)
        linkFont.setUnderline(True)

        def item(value=None, tooltip=None, user=None):
            # The user role (used for sorting) falls back to `value`.
            si = QStandardItem()
            if value is not None:
                si.setData(value, Qt.DisplayRole)
            if tooltip is not None:
                si.setData(tooltip, Qt.ToolTipRole)
            if user is not None:
                si.setData(user, Qt.UserRole)
            else:
                si.setData(value, Qt.UserRole)
            return si

        model = QStandardItemModel()
        model.setSortRole(Qt.UserRole)
        model.setHorizontalHeaderLabels(
            ["Category", "Term", "Count", "Reference count", "p-value",
             "FDR", "Enrichment"])
        for i, (gset, enrich) in enumerate(results):
            if len(enrich.query_mapped) == 0:
                continue
            nquery_mapped = len(enrich.query_mapped)
            nref_mapped = len(enrich.reference_mapped)

            row = [
                item(", ".join(gset.hierarchy)),
                item(gsname(gset), tooltip=gset.link),
                item(fmt_query_count(nquery_mapped, nquery),
                     tooltip=nquery_mapped, user=nquery_mapped),
                item(fmt_ref_count(nref_mapped, nref),
                     tooltip=nref_mapped, user=nref_mapped),
                item(fmtp(enrich.p_value), user=enrich.p_value),
                item(),  # column 5, FDR, is computed in filterAnnotationsChartView
                item(enrich.enrichment_score,
                     tooltip="%.3f" % enrich.enrichment_score,
                     user=enrich.enrichment_score)
            ]
            row[0].geneset = gset
            row[0].enrichment = enrich
            row[1].setData(gset.link, gui.LinkRole)
            row[1].setFont(linkFont)
            row[1].setForeground(QColor(Qt.blue))

            model.appendRow(row)

        self.annotationsChartView.setModel(model)
        self.annotationsChartView.selectionModel().selectionChanged.connect(
            self.commit
        )

        if not model.rowCount():
            self.warning(0, "No enriched sets found.")
        else:
            self.warning(0)

        # Completer entries: set names plus the words they consist of.
        allnames = set(gsname(geneset)
                       for geneset, (count, _, _, _) in results if count)

        allnames |= reduce(operator.ior,
                           (set(word_split(name)) for name in allnames),
                           set())

        self.filterCompleter.setModel(None)
        self.completerModel = QStringListModel(sorted(allnames))
        self.filterCompleter.setModel(self.completerModel)

        if results:
            max_score = max((e.enrichment_score for _, e in results
                             if np.isfinite(e.enrichment_score)),
                            default=1)

            self.annotationsChartView.setItemDelegateForColumn(
                6, BarItemDelegate(self, scale=(0.0, max_score))
            )

        self.annotationsChartView.setItemDelegateForColumn(
            1, gui.LinkStyledItemDelegate(self.annotationsChartView)
        )

        header = self.annotationsChartView.header()
        for i in range(model.columnCount()):
            sh = self.annotationsChartView.sizeHintForColumn(i)
            sh = max(sh, header.sectionSizeHint(i))
            # Keep every column between 30 and 300 pixels wide.
            self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
            # self.annotationsChartView.resizeColumnToContents(i)

        self.filterAnnotationsChartView()

        self.progressBarFinished()
        self.setStatusMessage("")

    def _updatesummary(self):
        """Update the info label from the current run state."""
        state = self.state
        if state is None:
            self.error(0)
            self.warning(0)
            self.infoBox.setText("No data on input.\n")
            return

        text = "{.query_count} unique names on input\n".format(state)

        if state.results.done() and not state.results.exception():
            mapped, _, _ = state.results.result()
            ratio_mapped = (len(mapped) / state.query_count
                            if state.query_count else 0)
            text += ("%i (%.1f%%) gene names matched" %
                     (len(mapped), 100.0 * ratio_mapped))
        elif not state.results.done():
            text += "..."
        else:
            text += "<Error {}>".format(str(state.results.exception()))
        self.infoBox.setText(text)

        # TODO: warn on no enriched sets found (i.e no query genes
        # mapped to any set)

    def filterAnnotationsChartView(self, filterString=""):
        """
        Recompute FDR values and hide the rows excluded by the filters.

        `filterString` is accepted (the method is connected to the filter
        line edit) but currently ignored.  Does nothing while an
        enrichment is running or before a results model exists.
        """
        if self.__state & OWSetEnrichment.RunningEnrichment:
            return

        # TODO: Move filtering to a filter proxy model
        # TODO: Re-enable string search

        categories = set(", ".join(cat) for cat, _ in self.selectedCategories())

        # filterString = str(self.filterLineEdit.text()).lower()

        model = self.annotationsChartView.model()
        if model is None:
            # The filter controls can fire before any enrichment has
            # produced a model; there is nothing to filter then.
            return

        def ishidden(index):
            # Is item at index (row) hidden
            item = model.item(index)
            item_cat = item.data(Qt.DisplayRole)
            return item_cat not in categories

        hidemask = [ishidden(i) for i in range(model.rowCount())]

        # compute FDR according the selected categories
        pvals = [model.item(i, 4).data(Qt.UserRole)
                 for i, hidden in enumerate(hidemask) if not hidden]
        fdrs = utils.stats.FDR(pvals)

        # update FDR for the selected collections and apply filtering rules
        itemsHidden = []
        fdriter = iter(fdrs)
        for index, hidden in enumerate(hidemask):
            if not hidden:
                fdr = next(fdriter)
                pval = model.index(index, 4).data(Qt.UserRole)
                count = model.index(index, 2).data(Qt.ToolTipRole)

                hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                         (self.useMaxPValFilter and pval > self.maxPValue) or \
                         (self.useMaxFDRFilter and fdr > self.maxFDR)

                if not hidden:
                    fdr_item = model.item(index, 5)
                    fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                    fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                    fdr_item.setData(fdr, Qt.UserRole)

            self.annotationsChartView.setRowHidden(
                index, QModelIndex(), hidden)

            itemsHidden.append(hidden)

        if model.rowCount() and all(itemsHidden):
            self.information(0, "All sets were filtered out.")
        else:
            self.information(0)

        self._updatesummary()

    @Slot(float)
    def _setProgress(self, value):
        """Set the progress bar value (must run in the widget's thread)."""
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot(str)
    def _setRunInfo(self, text):
        """Show `text` as the widget's status message."""
        self.setStatusMessage(text)

    def commit(self):
        """
        Send the subset of the input matching the selected sets.

        With `genesinrows` the output keeps the attributes whose names
        map to a gene of the selected sets; otherwise it keeps the rows
        whose name column does.  Nothing is sent while there is no data,
        an enrichment is running, or no results are available yet.
        """
        if self.data is None or \
                self.__state & OWSetEnrichment.RunningEnrichment:
            return

        model = self.annotationsChartView.model()
        selection_model = self.annotationsChartView.selectionModel()
        if model is None or selection_model is None or self.state is None:
            # No enrichment results yet (e.g. commit pressed before a
            # run completed); avoid dereferencing None.
            return

        rows = selection_model.selectedRows(0)
        selected = [model.item(index.row(), 0) for index in rows]
        mapped = reduce(operator.ior,
                        (set(item.enrichment.query_mapped)
                         for item in selected),
                        set())
        assert self.state.namematcher.done()

        matcher = self.state.namematcher.result()

        axis = 1 if self.genesinrows else 0
        if axis == 1:
            mapped = [attr for attr in self.data.domain.attributes
                      if matcher.umatch(attr.name) in mapped]

            newdomain = Orange.data.Domain(
                mapped, self.data.domain.class_vars, self.data.domain.metas)
            data = self.data.from_table(newdomain, self.data)
        else:
            geneattr = self.geneAttrs[self.geneattr]
            selected = [i for i, ex in enumerate(self.data)
                        if matcher.umatch(str(ex[geneattr])) in mapped]
            data = self.data[selected]
        self.send("Data subset", data)

    def onDeleteWidget(self):
        """Cancel pending work and shut the executor down on removal."""
        super().onDeleteWidget()
        if self.state is not None:
            self._cancelPending()
            self.state = None
        self._executor.shutdown(wait=False)
class OWGOEnrichmentAnalysis(widget.OWWidget):
    """GO Browser widget: enrichment analysis for Gene Ontology terms.

    Takes a cluster data table (and optionally a reference table), computes
    enriched GO terms for the genes found in it, shows them as a tree and as
    a flat table, and outputs the data matching the selected terms together
    with an enrichment report table.
    """

    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/GOBrowser.svg"
    priority = 2020

    inputs = [("Cluster Data", Orange.data.Table,
               "setDataset", widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table,
               "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Data on Unselected Genes", Orange.data.Table),
               ("Data on Unknown Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    # Input settings
    annotationIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    geneMatcherSettings = settings.Setting([True, False, False, False])
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True for et in go.evidenceTypesOrdered})

    # Filter settings
    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    # `filterByPValue` / `maxPValue` control the FDR filter (see __init__).
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    # Selection / output settings
    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)
    selectionAddTermAsClass = settings.Setting(0)

    # Widget states
    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        """Build the GUI and schedule the download of required files."""
        # Fixed: `self` used to be passed a second time as the first
        # positional argument (`super().__init__(self, parent)`).
        super().__init__(parent)

        self.clusterDataset = None
        self.referenceDataset = None
        self.ontology = None
        self.annotations = None
        self.loadedAnnotationCode = "---"
        self.treeStructRootKey = None
        self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = OWGOEnrichmentAnalysis.Initializing

        self.annotationCodes = []

        #############
        ## GUI
        #############

        self.tabs = gui.tabWidget(self.controlArea)
        ## Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        box = gui.widgetBox(self.inputTab, "Organism")
        self.annotationComboBox = gui.comboBox(
            box, self, "annotationIndex", items=self.annotationCodes,
            callback=self._updateEnrichment, tooltip="Select organism")

        genebox = gui.widgetBox(self.inputTab, "Gene Names")
        self.geneAttrIndexCombo = gui.comboBox(
            genebox, self, "geneAttrIndex", callback=self._updateEnrichment,
            tooltip="Use this attribute to extract gene names from input data")
        self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

        cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names",
                          tooltip="Use column names for gene names",
                          callback=self._updateEnrichment)
        cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

        gui.button(genebox, self, "Gene matcher settings",
                   callback=self.UpdateGeneMatcher,
                   tooltip="Open gene matching settings dialog")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self._updateEnrichment)

        # The reference option is enabled once a reference dataset arrives.
        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self._updateEnrichment)

        ## Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8, label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        #use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8, label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self._updateEnrichment)
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                            addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        ## Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms",
                      "Outputs genes common to all selected GO terms"],
            callback=[self.ExampleSelection,
                      self.UpdateAddClassButton])

        self.addClassCB = gui.checkBox(
            box, self, "selectionAddTermAsClass", "Add GO Term as class",
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        # Block the widget until the required files have been fetched;
        # `__initialize_finish` unblocks it.
        self.setBlocking(True)
        self._executor = ThreadExecutor()
        self._init = EnsureDownloaded(
            [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
             ("GO", "taxonomy.pickle")]
        )
        self._init.finished.connect(self.__initialize_finish)
        self._executor.submit(self._init)

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(1000, 700)

    def __initialize_finish(self):
        """Unblock the widget and populate the organism combo box.

        On a connection timeout an error is shown and the tabs and views are
        disabled; the state then stays `Initializing`.
        """
        self.setBlocking(False)

        try:
            self.annotationFiles = listAvailable()
        except ConnectTimeout:
            self.error(2, "Internet connection error, unable to load data. " + \
                          "Check connection and create a new GO Browser widget.")
            self.filterTab.setEnabled(False)
            self.inputTab.setEnabled(False)
            self.selectTab.setEnabled(False)
            self.listView.setEnabled(False)
            self.sigTerms.setEnabled(False)
        else:
            self.annotationCodes = sorted(self.annotationFiles.keys())
            self.annotationComboBox.clear()
            self.annotationComboBox.addItems(self.annotationCodes)
            self.annotationComboBox.setCurrentIndex(self.annotationIndex)
            self.__state = OWGOEnrichmentAnalysis.Ready

    def __on_evidenceChanged(self):
        """Copy the evidence check box states to the setting and recompute."""
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self._updateEnrichment()

    def UpdateGeneMatcher(self):
        """Open the Gene matcher settings dialog."""
        dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, modal=True)
        if dialog.exec_() != QDialog.Rejected:
            self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items]
            if self.annotations:
                self.SetGeneMatcher()
                self._updateEnrichment()

    def clear(self):
        """Reset the info label, warnings, views and all outputs."""
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.geneAttrIndexCombo.clear()
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Data on Unselected Genes", None)
        self.send("Data on Unknown Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        """Input handler for "Cluster Data"; recomputes the enrichment."""
        if self.__state == OWGOEnrichmentAnalysis.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.clear()
        self.clusterDataset = data

        if data is not None:
            domain = data.domain
            allvars = domain.variables + domain.metas
            self.candidateGeneAttrs = [var for var in allvars if isstring(var)]

            self.geneAttrIndexCombo.clear()
            for var in self.candidateGeneAttrs:
                self.geneAttrIndexCombo.addItem(*gui.attributeItem(var))
            taxid = data_hints.get_hint(data, "taxid", "")
            code = None
            try:
                code = go.from_taxid(taxid)
            except KeyError:
                pass
            except Exception as ex:
                print(ex)

            if code is not None:
                # Preselect the organism hinted at by the data, if available.
                filename = "gene_association.%s.tar.gz" % code
                if filename in self.annotationFiles.values():
                    self.annotationIndex = \
                        [i for i, name in enumerate(self.annotationCodes)
                         if self.annotationFiles[name] == filename].pop()

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)
            self.openContext(data)

            self.geneAttrIndex = min(self.geneAttrIndex,
                                     len(self.candidateGeneAttrs) - 1)
            if len(self.candidateGeneAttrs) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            # NOTE(review): after the `min` above this condition always
            # holds, so the index is always reset to the last candidate and
            # a value restored by `openContext` is overwritten. Kept as is;
            # confirm whether that is intended.
            elif self.geneAttrIndex < len(self.candidateGeneAttrs):
                self.geneAttrIndex = len(self.candidateGeneAttrs) - 1

            self._updateEnrichment()

    def setReferenceDataset(self, data=None):
        """Input handler for "Reference Data"."""
        self.referenceDataset = data
        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.clusterDataset is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            graph = self.Enrichment()
            self.SetGraph(graph)
        elif self.clusterDataset:
            self.__updateReferenceSetButton()

    def handleNewSignals(self):
        """Delegate to the base class implementation."""
        super().handleNewSignals()

    def _updateEnrichment(self):
        """Load the data files, recompute the enrichment and redisplay it.

        Does nothing without cluster data or while the widget is not `Ready`.
        """
        if self.clusterDataset is not None and \
                self.__state == OWGOEnrichmentAnalysis.Ready:
            pb = gui.ProgressBar(self, 100)
            self.Load(pb=pb)
            graph = self.Enrichment(pb=pb)
            self.FilterUnknownGenes()
            self.SetGraph(graph)

    def __updateReferenceSetButton(self):
        """Update the state and the label of the "Reference set" button."""
        allgenes, refgenes = None, None
        if self.referenceDataset:
            try:
                allgenes = self.genesFromTable(self.referenceDataset)
            except Exception:
                allgenes = []
            refgenes, unknown = self.FilterAnnotatedGenes(allgenes)
        self.referenceRadioBox.buttons[1].setDisabled(not bool(allgenes))
        self.referenceRadioBox.buttons[1].setText(
            "Reference set " +
            ("(%i genes, %i matched)" % (len(allgenes), len(refgenes))
             if allgenes and refgenes else ""))

    def genesFromTable(self, data):
        """Return the list of gene names found in `data`.

        With `useAttrNames` set the variable names are used; otherwise the
        values of the selected gene attribute, split on "," when any value
        contains a comma.
        """
        if self.useAttrNames:
            genes = [v.name for v in data.domain.variables]
        else:
            attr = self.candidateGeneAttrs[min(self.geneAttrIndex,
                                               len(self.candidateGeneAttrs) - 1)]
            genes = [str(ex[attr]) for ex in data if not numpy.isnan(ex[attr])]
            if any("," in gene for gene in genes):
                self.information(0, "Separators detected in gene names. Assuming multiple genes per example.")
                genes = reduce(operator.iadd,
                               (names.split(",") for names in genes), [])
        return genes

    def FilterAnnotatedGenes(self, genes):
        """Split `genes` into matched names and those without a match.

        Returns a pair: the values of the annotation name translator for
        `genes`, and the list of `genes` that are not among those values.
        """
        matchedgenes = self.annotations.get_gene_names_translator(genes).values()
        # Set for O(1) membership tests instead of scanning the values view.
        matched = set(matchedgenes)
        return matchedgenes, [gene for gene in genes if gene not in matched]

    def FilterUnknownGenes(self):
        """Send the rows whose gene names match nothing as "Unknown Genes"."""
        if not self.useAttrNames and self.candidateGeneAttrs:
            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex,
                                                   len(self.candidateGeneAttrs) - 1)]
            match = self.annotations.genematcher.match
            indices = [
                i for i, ex in enumerate(self.clusterDataset)
                if not any(match(n.strip()) for n in str(ex[geneAttr]).split(","))
            ]
            data = self.clusterDataset[indices] if indices else None
            self.send("Data on Unknown Genes", data)
        else:
            self.send("Data on Unknown Genes", None)

    def Load(self, pb=None):
        """Download missing files and load the ontology and annotations.

        `pb` is an optional progress bar; when omitted one is created here
        and finished before returning. Only acts in the `Ready` state.
        """
        if self.__state == OWGOEnrichmentAnalysis.Ready:
            go_files, tax_files = serverfiles.listfiles("GO"), serverfiles.listfiles("Taxonomy")
            calls = []
            pb, finish = (gui.ProgressBar(self, 0), True) if pb is None else (pb, False)
            count = 0
            if not tax_files:
                # Fixed: the file name was misspelled ("ncbi_taxnomy.tar.gz");
                # use the same constants `__init__` uses for the download.
                calls.append((taxonomy.Taxonomy.DOMAIN,
                              taxonomy.Taxonomy.FILENAME))
                count += 1
            org = self.annotationCodes[min(self.annotationIndex,
                                           len(self.annotationCodes) - 1)]
            if org != self.loadedAnnotationCode:
                count += 1
                if self.annotationFiles[org] not in go_files:
                    calls.append(("GO", self.annotationFiles[org]))
                    count += 1

            if "gene_ontology_edit.obo.tar.gz" not in go_files:
                calls.append(("GO", "gene_ontology_edit.obo.tar.gz"))
                count += 1
            if not self.ontology:
                count += 1
            pb.iter += count * 100

            for args in calls:
                serverfiles.localpath_download(*args, **dict(callback=pb.advance))

            if not self.ontology:
                self.ontology = go.Ontology(progress_callback=lambda value: pb.advance())

            if org != self.loadedAnnotationCode:
                self.annotations = None
                gc.collect()  # Force run garbage collection
                code = self.annotationFiles[org].split(".")[-3]
                self.annotations = go.Annotations(
                    code, genematcher=gene.GMDirect(),
                    progress_callback=lambda value: pb.advance())
                self.loadedAnnotationCode = org
                count = defaultdict(int)
                geneSets = defaultdict(set)

                for anno in self.annotations.annotations:
                    count[anno.evidence] += 1
                    geneSets[anno.evidence].add(anno.geneName)
                # Show per-evidence statistics on the evidence check boxes.
                for etype in go.evidenceTypesOrdered:
                    ecb = self.evidenceCheckBoxDict[etype]
                    ecb.setEnabled(bool(count[etype]))
                    ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype])))
            if finish:
                pb.finish()

    def SetGeneMatcher(self):
        """Install a gene matcher built from `geneMatcherSettings`."""
        if self.annotations:
            taxid = self.annotations.taxid
            matchers = []
            for matcher, use in zip([gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy],
                                    self.geneMatcherSettings):
                if use:
                    try:
                        if taxid == "352472":
                            matchers.extend([matcher(taxid), gene.GMDicty(),
                                             [matcher(taxid), gene.GMDicty()]])
                            # The reason machers are duplicated is that we want `matcher` or `GMDicty` to
                            # match genes by them self if possible. Only use the joint matcher if they fail.
                        else:
                            matchers.append(matcher(taxid))
                    except Exception as ex:
                        print(ex)
            self.annotations.genematcher = gene.matcher(matchers)
            self.annotations.genematcher.set_targets(self.annotations.gene_names)

    def Enrichment(self, pb=None):
        """Compute the enriched GO terms for the cluster dataset.

        `pb` is an optional progress bar; one is created when omitted. The
        bar is finished before returning, including on the error paths.

        Returns a dict mapping a term id to the tuple returned by
        `get_enriched_terms` with the FDR appended as the last item, or an
        empty dict when gene names or a reference set cannot be obtained.
        """
        assert self.clusterDataset is not None

        pb = gui.ProgressBar(self, 100) if pb is None else pb
        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        if isinstance(self.annotations.genematcher, gene.GMDirect):
            self.SetGeneMatcher()
        self.error(1)
        self.warning([0, 1])

        if self.useAttrNames:
            clusterGenes = [v.name for v in self.clusterDataset.domain.attributes]
            self.information(0)
        elif 0 <= self.geneAttrIndex < len(self.candidateGeneAttrs):
            geneAttr = self.candidateGeneAttrs[self.geneAttrIndex]
            clusterGenes = [str(ex[geneAttr]) for ex in self.clusterDataset
                            if not numpy.isnan(ex[geneAttr])]
            if any("," in gene for gene in clusterGenes):
                self.information(0, "Separators detected in cluster gene names. Assuming multiple genes per example.")
                clusterGenes = reduce(operator.iadd,
                                      (genes.split(",") for genes in clusterGenes), [])
            else:
                self.information(0)
        else:
            self.error(1, "Failed to extract gene names from input dataset!")
            # Fixed: do not leave the progress bar running on early return.
            pb.finish()
            return {}

        genesSetCount = len(set(clusterGenes))

        self.clusterGenes = clusterGenes = \
            self.annotations.get_gene_names_translator(clusterGenes).values()

        self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" % (genesSetCount, len(clusterGenes), 100.0*len(clusterGenes)/genesSetCount if genesSetCount else 0.0))

        referenceGenes = None
        if not self.useReferenceDataset or self.referenceDataset is None:
            self.information(2)
            self.information(1)
            referenceGenes = self.annotations.gene_names

        elif self.referenceDataset is not None:
            if self.useAttrNames:
                referenceGenes = [v.name for v in self.referenceDataset.domain.attributes]
                self.information(1)
            elif geneAttr in (self.referenceDataset.domain.variables +
                              self.referenceDataset.domain.metas):
                referenceGenes = [str(ex[geneAttr]) for ex in self.referenceDataset
                                  if not numpy.isnan(ex[geneAttr])]
                # Fixed: the separator check used to inspect `clusterGenes`
                # instead of the reference gene names.
                if any("," in gene for gene in referenceGenes):
                    self.information(1, "Separators detected in reference gene names. Assuming multiple genes per example.")
                    referenceGenes = reduce(operator.iadd,
                                            (genes.split(",") for genes in referenceGenes), [])
                else:
                    self.information(1)
            else:
                self.information(1)
                referenceGenes = None

            if referenceGenes is None:
                referenceGenes = list(self.annotations.gene_names)
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(2, "Unable to extract gene names from reference dataset. Using entire genome for reference")
                self.useReferenceDataset = 0
            else:
                refc = len(referenceGenes)
                referenceGenes = self.annotations.get_gene_names_translator(referenceGenes).values()
                self.referenceRadioBox.buttons[1].setText("Reference set (%i genes, %i matched)" % (refc, len(referenceGenes)))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)
        else:
            self.useReferenceDataset = 0

        if not referenceGenes:
            self.error(1, "No valid reference set")
            # Fixed: do not leave the progress bar running on early return.
            pb.finish()
            return {}

        self.referenceGenes = referenceGenes
        evidences = [etype for etype in go.evidenceTypesOrdered
                     if self.useEvidenceType[etype]]
        aspect = ["P", "C", "F"][self.aspectIndex]

        if clusterGenes:
            self.terms = terms = self.annotations.get_enriched_terms(
                clusterGenes, referenceGenes, evidences, aspect=aspect,
                prob=self.probFunctions[self.probFunc], use_fdr=False,
                progress_callback=lambda value: pb.advance())
            ids = []
            pvals = []
            for i, d in self.terms.items():
                ids.append(i)
                pvals.append(d[1])
            for i, fdr in zip(ids, stats.FDR(pvals)):  # save FDR as the last part of the tuple
                terms[i] = tuple(list(terms[i]) + [fdr])
        else:
            self.terms = terms = {}
        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        pb.finish()
        self.treeStructDict = {}
        ids = self.terms.keys()

        self.treeStructRootKey = None

        parents = {}
        for term_id in ids:
            parents[term_id] = set([term for _, term in self.ontology[term_id].related])

        # Invert `parents` (restricted to enriched terms) in a single pass
        # instead of rescanning every id for every term.
        children = {term: set() for term in self.terms}
        for term_id, term_parents in parents.items():
            for parent in term_parents:
                if parent in children:
                    children[parent].add(term_id)

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term], children[term])
            # A non-obsolete term without related terms is taken as the root.
            if not self.ontology[term].related and not getattr(self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term
        return terms

    def FilterGraph(self, graph):
        """Return `graph` restricted by the enabled p-value/FDR/count filters."""
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue:  # FDR
            graph = {key: value for key, value in graph.items()
                     if value[3] <= self.maxPValue}
        if self.filterByNumOfInstances:
            graph = {key: value for key, value in graph.items()
                     if len(value[0]) >= self.minNumOfInstances}
        return graph

    def FilterAndDisplayGraph(self):
        """Re-filter the original graph and redraw both views."""
        if self.clusterDataset:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        """Store `graph` as the unfiltered result and display it."""
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        """Remove all items from the tree view and the terms table."""
        self.listView.clear()
        self.listViewItems = []
        self.sigTerms.clear()

    def DisplayGraph(self):
        """Populate the views from `self.graph` and send the report table."""
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []
        enrichment = lambda t: len(t[0]) / t[2] * (len(self.referenceGenes) / len(self.clusterGenes))
        maxFoldEnrichment = max([enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            # Add `term` (if it passed the filters) below `parentDisplayNode`
            # and recurse into its children; each (parent, term) pair is
            # added only once.
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term], len(self.clusterGenes), len(self.referenceGenes), maxFoldEnrichment, parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                self.termListViewItemDict.setdefault(term, []).append(displayNode)
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                # Filtered-out terms are skipped; their children attach to
                # the nearest displayed ancestor.
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        if self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = self.graph.items()
        terms = sorted(terms, key=lambda item: item[1][1])
        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for t_id, (genes, p_value, refCount, fdr) in terms:
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.clusterGenes),
                                    len(self.referenceGenes),
                                    maxFoldEnrichment,
                                    self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [], [],
            # All is meta!
            [Orange.data.StringVariable("GO Term Id"),
             Orange.data.StringVariable("GO Term Name"),
             Orange.data.ContinuousVariable("Cluster Frequency"),
             Orange.data.ContinuousVariable("Genes in Cluster", number_of_decimals=0),
             Orange.data.ContinuousVariable("Reference Frequency"),
             Orange.data.ContinuousVariable("Genes in Reference", number_of_decimals=0),
             Orange.data.ContinuousVariable("p-value"),
             Orange.data.ContinuousVariable("FDR"),
             Orange.data.ContinuousVariable("Enrichment"),
             Orange.data.StringVariable("Genes")])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.clusterGenes),
                  len(genes),
                  r_count / len(self.referenceGenes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.clusterGenes) * \
                  len(self.referenceGenes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X, metas=M)
        else:
            termsTable = Orange.data.Table(termsDomain)
        self.send("Enrichment Report", termsTable)

    def ViewSelectionChanged(self):
        """Handle a selection change in the tree view."""
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        try:
            self.selectedTerms = []
            selected = self.listView.selectedItems()
            self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
            self.ExampleSelection()
        finally:
            # Always release the re-entrancy guard, even after an error.
            self.selectionChanging = 0

    def TableSelectionChanged(self):
        """Handle a selection change in the terms table.

        The selection is mirrored onto the matching items of the tree view.
        """
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        try:
            self.selectedTerms = []
            selectedIds = set([self.sigTerms.itemFromIndex(index).goId
                               for index in self.sigTerms.selectedIndexes()])

            for i in range(self.sigTerms.topLevelItemCount()):
                item = self.sigTerms.topLevelItem(i)
                selected = item.goId in selectedIds
                term = item.goId

                if selected:
                    self.selectedTerms.append(term)

                for lvi in self.termListViewItemDict[term]:
                    try:
                        lvi.setSelected(selected)
                        if selected:
                            lvi.setExpanded(True)
                    except RuntimeError:    # Underlying C/C++ object deleted
                        pass

            self.ExampleSelection()
        finally:
            # Always release the re-entrancy guard, even after an error.
            self.selectionChanging = 0

    def UpdateAddClassButton(self):
        """Enable "Add GO Term as class" only for term-specific output."""
        self.addClassCB.setEnabled(self.selectionDisjoint == 1)

    def ExampleSelection(self):
        """Callback alias for `commit`."""
        self.commit()

    def commit(self):
        """Send the data for the genes of the currently selected terms."""
        if self.clusterDataset is None:
            return

        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms), set())

        evidences = []
        for etype in go.evidenceTypesOrdered:
            if self.useEvidenceType[etype]:
#             if getattr(self, "useEvidence" + etype):
                evidences.append(etype)
        allTerms = self.annotations.get_annotated_terms(
            genes, direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            # Count in how many selected terms each gene appears: exactly one
            # for term-specific genes, all of them for common genes.
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            ccount = 1 if self.selectionDisjoint == 1 else len(self.selectedTerms)
            # A set, so the per-row membership tests below are O(1).
            selectedGenes = {gene for gene, c in count.items()
                             if c == ccount and gene in genes}
        else:
            selectedGenes = reduce(
                operator.ior,
                (set(allTerms.get(term, [])) for term in self.selectedTerms),
                set())

        if self.useAttrNames:
            vars = [self.clusterDataset.domain[gene]
                    for gene in set(selectedGenes)]
            domain = Orange.data.Domain(
                vars, self.clusterDataset.domain.class_vars,
                self.clusterDataset.domain.metas)
            newdata = self.clusterDataset.from_table(domain, self.clusterDataset)
            self.send("Data on Selected Genes", newdata)
            self.send("Data on Unselected Genes", None)
        elif self.candidateGeneAttrs:
            selectedExamples = []
            unselectedExamples = []

            geneAttr = self.candidateGeneAttrs[min(self.geneAttrIndex,
                                                   len(self.candidateGeneAttrs) - 1)]

            if self.selectionDisjoint == 1:
                goVar = Orange.data.DiscreteVariable(
                    "GO Term", values=list(self.selectedTerms))
                newDomain = Orange.data.Domain(
                    self.clusterDataset.domain.variables, goVar,
                    self.clusterDataset.domain.metas)
                goColumn = []
            for i, ex in enumerate(self.clusterDataset):
                if not numpy.isnan(ex[geneAttr]) and any(gene in selectedGenes for gene in str(ex[geneAttr]).split(",")):
                    if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                        terms = filter(lambda term: any(gene in self.graph[term][0] for gene in str(ex[geneAttr]).split(",")), self.selectedTerms)
                        term = sorted(terms)[0]
                        goColumn.append(goVar.values.index(term))
                    selectedExamples.append(i)
                else:
                    unselectedExamples.append(i)

            if selectedExamples:
                selectedExamples = self.clusterDataset[selectedExamples]
                if self.selectionDisjoint == 1 and self.selectionAddTermAsClass:
                    selectedExamples = Orange.data.Table.from_table(newDomain, selectedExamples)
                    view, issparse = selectedExamples.get_column_view(goVar)
                    assert not issparse
                    view[:] = goColumn
            else:
                selectedExamples = None

            if unselectedExamples:
                unselectedExamples = self.clusterDataset[unselectedExamples]
            else:
                unselectedExamples = None

            self.send("Data on Selected Genes", selectedExamples)
            self.send("Data on Unselected Genes", unselectedExamples)

    def ShowInfo(self):
        """Show a non-modal dialog with the ontology/annotations headers."""
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText("Ontology:\n" + self.ontology.header
                      if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" + self.annotations.header.replace("!", "")
                      if self.annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
def __init__(self, parent=None):
    """Build the set enrichment GUI and schedule the required downloads.

    The widget is blocked until the submitted `EnsureDownloaded` task
    finishes and `__initialize_finish` runs.
    """
    super().__init__(parent)

    self.geneMatcherSettings = [False, False, True, False]

    self.data = None
    self.referenceData = None
    self.taxid_list = []

    self.__genematcher = (None, fulfill(gene.matcher([])))
    self.__invalidated = False

    self.currentAnnotatedCategories = []
    self.state = None
    self.__state = OWSetEnrichment.Initializing

    box = gui.widgetBox(self.controlArea, "Info")
    self.infoBox = gui.widgetLabel(box, "Info")
    self.infoBox.setText("No data on input.\n")

    self.speciesComboBox = gui.comboBox(
        self.controlArea, self, "speciesIndex", "Species",
        callback=self.__on_speciesIndexChanged)

    box = gui.widgetBox(self.controlArea, "Entity names")
    self.geneAttrComboBox = gui.comboBox(
        box, self, "geneattr", "Entity feature", sendSelectedValue=0,
        callback=self.updateAnnotations)

    cb = gui.checkBox(
        box, self, "genesinrows", "Use feature names",
        callback=self.updateAnnotations,
        disables=[(-1, self.geneAttrComboBox)])
    cb.makeConsistent()

#    gui.button(box, self, "Gene matcher settings",
#               callback=self.updateGeneMatcherSettings,
#               tooltip="Open gene matching settings dialog")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.controlArea, self, "useReferenceData",
        ["All entities", "Reference set (input)"],
        tooltips=["Use entire genome (for gene set enrichment) or all " +
                  "available entities for reference",
                  "Use entities from Reference Examples input signal " +
                  "as reference"],
        box="Reference", callback=self.updateAnnotations)

    box = gui.widgetBox(self.controlArea, "Entity Sets")
    self.groupsWidget = QTreeWidget(self)
    self.groupsWidget.setHeaderLabels(["Category"])
    box.layout().addWidget(self.groupsWidget)

    hLayout = QHBoxLayout()
    hLayout.setSpacing(10)
    hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)
    gui.spin(hWidget, self, "minClusterCount",
             0, 100, label="Entities",
             tooltip="Minimum entity count",
             callback=self.filterAnnotationsChartView,
             callbackOnReturn=True,
             checked="useMinCountFilter",
             checkCallback=self.filterAnnotationsChartView)

    pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
    cb = gui.checkBox(
        pvalfilterbox, self, "useMaxPValFilter", "p-value",
        callback=self.filterAnnotationsChartView)

    sp = gui.doubleSpin(
        pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
        tooltip="Maximum p-value",
        callback=self.filterAnnotationsChartView,
        callbackOnReturn=True,
    )
    # Fixed: the initial enabled state used to follow `useMaxFDRFilter`
    # (copy-paste from the FDR box) although this spin box is toggled by
    # the p-value check box.
    sp.setEnabled(self.useMaxPValFilter)
    cb.toggled[bool].connect(sp.setEnabled)

    pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
    pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

    fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
    cb = gui.checkBox(
        fdrfilterbox, self, "useMaxFDRFilter", "FDR",
        callback=self.filterAnnotationsChartView)

    sp = gui.doubleSpin(
        fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
        tooltip="Maximum False discovery rate",
        callback=self.filterAnnotationsChartView,
        callbackOnReturn=True,
    )
    sp.setEnabled(self.useMaxFDRFilter)
    cb.toggled[bool].connect(sp.setEnabled)

    fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
    fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

    self.filterLineEdit = QLineEdit(
        self, placeholderText="Filter ...")

    self.filterCompleter = QCompleter(self.filterLineEdit)
    self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
    self.filterLineEdit.setCompleter(self.filterCompleter)

    hLayout.addWidget(self.filterLineEdit)
    self.mainArea.layout().addWidget(hWidget)

    self.filterLineEdit.textChanged.connect(
        self.filterAnnotationsChartView)

    self.annotationsChartView = QTreeView(
        alternatingRowColors=True,
        sortingEnabled=True,
        selectionMode=QTreeView.ExtendedSelection,
        rootIsDecorated=False,
        editTriggers=QTreeView.NoEditTriggers,
    )
    self.annotationsChartView.viewport().setMouseTracking(True)
    self.mainArea.layout().addWidget(self.annotationsChartView)

    contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
        self.annotationsChartView)
    self.annotationsChartView.header().installEventFilter(contextEventFilter)

    self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
    gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

    # Block the widget until the required files are available locally.
    self.setBlocking(True)

    task = EnsureDownloaded(
        [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
         (geneset.sfdomain, "index.pck")]
    )

    task.finished.connect(self.__initialize_finish)
    self.setStatusMessage("Initializing")
    self._executor = ThreadExecutor(
        parent=self, threadPool=QThreadPool(self))
    self._executor.submit(task)
class OWDatabasesUpdate(OWWidget):
    """
    Widget listing local/server database files with per-file download,
    update and remove actions.

    ``self.updateItems`` is a list of ``(item, tree_item, options_widget)``
    triples, one per row of ``self.filesView``; ``self._tasks`` holds the
    download tasks that were submitted and not yet finalized.
    """
    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/OWDatabasesUpdate.svg"
    priority = 1

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        """Build the GUI and start retrieving the list of files."""
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        # State read by the progress/download callbacks. It is initialized
        # up front so that nothing triggered by the start-up task below can
        # observe a partially constructed widget.
        self.updateItems = []
        self._tasks = []
        self._haveProgress = False

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        # Re-apply the filter whenever the rows are re-laid out (e.g. sorted).
        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        # Only shown after a connection error (see HandleError).
        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)
        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

    def RetrieveFilesList(self):
        """Submit a task that retrieves the files list; disable the widget."""
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)
        task = Task(function=partial(retrieveFilesList,
                                     methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        # Re-enabled in SetFilesList / HandleError.
        self.setEnabled(False)

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.

        The rows are rebuilt from the local info joined with `serverInfo`.
        """
        self.setEnabled(True)

        localInfo = serverfiles.allinfo()
        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                button = QToolButton(
                    None, text="Update",
                    maximumWidth=120,
                    minimumHeight=20,
                    maximumHeight=20
                )

                if sys.platform == "darwin":
                    button.setAttribute(Qt.WA_MacSmallSize)

                button.clicked.connect(
                    partial(self.SubmitDownloadTask, item.domain,
                            item.filename)
                )

                self.filesView.setItemWidget(tree_item, 2, button)

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        # Columns 1-3: fit the contents (capped at 400), but never narrower
        # than the header section.
        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        # Tags starting with '#' are not offered as completions.
        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def buttonCheck(self, selected_items, state, button):
        """
        Enable `button` iff at least one of `selected_items` is in `state`.

        An empty `selected_items` disables the button (previously the button
        silently kept whatever state it had before).
        """
        button.setEnabled(any(item.state == state for item in selected_items))

    def toggleButtons(self):
        """Update the 'Update all'/'Download all' buttons for visible rows."""
        selected_items = [item for item, tree_item, _ in self.updateItems
                          if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        """
        Handle an exception reported by a task.

        A ConnectionError shows a warning, an empty file list and the
        'Reconnect' button; anything else is passed to `sys.excepthook`.
        """
        if isinstance(exception, ConnectionError):
            self.warning(0,
                         "Could not connect to server! Check your connection "
                         "and try to reconnect.")
            self.SetFilesList({})
            self.retryButton.show()
        else:
            sys.excepthook(type(exception), exception, None)
            self.progress.setRange(0, 0)
            self.setEnabled(True)

    def UpdateInfoLabel(self):
        """Refresh the summary label (counts and sizes of visible rows)."""
        local = [item for item, tree_item, _ in self.updateItems
                 if item.state != AVAILABLE and not tree_item.isHidden()]
        size = sum(float(item.size) for item in local)

        onServer = [item for item, tree_item, _ in self.updateItems
                    if not tree_item.isHidden()]
        sizeOnServer = sum(float(item.size) for item in onServer)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 serverfiles.sizeformat(size),
                 len(onServer),
                 serverfiles.sizeformat(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        """Submit a download for every visible OUTDATED item."""
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        """Submit a download for every visible AVAILABLE/OUTDATED item."""
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    [AVAILABLE, OUTDATED]:
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        """
        Hide the rows that do not match every token of the filter text.

        `searchString` is ignored (it only absorbs the signal argument);
        the text is always read from the filter line edit.
        """
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        task = DownloadTask(domain, filename, serverfiles.LOCALFILES)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        """Finalize the row of a finished (done/cancelled/failed) `task`."""
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            # Report the failure as a widget warning.
            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            button = QToolButton(
                None, text="Retry",
                maximumWidth=120,
                minimumHeight=20,
                maximumHeight=20
            )

            if sys.platform == "darwin":
                button.setAttribute(Qt.WA_MacSmallSize)

            button.clicked.connect(
                partial(self.SubmitDownloadTask, item.domain,
                        item.filename)
            )

            self.filesView.setItemWidget(tree_item, 2, button)

        else:
            # get the new updated info dict and replace the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

            self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        """Remove the local copy of (domain, filename) and update its row."""
        serverfiles.LOCALFILES.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                     info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down (non-blocking)."""
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        """Finalize every task whose future is done; reset when none remain."""
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        # Iterate over a copy: finished tasks are removed from self._tasks.
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        """Report a download exception and show the retry warning."""
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        """
        Return the index in `self.updateItems` of (domain, filename).

        Raises ValueError if there is no such item.
        """
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        """Mirror `self.progress` onto the widget's progress bar."""
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        if rmin == rmax:
            # An empty range means "no work in progress".
            self._haveProgress = False
            self.progressBarFinished()
class OWKEGGPathwayBrowser(widget.OWWidget):
    """
    Widget that computes KEGG pathway enrichment for the genes of the input
    data, lists the pathways and displays the selected one.

    ``self.pathways`` maps a pathway id to ``(genes, p_value, ref)`` as
    returned by the enrichment task; ``self.ncbi_gene_map`` is a list of
    ``(ncbi_gene_id, kegg_gene_id)`` pairs.
    """
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/OWKEGGPathwayBrowser.svg"
    priority = 70

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    class Error(widget.OWWidget.Error):
        missing_annotation = widget.Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = widget.Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = widget.Msg(ERROR_ON_MISSING_TAX_ID)

    def __init__(self, parent=None):
        """Build the GUI and start pre-caching the organism definitions."""
        super().__init__(parent)

        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False
        # Read by UpdatePathwayView, which can run before UpdateListView
        # has ever assigned it.
        self.bestPValueItem = None

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference",
                     "From signal", box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(
            self.controlArea, self, "showOrthology",
            "Show pathways in full orthology",
            box="Orthology",
            callback=self.UpdateListView,
        )

        gui.checkBox(
            self.controlArea, self, "autoResize",
            "Resize to fit",
            box="Image",
            callback=self.UpdatePathwayViewTransform,
        )

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(
            box, self, "Clear cache",
            callback=self.ClearCache,
            tooltip="Clear all locally cached KEGG data.",
            default=False,
            autoDefault=False,
        )

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged)
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(allColumnsShowFocus=True,
                                    selectionMode=QTreeWidget.SingleSelection,
                                    sortingEnabled=True,
                                    maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        # Input data and the annotations read from its `attributes`.
        self.data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        # Reference data and its annotations.
        self.ref_data = None
        self.ref_genes = []
        self.ref_tax_id = None
        self.ref_use_attr_names = None
        self.ref_gene_id_attribute = None
        self.ref_gene_id_column = None

        self.pathways = {}
        self.ncbi_gene_map = []
        self.org = None

        self._executor = concurrent.ThreadExecutor()
        # Re-enabled/unblocked in __initialize_finish.
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float, ))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        """
        Finish the start-up phase once the genome task has a result.

        Runs at most once successfully: the run state is switched to `Ready`
        so that a later call (e.g. the task's `finished` signal arriving
        after `SetData` already waited for the result) does not re-enable
        the widget or finish the progress bar while another task is running.
        """
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            # Only the success/failure of the task matters here.
            self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.__runstate = OWKEGGPathwayBrowser.Ready

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)
        self.infoLabel.setText("No data on input\n")

    def clear(self):
        """
        Clear the widget state.
        """
        self.pathways = {}
        self.org = None
        # The list items are about to be removed; do not keep a reference.
        self.bestPValueItem = None

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        """
        Set the input data and read its gene/taxonomy annotations.

        Shows a `missing_*` error and returns early when the annotations
        are incomplete; a false `data` clears the widget.
        """
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.Error.clear()
        if data:
            self.data = data
            # str(None) would be the string "None" and defeat the
            # `is None` checks below, so only convert a present value.
            tax_id = self.data.attributes.get(TAX_ID, None)
            self.tax_id = str(tax_id) if tax_id is not None else None
            self.use_attr_names = self.data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.data.attributes.get(
                GENE_ID_COLUMN, None)

            # Exactly one of gene_id_attribute/gene_id_column must be set.
            if not (self.use_attr_names is not None
                    and ((self.gene_id_attribute is None)
                         ^ (self.gene_id_column is None))):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            self.warning(0)
            self.error(0)
            self.information(0)

            self.__invalidated = True
        else:
            # Drop the old table too; otherwise a later commit() would
            # keep sending rows of data that is no longer on the input.
            self.data = None
            self.clear()

    def SetRefData(self, data=None):
        """
        Set the reference data (only used when 'From signal' is checked)
        and read its gene/taxonomy annotations.
        """
        self.information(1)

        if data is not None and self.useReference:
            self.ref_data = data
            # See SetData: keep None as None instead of the string "None".
            ref_tax_id = self.ref_data.attributes.get(TAX_ID, None)
            self.ref_tax_id = (str(ref_tax_id) if ref_tax_id is not None
                               else None)
            self.ref_use_attr_names = self.ref_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.ref_gene_id_attribute = self.ref_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.ref_gene_id_column = self.ref_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.ref_use_attr_names is not None
                    and ((self.ref_gene_id_attribute is None)
                         ^ (self.ref_gene_id_column is None))):

                if self.ref_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.ref_tax_id is None:
                self.Error.missing_tax_id()
                return

        self.__invalidated = True

    def handleNewSignals(self):
        """Recompute the enrichment if an input changed."""
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        """
        Rebuild the pathway list: a tree following the "ko00001" BRITE
        hierarchy when 'showOrthology' is set, otherwise a flat list
        sorted by p-value whose first item gets selected.
        """
        self.bestPValueItem = None
        self.listView.clear()
        # `self.org` is only set by Update(); this method can also be
        # triggered from the orthology check box before that happened.
        if not self.data or self.org is None:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.org.org_code

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            # Pathway number = last five characters of the pathway id.
            path_ids = {s[-5:] for s in self.pathways.keys()}

            def _walkCollect(koEntry):
                """Collect enriched entries and all their ancestors."""
                num = koEntry.title[:5] if koEntry.title else None
                collected = []
                for child in koEntry.entries:
                    collected = collected + _walkCollect(child)
                if num in path_ids:
                    return [koEntry] + collected
                # A non-enriched entry is kept only if a descendant is.
                return collected + ([koEntry] if collected else [])

            allClasses = [entry
                          for root in self.koOrthology
                          for entry in _walkCollect(root)]

            def _walkCreate(koEntry, lvItem):
                """Create the tree item for `koEntry` under `lvItem`."""
                item = QTreeWidgetItem(lvItem)
                pathway_id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(pathway_id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[pathway_id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(
                        2, "%i of %i" % (len(genes), len(self.input_genes)))
                    item.setText(
                        3, "%i of %i" % (ref, len(self.ref_genes)))
                    item.pathway_id = pathway_id if p is not None else None
                else:
                    if pathway_id in allPathways:
                        text = kegg_pathways.get_entry(pathway_id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if pathway_id in allPathways:
                        item.pathway_id = pathway_id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            # Sort by p-value (second element of the stored tuple).
            pathways = sorted(self.pathways.items(),
                              key=lambda item: item[1][1])

            for pathway_id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                entry = kegg_pathways.get_entry(pathway_id)
                # Same fallback as the tree branch: the gene sets may
                # still contain obsolete entries without a KEGG record.
                item.setText(
                    0, entry.name if entry is not None else pathway_id)
                item.setText(1, "%.5f" % p_value)
                item.setText(
                    2, "%i of %i" % (len(genes), len(self.input_genes)))
                item.setText(
                    3, "%i of %i" % (ref, len(self.ref_genes)))
                item.pathway_id = pathway_id if entry is not None else None
                items.append(item)

        self.bestPValueItem = items[0] if items else None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect)

    def UpdatePathwayView(self):
        """
        Show the pathway of the selected list item (or of the best p-value
        item); the pathway data is fetched by a background task.
        """
        items = self.listView.selectedItems()
        item = items[0] if items else None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        # Bind the id now so the task never touches the list item, which
        # may be removed from the view while the task is running.
        self._pathwayTask = concurrent.Task(
            function=partial(get_kgml_and_image, item.pathway_id))
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        """Display the pathway returned by the pathway task."""
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        objects = self.pathways.get(pathway_id, [[]])[0]  # [ncbi_gene_id]
        # map ncbi_gene_id to keg_id for display
        objects = flatten(relation_map(self.ncbi_gene_map, objects))
        self.pathwayView.SetPathway(self.pathway, objects)

    def UpdatePathwayViewTransform(self):
        """Re-apply the pathway view transform ('Resize to fit')."""
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even allow this to be executed if
        # data has no genes
        try:
            self.__get_input_genes()
            self.input_genes = set(self.input_genes)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")

        self.information(1)

        self.org = kegg.KEGGOrganism(kegg.from_taxid(self.tax_id))

        if self.useReference and self.ref_data:
            self.__get_ref_genes()
            self.ref_genes = set(self.ref_genes)
        else:
            # No reference: use all ncbi ids of the organism.
            self.ref_genes = self.org.get_ncbi_ids()

        def run_enrichment(org_code, genes, reference, progress=None):
            """Return (enriched pathways, ncbi_gene_map) for `genes`."""
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            # [(pathway_id, kegg_gene_id)]
            link_map = kegg_api.link(org_code, "pathway")
            # [(ncbi_gene_id, kegg_gene_id)]
            ncbi_gene_map = kegg_api.conv(org_code, 'ncbi-geneid')
            # Strip the database prefix (text up to the first ':').
            ncbi_gene_map = [(_1.split(":", 1)[1], _2)
                             for _1, _2 in ncbi_gene_map]
            # [(pathway_id, ncbi_gene_id)]
            link_map = relation_join(
                link_map, [(_2, _1) for _1, _2 in ncbi_gene_map])
            # {pathway_id -> [ncbi_gene_ids]}
            kegg_sets = relation_list_to_multimap(link_map)

            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(gene_ids))
                for ddi, gene_ids in kegg_sets.items()
            ])

            pathways = pathway_enrichment(kegg_sets, genes, reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, ncbi_gene_map

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float, ))

        run_func = partial(run_enrichment, self.org.org_code,
                           self.input_genes, self.ref_genes, progress)
        self._enrichTask = concurrent.Task(function=run_func)
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        """Store the enrichment results and rebuild the pathway list."""
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, ncbi_gene_map = self._enrichTask.result()
        finally:
            # Finish the progress bar even if the task failed (the
            # exception still propagates).
            self.progressBarFinished()

        self.pathways = pathways
        self.ncbi_gene_map = ncbi_gene_map

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        self.infoLabel.setText("{} unique gene names on input\n".format(
            len(set(self.input_genes))))

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        """Set the progress bar value; re-entrant calls are ignored."""
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        try:
            self.progressBarSet(value)
        finally:
            # Never leave the guard set, or all later updates are dropped.
            self.__in_setProgress = False

    def __get_input_genes(self):
        """
        Extract gene names from `data` into `self.input_genes`.

        Raises ValueError if no gene names were found.
        """
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.data.domain.attributes:
                self.input_genes.append(
                    str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

        if not self.input_genes:
            raise ValueError("No gene names in data.")

    def __get_ref_genes(self):
        """
        Extract gene names from `ref_data` into `self.ref_genes`.
        """
        self.ref_genes = []

        if self.ref_use_attr_names:
            for variable in self.ref_data.domain.attributes:
                self.ref_genes.append(
                    str(variable.attributes.get(
                        self.ref_gene_id_attribute, '?')))
        else:
            genes, _ = self.ref_data.get_column_view(self.ref_gene_id_column)
            self.ref_genes = [str(g) for g in genes]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        # Suppress per-item notifications; notify once at the end.
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable \
                        and not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        """
        Send the part of the data matching the genes marked by the selected
        pathway items: columns when genes are attribute names (only
        "Selected Data" is sent in that case), otherwise rows split into
        "Selected Data" and "Unselected Data".
        """
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(
                set.union, [item.marked_objects for item in selectedItems],
                set())

            # map kegg_ids back to ncbi_gene_id
            backmap = [(_2, _1) for _1, _2 in self.ncbi_gene_map]
            selectedGenes = set(flatten(relation_map(backmap, selectedGenes)))

            if self.use_attr_names:
                selected = [
                    column for column in self.data.domain.attributes
                    if self.gene_id_attribute in column.attributes
                    and str(column.attributes[self.gene_id_attribute])
                    in selectedGenes
                ]
                data = self.data[:, selected]
                self.send("Selected Data", data)
            else:
                selected_indices = []
                other_indices = []
                for row_index, row in enumerate(self.data):
                    gene_in_row = str(row[self.gene_id_column])
                    if gene_in_row in self.input_genes \
                            and gene_in_row in selectedGenes:
                        selected_indices.append(row_index)
                    else:
                        other_indices.append(row_index)

                if selected_indices:
                    selected = self.data[selected_indices]
                else:
                    selected = None

                if other_indices:
                    other = self.data[other_indices]
                else:
                    other = None

                self.send("Selected Data", selected)
                self.send("Unselected Data", other)
        else:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)

    def ClearCache(self):
        """Clear the KEGG cache (`kegg.caching.clear_cache`)."""
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1024, 720)
class OWGenExpress(widget.OWWidget): name = "GenExpress" description = "Expression data from GenExpress." icon = "../widgets/icons/GenCloud.svg" priority = 36 inputs = [] outputs = [("Data", Orange.data.Table)] username = settings.Setting("anonymous") password = settings.Setting("") log2 = settings.Setting(False) transpose = settings.Setting(False) rtypei = settings.Setting(0) projecti = settings.Setting(0) serveri = settings.Setting(0) exnamei = settings.Setting(6) excludeconstant = settings.Setting(False) joinreplicates = settings.Setting(False) currentSelection = settings.Setting(None) experimentsHeaderState = settings.Setting( {name: False for _, name in HEADER[:ID_INDEX + 1]}) storedSortOrder = settings.Setting([]) storedSelections = settings.Setting([]) def __init__(self, parent=None): super().__init__(parent) self.servers = [ ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'), ('https://cloud.genialis.com/', 'Genialis'), ] self.selectedExperiments = [] self.buffer = dicty.CacheSQLite(bufferfile) self.searchString = "" self.items = [] self.result_types = [] self.controlArea.setMaximumWidth(250) self.controlArea.setMinimumWidth(250) box = gui.widgetBox(self.controlArea, 'Project') self.projectCB = gui.comboBox(box, self, "projecti", items=[], callback=self.ProjectChosen) self.projects = [] b = gui.widgetBox(self.controlArea, "Selection bookmarks") self.selectionSetsWidget = SelectionSetsWidget(self) self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) def store_selections(modified): if not modified: self.storedSelections = self.selectionSetsWidget.selections self.selectionSetsWidget.selectionModified.connect(store_selections) b.layout().addWidget(self.selectionSetsWidget) gui.separator(self.controlArea) b = gui.widgetBox(self.controlArea, "Sort output columns") self.columnsSortingWidget = SortedListWidget(self) self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) box = 
gui.widgetBox(self.controlArea, 'Experiment name') self.experimentNameCB = gui.comboBox(box, self, "exnamei", items=SORTING_MODEL_LIST) b.layout().addWidget(self.columnsSortingWidget) sorting_model = QStringListModel(SORTING_MODEL_LIST) self.columnsSortingWidget.setModel(sorting_model) self.columnsSortingWidget.sortingOrder = self.storedSortOrder def store_sort_order(): self.storedSortOrder = self.columnsSortingWidget.sortingOrder self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order) gui.separator(self.controlArea) box = gui.widgetBox(self.controlArea, 'Expression Type') self.expressionTypesCB = gui.comboBox(box, self, "rtypei", items=[], callback=self.UpdateResultsList) gui.checkBox(self.controlArea, self, "excludeconstant", "Exclude labels with constant values") gui.checkBox(self.controlArea, self, "joinreplicates", "Average replicates (use median)") gui.checkBox(self.controlArea, self, "log2", "Logarithmic (base 2) transformation") gui.checkBox(self.controlArea, self, "transpose", "Genes as columns") self.commit_button = gui.button(self.controlArea, self, "&Commit", callback=self.Commit) self.commit_button.setDisabled(True) gui.rubber(self.controlArea) box = gui.widgetBox(self.controlArea, 'Server') gui.comboBox(box, self, "serveri", items=[title for url, title in self.servers], callback=self.ServerChosen) gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Clear cache", callback=self.clear_cache) gui.lineEdit(self.mainArea, self, "searchString", "Search", callbackOnType=True, callback=self.SearchUpdate) self.headerLabels = [t[1] for t in HEADER] self.experimentsWidget = QTreeWidget() self.experimentsWidget.setHeaderLabels(self.headerLabels) self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection) self.experimentsWidget.setRootIsDecorated(False) self.experimentsWidget.setSortingEnabled(True) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.experimentsWidget, 
self.experimentsWidget) self.experimentsWidget.header().installEventFilter(contextEventFilter) self.experimentsWidget.setItemDelegateForColumn( 0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole)) self.experimentsWidget.setAlternatingRowColors(True) self.experimentsWidget.selectionModel().selectionChanged.connect( self.onSelectionChanged) self.selectionSetsWidget.setSelectionModel( self.experimentsWidget.selectionModel()) self.selectionSetsWidget.setSelections(self.storedSelections) self.mainArea.layout().addWidget(self.experimentsWidget) self.restoreHeaderState() self.experimentsWidget.header().geometriesChanged.connect( self.saveHeaderState) self.dbc = None self.AuthSet() QTimer.singleShot(100, self.ConnectAndUpdate) def sizeHint(self): return QSize(800, 600) def AuthSet(self): if len(self.username): self.passf.setDisabled(False) else: self.passf.setDisabled(True) def AuthChanged(self): self.AuthSet() self.ConnectAndUpdate() def ConnectAndUpdate(self): self.Connect() if self.dbc: def get_data_count(project_id): # XXX: is there a better way? 
# Note: limit 0 would return all objects return self.dbc.gen.api.data.get( case_ids__contains=project_id, type__startswith='data:expression:', limit=1)['meta']['total_count'] self.projects = sorted([ p for p in self.dbc.projects().items() if get_data_count(p[0]) > 0 ], key=lambda x: x[1]) self.UpdateProjects() self.ProjectChosen() self.UpdateExperimentTypes() def Connect(self): self.error(1) self.warning(1) username = '******' password = '******' url = self.servers[self.serveri][0] if self.username: username = self.username password = self.password if username.lower() in ['*****@*****.**', 'anonymous']: username = '******' password = '******' self.dbc = None self.projects = [] self.result_types = [] try: self.dbc = Genesis(address=url, username=username, password=password, cache=self.buffer) except requests.exceptions.ConnectionError: self.dbc = Genesis(address=url, username=username, password=password, connect=False, cache=self.buffer) self.warning(1, "Could not connect to server, working from cache.") except Exception: self.error(1, "Wrong username or password.") self.UpdateProjects() self.UpdateExperimentTypes() # clear lists def Reload(self): self.UpdateExperiments(reload=True) def clear_cache(self): self.buffer.clear() self.Reload() def rtype(self): """Return selected result template type """ if self.result_types: return self.result_types[self.rtypei] else: return None def UpdateExperimentTypes(self): self.expressionTypesCB.clear() items = [self.result_types_labels[desc] for desc in self.result_types] self.expressionTypesCB.addItems(items) #do not update anything if the list is empty if len(self.result_types): self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1)) def UpdateProjects(self): self.projectCB.clear() items = [desc for pid, desc in self.projects] self.projectCB.addItems(items) #do not update anything if the list if empty if len(self.projects) > 0: self.projecti = max(0, min(self.projecti, len(self.projects) - 1)) def 
UpdateExperiments(self, reload=False): self.experimentsWidget.clear() if not self.dbc or not self.dbc.projectid: # the connection did not succeed return self.items = [] self.progressBarInit() result_types = [] result_types_labels = [] sucind = False # success indicator for database index try: result_types, result_types_labels = self.dbc.result_types( reload=reload) sucind = True except Exception: try: result_types, result_types_labels = self.dbc.result_types() self.warning(0, "Can not access database - using cached data.") sucind = True except Exception: self.error(0, "Can not access database.") if sucind: self.warning(0) self.error(0) self.result_types = result_types self.result_types_labels = result_types_labels self.UpdateExperimentTypes() self.UpdateResultsList(reload=reload) self.progressBarFinished() if self.currentSelection: self.currentSelection.select( self.experimentsWidget.selectionModel()) self.handle_commit_button() def ProjectChosen(self, reload=False): if self.projects: self.dbc.projectid = self.projects[self.projecti][0] else: self.dbc.projectid = None self.UpdateExperiments(reload=reload) def ServerChosen(self): self.ConnectAndUpdate() def UpdateResultsList(self, reload=False): results_list = self.dbc.results_list(self.rtype(), reload=reload) try: results_list = self.dbc.results_list(self.rtype(), reload=reload) except Exception: try: results_list = self.dbc.results_list(self.rtype()) except Exception: self.error(0, "Can not access database.") self.results_list = results_list #softly change the view so that the selection stays the same items_shown = {} for i, item in enumerate(self.items): c = str(item.text(ID_INDEX)) items_shown[c] = i items_to_show = set(id_ for id_ in self.results_list) add_items = set(items_to_show) - set(items_shown) delete_items = set(items_shown) - set(items_to_show) i = 0 while i < self.experimentsWidget.topLevelItemCount(): it = self.experimentsWidget.topLevelItem(i) if str(it.text(ID_INDEX)) in delete_items: 
self.experimentsWidget.takeTopLevelItem(i) else: i += 1 delete_ind = set([items_shown[i] for i in delete_items]) self.items = [ it for i, it in enumerate(self.items) if i not in delete_ind ] for r_annot in add_items: d = defaultdict(lambda: "?", self.results_list[r_annot]) row_items = [""] + [ to_text(d.get(key, "?")) for key, _ in HEADER[1:] ] row_items[ID_INDEX] = r_annot ci = MyTreeWidgetItem(self.experimentsWidget, row_items) self.items.append(ci) for i in range(len(self.headerLabels)): self.experimentsWidget.resizeColumnToContents(i) self.wantbufver = lambda x: self.results_list[x]["date_modified"] self.UpdateCached() def UpdateCached(self): if self.wantbufver and self.dbc: for item in self.items: id = str(item.text(ID_INDEX)) version = self.dbc._in_buffer(id + "|||" + self.rtype()) value = " " if version == self.wantbufver(id) else "" item.setData(0, Qt.DisplayRole, value) def SearchUpdate(self, string=""): for item in self.items: item.setHidden(not all(s in item for s in self.searchString.split())) def Commit(self): pb = gui.ProgressBar(self, iterations=100) table = None ids = [] for item in self.experimentsWidget.selectedItems(): unique_id = str(item.text(ID_INDEX)) ids.append(unique_id) transfn = None if self.log2: transfn = lambda x: math.log(x + 1.0, 2) reverse_header_dict = {name: name for key, name in HEADER} reverse_header_dict["ID"] = "id" allowed_labels = None def namefn(a): name = SORTING_MODEL_LIST[self.exnamei] name = reverse_header_dict.get(name, "id") return dict(a)[name] if len(ids): table = self.dbc.get_data( ids=ids, result_type=self.rtype(), callback=pb.advance, exclude_constant_labels=self.excludeconstant, bufver=self.wantbufver, transform=transfn, allowed_labels=allowed_labels, namefn=namefn) if self.joinreplicates: table = dicty.join_replicates( table, ignorenames=self.dbc.IGNORE_REPLICATE, namefn="name", avg=dicty.median, fnshow=lambda x: " | ".join(map(str, x))) # Sort attributes sortOrder = self.columnsSortingWidget.sortingOrder 
all_values = defaultdict(set) for at in table.domain.attributes: atts = at.attributes for name in sortOrder: all_values[name].add( atts.get(reverse_header_dict[name], "")) isnum = {} for at, vals in all_values.items(): vals = filter(None, vals) try: for a in vals: float(a) isnum[at] = True except ValueError: isnum[at] = False def optfloat(x, at): if x == "": return "" else: return float(x) if isnum[at] else x def sorting_key(attr): atts = attr.attributes return tuple([ optfloat(atts.get(reverse_header_dict[name], ""), name) for name in sortOrder ]) attributes = sorted(table.domain.attributes, key=sorting_key) domain = Orange.data.Domain(attributes, table.domain.class_vars, table.domain.metas) table = Orange.data.Table.from_table(domain, table) table = Orange.data.Table(domain, table) if self.transpose: experiments = [at for at in table.domain.variables] attr = [ compat.ContinuousVariable.make(ex['DDB'].value) for ex in table ] metavars = sorted(table.domain.variables[0].attributes.keys()) metavars = [ compat.StringVariable.make(name) for name in metavars ] domain = compat.create_domain(attr, None, metavars) metavars = compat.get_metas(domain) metas = [[exp.attributes[var.name] for var in metavars] for exp in experiments] table = compat.create_table(domain, table.X.transpose(), None, metas) data_hints.set_hint(table, "taxid", "352472") data_hints.set_hint(table, "genesinrows", False) self.send("Data", table) self.UpdateCached() pb.finish() def onSelectionChanged(self, selected, deselected): self.handle_commit_button() def handle_commit_button(self): self.currentSelection = \ SelectionByKey(self.experimentsWidget.selectionModel().selection(), key=(ID_INDEX,)) self.commit_button.setDisabled(not len(self.currentSelection)) def saveHeaderState(self): hview = self.experimentsWidget.header() for i, label in enumerate(self.headerLabels): self.experimentsHeaderState[label] = hview.isSectionHidden(i) def restoreHeaderState(self): hview = self.experimentsWidget.header() state 
= self.experimentsHeaderState for i, label in enumerate(self.headerLabels): hview.setSectionHidden(i, state.get(label, True)) self.experimentsWidget.resizeColumnToContents(i)
def __init__(self, parent=None):
    """Build the widget UI and start fetching organism definitions.

    The control area gets the info label, the reference / orthology /
    image check boxes, the cache button and the auto-commit control; the
    main area gets a vertical splitter holding the pathway view and the
    pathway list.  The widget is disabled and blocked at the end of
    construction while a background task pre-caches the genome entries;
    ``__initialize_finish`` is connected to that task's ``finished``
    signal.

    :param parent: forwarded unchanged to the base class constructor.
    """
    super().__init__(parent)

    # Internal state flags.
    self._changedFlag = False
    self.__invalidated = False
    self.__runstate = OWKEGGPathwayBrowser.Initializing
    self.__in_setProgress = False

    # ---- control area -------------------------------------------------
    self.controlArea.setMaximumWidth(250)

    info_box = gui.widgetBox(self.controlArea, "Info")
    self.infoLabel = gui.widgetLabel(info_box, "No data on input\n")

    gui.separator(self.controlArea)
    gui.checkBox(
        self.controlArea, self, "useReference", "From signal",
        box="Reference",
        callback=self.Update,
    )

    gui.separator(self.controlArea)
    gui.checkBox(
        self.controlArea, self, "showOrthology",
        "Show pathways in full orthology",
        box="Orthology",
        callback=self.UpdateListView,
    )
    gui.checkBox(
        self.controlArea, self, "autoResize", "Resize to fit",
        box="Image",
        callback=self.UpdatePathwayViewTransform,
    )

    cache_box = gui.widgetBox(self.controlArea, "Cache Control")
    gui.button(
        cache_box, self, "Clear cache",
        callback=self.ClearCache,
        tooltip="Clear all locally cached KEGG data.",
        default=False,
        autoDefault=False,
    )

    gui.separator(self.controlArea)
    gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")
    gui.rubber(self.controlArea)

    # ---- main area ----------------------------------------------------
    splitter = QSplitter(Qt.Vertical, self.mainArea)

    self.pathwayView = PathwayView(self, splitter)
    self.pathwayView.scene().selectionChanged.connect(
        self._onSelectionChanged)
    self.mainArea.layout().addWidget(splitter)

    self.listView = QTreeWidget(
        allColumnsShowFocus=True,
        selectionMode=QTreeWidget.SingleSelection,
        sortingEnabled=True,
        maximumHeight=200,
    )
    splitter.addWidget(self.listView)

    self.listView.setColumnCount(4)
    self.listView.setHeaderLabels(
        ["Pathway", "P value", "Genes", "Reference"])
    self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

    select_all_action = QAction(
        "Select All", self, shortcut=QKeySequence.SelectAll)
    select_all_action.triggered.connect(self.selectAll)
    self.addAction(select_all_action)

    # ---- input (cluster) data state -----------------------------------
    self.data = None
    self.input_genes = []
    self.tax_id = None
    self.use_attr_names = None
    self.gene_id_attribute = None
    self.gene_id_column = None

    # ---- reference data state -----------------------------------------
    self.ref_data = None
    self.ref_genes = []
    self.ref_tax_id = None
    self.ref_use_attr_names = None
    self.ref_gene_id_attribute = None
    self.ref_gene_id_column = None

    self.pathways = {}
    self.ncbi_gene_map = []
    self.org = None

    # ---- background initialisation ------------------------------------
    self._executor = concurrent.ThreadExecutor()
    self.setEnabled(False)
    self.setBlocking(True)

    # Progress reports are routed through `methodinvoke` to the widget's
    # "setProgress" method.
    report_progress = concurrent.methodinvoke(
        self, "setProgress", (float, ))

    def get_genome():
        """Return a KEGGGenome with the common org entries precached."""
        kegg_genome = kegg.KEGGGenome()

        essential_orgs = kegg_genome.essential_organisms()
        common_orgs = kegg_genome.common_organisms()
        # Remove duplicates of essential from common.
        # (essential + common list as defined here will be used in the
        # GUI.)
        common_orgs = [
            org for org in common_orgs if org not in essential_orgs
        ]

        # TODO: Add option to specify additional organisms not
        # in the common list.
        entry_keys = [
            kegg_genome.org_code_to_entry_key(code)
            for code in essential_orgs + common_orgs
        ]
        kegg_genome.pre_cache(entry_keys, progress_callback=report_progress)
        return (entry_keys, kegg_genome)

    genome_task = concurrent.Task(function=get_genome)
    self._genomeTask = genome_task
    genome_task.finished.connect(self.__initialize_finish)

    self.progressBarInit()
    self.infoLabel.setText("Fetching organism definitions\n")
    self._executor.submit(genome_task)
class OWGOBrowser(widget.OWWidget):
    """Widget for enrichment analysis of Gene Ontology terms.

    Receives a "Cluster Data" table and, optionally, a "Reference Data"
    table.  Enriched GO terms are computed on a background executor and
    shown both as a term tree and as a flat table of terms.  The widget
    sends the part of the input that belongs to the selected terms
    ("Data on Selected Genes") and a table describing all enriched terms
    ("Enrichment Report").
    """

    name = "GO Browser"
    description = "Enrichment analysis for Gene Ontology terms."
    icon = "../widgets/icons/OWGOBrowser.svg"
    priority = 7

    inputs = [("Cluster Data", Orange.data.Table,
               "setDataset", widget.Single + widget.Default),
              ("Reference Data", Orange.data.Table,
               "setReferenceDataset")]

    outputs = [("Data on Selected Genes", Orange.data.Table),
               ("Enrichment Report", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)
    useReferenceDataset = settings.Setting(False)
    aspectIndex = settings.Setting(0)

    useEvidenceType = settings.Setting(
        {et: True for et in go.evidenceTypesOrdered})

    filterByNumOfInstances = settings.Setting(False)
    minNumOfInstances = settings.Setting(1)
    filterByPValue = settings.Setting(True)
    maxPValue = settings.Setting(0.2)
    filterByPValue_nofdr = settings.Setting(False)
    maxPValue_nofdr = settings.Setting(0.01)
    probFunc = settings.Setting(0)

    selectionDirectAnnotation = settings.Setting(0)
    selectionDisjoint = settings.Setting(0)

    class Error(widget.OWWidget.Error):
        serverfiles_unavailable = widget.Msg('Can not locate annotation files, '
                                             'please check your connection and try again.')
        missing_annotation = widget.Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = widget.Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = widget.Msg(ERROR_ON_MISSING_TAX_ID)

    def __init__(self, parent=None):
        """Create the GUI and collect the list of available annotations.

        :param parent: forwarded to the base class constructor.
        """
        # super() already binds `self`; it must not be passed a second time.
        super().__init__(parent)

        self.input_data = None
        self.ref_data = None
        self.ontology = None
        self.annotations = None
        self.loaded_annotation_code = None
        # Index into `available_annotations` for the current input; stays
        # None until a valid input with a supported taxonomy id arrives.
        self.annotation_index = None
        self.treeStructRootKey = None
        # Defined up front because DisplayGraph can be reached from the
        # filter callbacks before the first enrichment has finished.
        self.treeStructDict = {}
        self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
        self.selectedTerms = []
        self.selectionChanging = 0
        self.__state = State.Ready
        self.__scheduletimer = QTimer(self, singleShot=True)
        self.__scheduletimer.timeout.connect(self.__update)

        #############
        # GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)

        # Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self.__invalidate)

        # The reference option is only enabled once a reference table is set.
        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self.__invalidate)

        # Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8, label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        # use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8, label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self.__invalidate)  # TODO: only update the p values
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                            addSpace=True)
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        # Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms",
                      "Outputs genes common to all selected GO terms"],
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        self.listView.header().setSortIndicator(self.DAGcolumns.index('p-value'),
                                                Qt.AscendingOrder)
        self.listView.setSortingEnabled(True)
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.header().setSortIndicator(self.DAGcolumns.index('p-value'),
                                                Qt.AscendingOrder)
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}
        self.originalGraph = None

        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        class AnnotationSlot(SimpleNamespace):
            taxid = ...  # type: str
            name = ...  # type: str
            filename = ...  # type:str

            @staticmethod
            def parse_tax_id(f_name):
                # The taxonomy id is the second dot-separated part of the
                # annotation file name.
                return f_name.split('.')[1]

        try:
            remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
        except (ConnectTimeout, RequestException, ConnectionError):
            # TODO: Warn user about failed connection to the remote server
            remote_files = []

        # Union of remote and locally available files, minus the ontology.
        self.available_annotations = [
            AnnotationSlot(
                taxid=AnnotationSlot.parse_tax_id(annotation_file),
                name=taxonomy.common_taxid_to_name(AnnotationSlot.parse_tax_id(annotation_file)),
                filename=FILENAME_ANNOTATION.format(AnnotationSlot.parse_tax_id(annotation_file))
            )
            for _, annotation_file in set(remote_files + serverfiles.listfiles(DOMAIN))
            if annotation_file != FILENAME_ONTOLOGY
        ]
        self._executor = ThreadExecutor()

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1000, 700)

    def __on_evidenceChanged(self):
        """Copy the evidence check box states into the setting and recompute."""
        for etype, cb in self.evidenceCheckBoxDict.items():
            self.useEvidenceType[etype] = cb.isChecked()
        self.__invalidate()

    def clear(self):
        """Reset the info label, warnings, views and both outputs."""
        self.infoLabel.setText("No data on input\n")
        self.warning(0)
        self.warning(1)
        self.ClearGraph()

        self.send("Data on Selected Genes", None)
        self.send("Enrichment Report", None)

    def setDataset(self, data=None):
        """Input handler for "Cluster Data".

        Reads the taxonomy id and gene id location from the table's
        ``attributes``; raises a widget error and stops if they are missing
        or the taxonomy has no annotation file, otherwise schedules an
        update.
        """
        self.closeContext()
        self.clear()
        self.Error.clear()

        # Forget the previous input first, so that a removed table is never
        # analysed again by a later scheduled update.
        self.input_data = None
        self.annotation_index = None
        if not data:
            return

        self.input_data = data
        tax_id = data.attributes.get(TAX_ID, None)
        # Keep a missing id as None: str(None) == 'None' would defeat the
        # `is None` checks below and hide the missing-taxonomy errors.
        self.tax_id = None if tax_id is None else str(tax_id)
        self.use_attr_names = data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
        self.gene_id_attribute = data.attributes.get(GENE_ID_ATTRIBUTE, None)
        self.gene_id_column = data.attributes.get(GENE_ID_COLUMN, None)

        # Exactly one of gene_id_attribute / gene_id_column must be given.
        if not (self.use_attr_names is not None
                and ((self.gene_id_attribute is None) ^ (self.gene_id_column is None))):
            if self.tax_id is None:
                self.Error.missing_annotation()
                return

            self.Error.missing_gene_id()
            return

        elif self.tax_id is None:
            self.Error.missing_tax_id()
            return

        _c2i = {a.taxid: i for i, a in enumerate(self.available_annotations)}
        try:
            self.annotation_index = _c2i[self.tax_id]
        except KeyError:
            self.Error.serverfiles_unavailable()
            # raise ValueError('Taxonomy {} not supported.'.format(self.tax_id))
            return

        self.__invalidate()

    def setReferenceDataset(self, data=None):
        """Input handler for "Reference Data".

        The table is stored as the reference only after its annotations
        were validated; a removed or invalid reference leaves
        ``ref_data`` as None so the entire genome is used instead.
        """
        self.Error.clear()

        # Never keep a stale or unvalidated reference table around.
        self.ref_data = None
        if data:
            tax_id = data.attributes.get(TAX_ID, None)
            # See setDataset: a missing id must stay None.
            self.ref_tax_id = None if tax_id is None else str(tax_id)
            self.ref_use_attr_names = data.attributes.get(GENE_AS_ATTRIBUTE_NAME, None)
            self.ref_gene_id_attribute = data.attributes.get(GENE_ID_ATTRIBUTE, None)
            self.ref_gene_id_column = data.attributes.get(GENE_ID_COLUMN, None)

            if not (self.ref_use_attr_names is not None
                    and ((self.ref_gene_id_attribute is None) ^ (self.ref_gene_id_column is None))):
                if self.ref_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.ref_tax_id is None:
                self.Error.missing_tax_id()
                return

            self.ref_data = data

        self.referenceRadioBox.buttons[1].setDisabled(not bool(data))
        self.referenceRadioBox.buttons[1].setText("Reference set")
        if self.input_data is not None and self.useReferenceDataset:
            self.useReferenceDataset = 0 if not data else 1
            self.__invalidate()

    @Slot()
    def __invalidate(self):
        # Invalidate the current results or pending task and schedule an
        # update.
        self.__scheduletimer.start()
        if self.__state != State.Ready:
            self.__state |= State.Stale

        self.SetGraph({})
        self.ref_genes = None
        self.input_genes = None

    def __invalidateAnnotations(self):
        """Drop the loaded annotations and schedule an update."""
        self.annotations = None
        self.loaded_annotation_code = None
        if self.input_data:
            self.infoLabel.setText("...\n")
        self.__invalidate()

    @Slot()
    def __update(self):
        """Run the scheduled update: ensure files, load them, start enrichment."""
        self.__scheduletimer.stop()
        # Nothing to compute without a valid input; annotation_index is None
        # when the input was rejected in setDataset.
        if self.input_data is None or self.annotation_index is None:
            return

        if self.__state & State.Running:
            self.__state |= State.Stale
        elif self.__state & State.Downloading:
            self.__state |= State.Stale
        elif self.__state & State.Ready:
            if self.__ensure_data():
                self.Load()
                self.Enrichment()
            else:
                assert self.__state & State.Downloading
                assert self.isBlocking()

    def __get_ref_genes(self):
        """Fill ``self.ref_genes`` with gene ids read from the reference table."""
        self.ref_genes = []

        if self.ref_use_attr_names:
            # Genes are the columns of the *reference* table.
            for variable in self.ref_data.domain.attributes:
                self.ref_genes.append(str(variable.attributes.get(self.ref_gene_id_attribute, '?')))
        else:
            genes, _ = self.ref_data.get_column_view(self.ref_gene_id_column)
            self.ref_genes = [str(g) for g in genes]

    def __get_input_genes(self):
        """Fill ``self.input_genes`` with gene ids read from the input table."""
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes.append(str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.input_data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

    def FilterAnnotatedGenes(self, genes):
        """Return ``(matched, unmatched)`` for `genes`.

        `matched` are the values of the annotations' gene-name translator;
        `unmatched` are the given genes not found among those values.
        """
        matchedgenes = self.annotations.get_gene_names_translator(genes).values()
        return matchedgenes, [gene for gene in genes if gene not in matchedgenes]

    def __start_download(self, files_list):
        # type: (List[Tuple[str, str]]) -> None
        """Download `files_list` in the background and block the widget."""
        task = EnsureDownloaded(files_list)
        task.progress.connect(self._progressBarSet)

        f = self._executor.submit(task)
        fw = FutureWatcher(f, self)
        fw.finished.connect(self.__download_finish)
        fw.finished.connect(fw.deleteLater)
        fw.resultReady.connect(self.__invalidate)

        self.progressBarInit(processEvents=None)
        self.setBlocking(True)
        self.setStatusMessage("Downloading")
        self.__state = State.Downloading

    @Slot(Future)
    def __download_finish(self, result):
        # type: (Future[None]) -> None
        """Unblock the widget and report the outcome of a download."""
        assert QThread.currentThread() is self.thread()
        assert result.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.progressBarFinished(processEvents=False)
        log = logging.getLogger(__name__)
        try:
            result.result()
        except ConnectTimeout:
            # exc_info keeps the traceback in the log.
            log.error("Error:", exc_info=True)
            self.error(2, "Internet connection error, unable to load data. " +
                          "Check connection and create a new GO Browser widget.")
        except RequestException as err:
            log.error("Error:", exc_info=True)
            self.error(2, "Internet error:\n" + str(err))
        except BaseException as err:
            log.error("Error:", exc_info=True)
            self.error(2, "Error:\n" + str(err))
            raise
        else:
            self.error(2)
        finally:
            self.__state = State.Ready

    def __ensure_data(self):
        # Ensure that all required database (ontology and annotations for
        # the current selected organism are present. If not start a download in
        # the background. Return True if all dbs are present and false
        # otherwise
        assert self.__state == State.Ready
        annotation = self.available_annotations[self.annotation_index]
        go_files = [fname for domain, fname in serverfiles.listfiles(DOMAIN)]
        files = []

        if annotation.filename not in go_files:
            # Use the same domain that the presence check above lists.
            files.append((DOMAIN, annotation.filename))

        if FILENAME_ONTOLOGY not in go_files:
            files.append((DOMAIN, FILENAME_ONTOLOGY))

        if files:
            self.__start_download(files)
            assert self.__state == State.Downloading
            return False
        else:
            return True

    def Load(self):
        """Load the ontology and the annotations for the current organism.

        Annotations are reloaded only when the organism changed; the
        evidence check boxes are then relabelled with annotation and gene
        counts.
        """
        a = self.available_annotations[self.annotation_index]

        if self.ontology is None:
            self.ontology = go.Ontology()

        if a.taxid != self.loaded_annotation_code:
            self.annotations = None
            gc.collect()  # Force run garbage collection
            self.annotations = go.Annotations(a.taxid)
            self.loaded_annotation_code = a.taxid
            count = defaultdict(int)
            geneSets = defaultdict(set)

            for anno in self.annotations.annotations:
                count[anno.evidence] += 1
                geneSets[anno.evidence].add(anno.gene_id)
            for etype in go.evidenceTypesOrdered:
                ecb = self.evidenceCheckBoxDict[etype]
                ecb.setEnabled(bool(count[etype]))
                ecb.setText(etype + ": %i annots(%i genes)" % (count[etype], len(geneSets[etype])))

    def Enrichment(self):
        """Collect input/reference genes and start the enrichment computation.

        Returns ``{}`` when no valid reference set is available; otherwise
        returns None and the result is delivered to
        ``__on_enrichment_done``.
        """
        assert self.input_data is not None
        assert self.__state == State.Ready

        if not self.annotations.ontology:
            self.annotations.ontology = self.ontology

        self.error(1)
        self.warning([0, 1])

        self.__get_input_genes()
        self.input_genes = set(self.input_genes)
        self.known_input_genes = self.annotations.get_genes_with_known_annotation(self.input_genes)

        # self.clusterGenes = clusterGenes = self.annotations.map_to_ncbi_id(self.input_genes).values()

        self.infoLabel.setText("%i unique genes on input\n%i (%.1f%%) genes with known annotations" %
                               (len(self.input_genes), len(self.known_input_genes),
                                100.0*len(self.known_input_genes)/len(self.input_genes)
                                if len(self.input_genes) else 0.0))

        if not self.useReferenceDataset or self.ref_data is None:
            self.information(2)
            self.information(1)
            self.ref_genes = self.annotations.genes()
            self.ref_genes = set(self.ref_genes)
        else:
            # A reference table is present and selected.
            self.__get_ref_genes()
            self.ref_genes = set(self.ref_genes)

            ref_count = len(self.ref_genes)
            if ref_count == 0:
                self.ref_genes = self.annotations.genes()
                self.referenceRadioBox.buttons[1].setText("Reference set")
                self.referenceRadioBox.buttons[1].setDisabled(True)
                self.information(2, "Unable to extract gene names from reference dataset. "
                                    "Using entire genome for reference")
                self.useReferenceDataset = 0
            else:
                self.referenceRadioBox.buttons[1].setText("Reference set ({} genes)".format(ref_count))
                self.referenceRadioBox.buttons[1].setDisabled(False)
                self.information(2)

        if not self.ref_genes:
            self.error(1, "No valid reference set")
            return {}

        evidences = [etype for etype in go.evidenceTypesOrdered
                     if self.useEvidenceType[etype]]
        aspect = ['Process', 'Component', 'Function'][self.aspectIndex]

        self.progressBarInit(processEvents=False)
        self.setBlocking(True)
        self.__state = State.Running

        if self.input_genes:
            f = self._executor.submit(
                self.annotations.get_enriched_terms,
                self.input_genes, self.ref_genes, evidences, aspect=aspect,
                prob=self.probFunctions[self.probFunc], use_fdr=False,

                progress_callback=methodinvoke(
                    self, "_progressBarSet", (float,))
            )
            fw = FutureWatcher(f, parent=self)
            fw.done.connect(self.__on_enrichment_done)
            fw.done.connect(fw.deleteLater)
            return
        else:
            # No genes: finish immediately with an empty result.
            f = Future()
            f.set_result({})
            self.__on_enrichment_done(f)

    def __on_enrichment_done(self, results):
        # type: (Future[Dict[str, tuple]]) -> None
        """Store the enrichment results, build the term tree and update views."""
        self.progressBarFinished(processEvents=False)
        self.setBlocking(False)
        self.setStatusMessage("")
        if self.__state & State.Stale:
            # Inputs or settings changed while computing; start over.
            self.__state = State.Ready
            self.__invalidate()
            return

        self.__state = State.Ready
        try:
            results = results.result()  # type: Dict[str, tuple]
        except Exception as ex:
            results = {}
            error = str(ex)
            self.error(1, error)

        if results:
            terms = list(results.items())
            fdr_vals = statistics.FDR([d[1] for _, d in terms])
            # Append the FDR as the last element of every result tuple.
            terms = [(key, d + (fdr,))
                     for (key, d), fdr in zip(terms, fdr_vals)]
            terms = dict(terms)
        else:
            terms = {}

        self.terms = terms

        if not self.terms:
            self.warning(0, "No enriched terms found.")
        else:
            self.warning(0)

        self.treeStructDict = {}
        self.treeStructRootKey = None

        parents = {
            term_id: {term for _, term in self.ontology[term_id].related}
            for term_id in self.terms
        }

        # Invert the parent map once instead of scanning all ids per term.
        children = {term: set() for term in self.terms}
        for term_id, term_parents in parents.items():
            for parent in term_parents:
                if parent in children:
                    children[parent].add(term_id)

        for term in self.terms:
            self.treeStructDict[term] = TreeNode(self.terms[term], children[term])
            # A non-obsolete term without related terms is taken as the root.
            if not self.ontology[term].related and not getattr(self.ontology[term], "is_obsolete", False):
                self.treeStructRootKey = term

        self.SetGraph(terms)
        self._updateEnrichmentReportOutput()
        self.commit()

    def _updateEnrichmentReportOutput(self):
        """Build the "Enrichment Report" table from ``self.terms`` and send it."""
        terms = sorted(self.terms.items(), key=lambda item: item[1][1])
        # Create and send the enrichemnt report table.
        termsDomain = Orange.data.Domain(
            [], [],
            # All is meta!
            [Orange.data.StringVariable("GO Term Id"),
             Orange.data.StringVariable("GO Term Name"),
             Orange.data.ContinuousVariable("Cluster Frequency"),
             Orange.data.ContinuousVariable("Genes in Cluster",
                                            number_of_decimals=0),
             Orange.data.ContinuousVariable("Reference Frequency"),
             Orange.data.ContinuousVariable("Genes in Reference",
                                            number_of_decimals=0),
             Orange.data.ContinuousVariable("p-value"),
             Orange.data.ContinuousVariable("FDR"),
             Orange.data.ContinuousVariable("Enrichment"),
             Orange.data.StringVariable("Genes")])

        terms = [[t_id,
                  self.ontology[t_id].name,
                  len(genes) / len(self.input_genes),
                  len(genes),
                  r_count / len(self.ref_genes),
                  r_count,
                  p_value,
                  fdr,
                  len(genes) / len(self.input_genes) *
                  len(self.ref_genes) / r_count,
                  ",".join(genes)
                  ]
                 for t_id, (genes, p_value, r_count, fdr) in terms
                 if genes and r_count]

        if terms:
            X = numpy.empty((len(terms), 0))
            M = numpy.array(terms, dtype=object)
            termsTable = Orange.data.Table.from_numpy(termsDomain, X,
                                                      metas=M)
        else:
            termsTable = None
        self.send("Enrichment Report", termsTable)

    @Slot(float)
    def _progressBarSet(self, value):
        assert QThread.currentThread() is self.thread()
        self.progressBarSet(value, processEvents=None)

    @Slot()
    def _progressBarFinish(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarFinished(processEvents=None)

    def FilterGraph(self, graph):
        """Return `graph` restricted by the enabled p-value/FDR/count filters."""
        if self.filterByPValue_nofdr:
            graph = go.filterByPValue(graph, self.maxPValue_nofdr)
        if self.filterByPValue:  # FDR
            graph = {term: data for term, data in graph.items()
                     if data[3] <= self.maxPValue}
        if self.filterByNumOfInstances:
            graph = {term: data for term, data in graph.items()
                     if len(data[0]) >= self.minNumOfInstances}
        return graph

    def FilterAndDisplayGraph(self):
        """Re-apply the filters to the original graph and redraw both views."""
        if self.input_data and self.originalGraph is not None:
            self.graph = self.FilterGraph(self.originalGraph)
            if self.originalGraph and not self.graph:
                self.warning(1, "All found terms were filtered out.")
            else:
                self.warning(1)
            self.ClearGraph()
            self.DisplayGraph()

    def SetGraph(self, graph=None):
        """Set the unfiltered term graph; an empty graph clears the views."""
        self.originalGraph = graph
        if graph:
            self.FilterAndDisplayGraph()
        else:
            self.graph = {}
            self.ClearGraph()

    def ClearGraph(self):
        """Remove all items from the tree view and the terms table."""
        self.listView.clear()
        self.listViewItems = []
        self.sigTerms.clear()

    def DisplayGraph(self):
        """Populate the tree view and the terms table from ``self.graph``."""
        fromParentDict = {}
        self.termListViewItemDict = {}
        self.listViewItems = []

        def enrichment(t):
            try:
                return len(t[0]) / t[2] * (len(self.ref_genes) / len(self.input_genes))
            except ZeroDivisionError:
                # TODO: find out why this happens
                return 0

        maxFoldEnrichment = max([enrichment(term) for term in self.graph.values()] or [1])

        def addNode(term, parent, parentDisplayNode):
            # Each (parent, term) edge is displayed only once.
            if (parent, term) in fromParentDict:
                return
            if term in self.graph:
                displayNode = GOTreeWidgetItem(self.ontology[term], self.graph[term],
                                               len(self.input_genes), len(self.ref_genes),
                                               maxFoldEnrichment, parentDisplayNode)
                displayNode.goId = term
                self.listViewItems.append(displayNode)
                self.termListViewItemDict.setdefault(term, []).append(displayNode)
                fromParentDict[(parent, term)] = True
                parent = term
            else:
                # Filtered-out term: attach its children to the nearest
                # displayed ancestor.
                displayNode = parentDisplayNode

            for c in self.treeStructDict[term].children:
                addNode(c, parent, displayNode)

        # Skip the tree when no root term was found among the results.
        if self.treeStructDict and self.treeStructRootKey in self.treeStructDict:
            addNode(self.treeStructRootKey, None, self.listView)

        terms = sorted(self.graph.items(), key=lambda item: item[1][1])

        self.sigTableTermsSorted = [t[0] for t in terms]

        self.sigTerms.clear()
        for t_id, (genes, p_value, refCount, fdr) in terms:
            item = GOTreeWidgetItem(self.ontology[t_id],
                                    (genes, p_value, refCount, fdr),
                                    len(self.input_genes),
                                    len(self.ref_genes),
                                    maxFoldEnrichment,
                                    self.sigTerms)
            item.goId = t_id

        self.listView.expandAll()
        for i in range(5):
            self.listView.resizeColumnToContents(i)
            self.sigTerms.resizeColumnToContents(i)
        self.sigTerms.resizeColumnToContents(6)
        width = min(self.listView.columnWidth(0), 350)
        self.listView.setColumnWidth(0, width)
        self.sigTerms.setColumnWidth(0, width)

    def ViewSelectionChanged(self):
        """Update the selected terms from the tree view and commit."""
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selected = self.listView.selectedItems()
        self.selectedTerms = list(set([lvi.term.id for lvi in selected]))
        self.ExampleSelection()
        self.selectionChanging = 0

    def TableSelectionChanged(self):
        """Mirror the terms-table selection into the tree view and commit."""
        if self.selectionChanging:
            return

        self.selectionChanging = 1
        self.selectedTerms = []
        selectedIds = set([self.sigTerms.itemFromIndex(index).goId
                           for index in self.sigTerms.selectedIndexes()])

        for i in range(self.sigTerms.topLevelItemCount()):
            item = self.sigTerms.topLevelItem(i)
            selected = item.goId in selectedIds
            term = item.goId

            if selected:
                self.selectedTerms.append(term)

            # A term listed in the table may have no node in the tree.
            for lvi in self.termListViewItemDict.get(term, ()):
                try:
                    lvi.setSelected(selected)
                    if selected:
                        lvi.setExpanded(True)
                except RuntimeError:  # Underlying C/C++ object deleted
                    pass

        self.selectionChanging = 0
        self.ExampleSelection()

    def ExampleSelection(self):
        """Callback for selection-related settings; re-sends the output."""
        self.commit()

    def commit(self):
        """Send the part of the input that belongs to the selected terms."""
        if self.input_data is None or self.originalGraph is None or \
                self.annotations is None:
            return
        if self.__state & State.Stale:
            return

        # Ignore selected terms that are no longer part of the shown graph.
        terms = set(self.selectedTerms)
        genes = reduce(operator.ior,
                       (set(self.graph[term][0]) for term in terms
                        if term in self.graph), set())

        evidences = [etype for etype in go.evidenceTypesOrdered
                     if self.useEvidenceType[etype]]

        allTerms = self.annotations.get_annotated_terms(
            genes, direct_annotation_only=self.selectionDirectAnnotation,
            evidence_codes=evidences)

        if self.selectionDisjoint > 0:
            count = defaultdict(int)
            for term in self.selectedTerms:
                for g in allTerms.get(term, []):
                    count[g] += 1
            # 1: genes in exactly one selected term; otherwise genes common
            # to all selected terms.
            ccount = 1 if self.selectionDisjoint == 1 else len(self.selectedTerms)
            selected_genes = [gene for gene, c in count.items()
                              if c == ccount and gene in genes]
        else:
            selected_genes = reduce(
                operator.ior,
                (set(allTerms.get(term, [])) for term in self.selectedTerms),
                set())

        # Build the lookup set once rather than once per column/row.
        selected_gene_set = set(selected_genes)

        if self.use_attr_names:
            selected = [column for column in self.input_data.domain.attributes
                        if self.gene_id_attribute in column.attributes and
                        str(column.attributes[self.gene_id_attribute]) in selected_gene_set]

            domain = Orange.data.Domain(selected, self.input_data.domain.class_vars, self.input_data.domain.metas)
            new_data = self.input_data.from_table(domain, self.input_data)
            self.send("Data on Selected Genes", new_data)
        else:
            # input_genes is None while results are invalidated.
            known_genes = self.input_genes or ()
            selected_rows = []
            for row_index, row in enumerate(self.input_data):
                gene_in_row = str(row[self.gene_id_column])
                if gene_in_row in known_genes and gene_in_row in selected_gene_set:
                    selected_rows.append(row_index)

            if selected_rows:
                selected = self.input_data[selected_rows]
            else:
                selected = None

            self.send("Data on Selected Genes", selected)

    def ShowInfo(self):
        """Open a non-modal dialog with the ontology and annotation headers."""
        dialog = QDialog(self)
        dialog.setModal(False)
        dialog.setLayout(QVBoxLayout())
        label = QLabel(dialog)
        label.setText("Ontology:\n" + self.ontology.header
                      if self.ontology else "Ontology not loaded!")
        dialog.layout().addWidget(label)

        label = QLabel(dialog)
        label.setText("Annotations:\n" + self.annotations.header.replace("!", "")
                      if self.annotations else "Annotations not loaded!")
        dialog.layout().addWidget(label)
        dialog.show()

    def onDeleteWidget(self):
        """Called before the widget is removed from the canvas.
        """
        self.annotations = None
        self.ontology = None
        gc.collect()  # Force collection
class OWItemsets(widget.OWWidget):
    """Widget that finds frequent itemsets and shows them as a prefix tree.

    Each tree node holds one item; the path from a top-level node down to a
    node spells out one itemset.  Selecting nodes sends the data instances
    that contain the selected itemsets to the "Matching Data" output.
    """
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table)

    class Error(widget.OWWidget.Error):
        need_discrete_data = widget.Msg(
            "Need some discrete data to work with.")
        no_disc_features = widget.Msg(
            "Discrete features required but data has none.")

    class Warning(widget.OWWidget.Warning):
        cont_attrs = widget.Msg(
            "Data has continuous attributes which will be skipped.")
        err_reg_expression = widget.Msg("Error in regular expression: {}")

    # Persisted widget settings
    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    # NOTE(review): both messages share the id 'itemsets-order'; if the id is
    # what the framework uses to remember dismissed messages, dismissing one
    # would also hide the other -- confirm and give the second its own id.
    UserAdviceMessages = [
        widget.Message(
            'Itemset are listed in item-sorted order, i.e. '
            'an itemset containing A and B is only listed once, as '
            'A > B (and not also B > A).',
            'itemsets-order', widget.Message.Warning),
        widget.Message(
            'To select all the itemsets that are descendants of '
            '(include) some item X (i.e. the whole subtree), you '
            'can fold the subtree at that item and then select it.',
            'itemsets-order', widget.Message.Information)
    ]

    def __init__(self):
        """Build the itemset tree and the info / search / filter controls."""
        self.data = None
        self.output = None
        self._is_running = False
        # Predicate applied to item names; replaced by filter_change().
        self.isRegexMatch = lambda x: True

        self.tree = QTreeWidget(self.mainArea,
                                columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.valueSlider(box, self, 'minSupport',
                        values=[.0001, .0005, .001, .005, .01, .05, .1, .5] +
                        list(range(1, 101)),
                        label='Minimal support:', labelFormat="%g%%",
                        callback=lambda: self.find_itemsets())
        gui.hSlider(box, self, 'maxItemsets', minValue=10000, maxValue=100000,
                    step=10000, label='Max. number of itemsets:',
                    labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        self.button = gui.auto_commit(
            box, self, 'autoFind', 'Find Itemsets',
            commit=self.find_itemsets,
            callback=lambda: self.autoFind and self.find_itemsets())

        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box, self, 'filterKeywords', 'Contains:',
                     callback=self.filter_change, orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                             'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox, self, 'filterMinItems', 1, 998, label='Min. items:',
                 callback=self.filter_change)
        gui.spin(hbox, self, 'filterMaxItems', 2, 999, label='Max. items:',
                 callback=self.filter_change)
        gui.checkBox(box, self, 'filterSearch',
                     label='Apply these filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                             'to these filter conditions already in the search '
                             'phase. \nIf unchecked, the only filters applied '
                             'during search are the ones above, '
                             'and the itemsets are \nfiltered afterwards only for '
                             'display, i.e. only the matching itemsets are shown.')
        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        self.filter_change()

    # Item-data role under which each tree node stores its encoded item.
    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        """Recompute the matching instances for the current tree selection.

        A selected node that is collapsed stands for its whole subtree: all
        of its descendants are counted and also selected in the view.
        """
        X = self.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            # Walk up to the root, yielding the item stored on each node.
            while node:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)),
                       self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for node in self.tree.selectedItems():
            nodes = (node, ) if node.isExpanded() else whole_subtree(node)
            if not node.isExpanded():
                for srange in selection_ranges(node):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                if issparse(X):
                    # A row matches when it is non-zero in every column.
                    rows = (len(cols) == np.bincount(
                        (X[:, cols] != 0).indices,
                        minlength=X.shape[0])).nonzero()[0]
                else:
                    rows = where((X[:, cols] == vals).all(axis=1))[0]
                instances.update(rows)

        # Extend the selection without re-entering this handler.
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(
            item_selection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        """Send the currently matching instances to the output."""
        self.Outputs.matching_data.send(self.output)

    def filter_change(self):
        """Rebuild the keyword filter and hide tree nodes that do not match.

        ``filterKeywords`` is split on commas and whitespace; the pieces are
        OR-ed into one case-insensitive regular expression.  If the result
        does not compile, a warning is shown and every item is accepted.
        """
        self.Warning.err_reg_expression.clear()
        try:
            # Non-capturing split: a capturing group would put the
            # separators (e.g. a bare ',') into the result, turning them
            # into regex alternatives of their own.
            keywords = [kw
                        for kw in re.split(r'[,\s]+',
                                           self.filterKeywords.strip())
                        if kw]
            isRegexMatch = self.isRegexMatch = re.compile(
                '|'.join(keywords), re.IGNORECASE).search
        except Exception as e:
            # Best effort: a bad pattern must not break the widget.
            self.Warning.err_reg_expression(e.args[0])
            isRegexMatch = self.isRegexMatch = lambda x: True

        def hide(node, depth, has_kw):
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            # An inner node is hidden when all of its children are hidden;
            # sum() is used so that every child is visited (no short-circuit).
            hidden = (sum(hide(node.child(i), depth + 1, has_kw)
                          for i in range(node.childCount())) == node.childCount()
                      if node.childCount() else
                      (not has_kw or
                       not self.filterMinItems <= depth <= self.filterMaxItems))
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            tooltip = []
            node = self
            while node:
                tooltip.append(node.text(0))
                node = node.parent()
            return '\n'.join(reversed(tooltip))

    def find_itemsets(self):
        """Search for frequent itemsets and populate the tree with them.

        Calling this method while a search is running requests cancellation
        of that search instead of starting a new one.  The tree's update and
        signal blocking, the running flag and the button caption are always
        restored, also when the search ends early or raises.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            # Second call while running acts as "Cancel".
            self._is_running = False
            return
        self._is_running = True

        self.button.button.setText('Cancel')

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            def __init__(self, item):
                self.item = item

        try:
            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.Error.need_discrete_data.clear()
            self.onehot_mapping = mapping
            if X is None:
                # Nothing to mine: report it and leave an empty tree rather
                # than handing None to the search.
                self.Error.need_discrete_data()
                self.nItemsets = 0
                self.nSelectedItemsets = 0
                self.nSelectedExamples = 0
                return

            ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
            names = {item: ITEM_FMT.format(var.name, val)
                     for item, var, val in OneHot.decode(mapping.keys(),
                                                         data, mapping)}
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems = self.filterMinItems
            filterMaxItems = self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(
                        X, self.minSupport / 100):

                    if filterSearch and not (
                            filterMinItems <= len(itemset) <= filterMaxItems):
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(
                                    parent.item,
                                    [name,
                                     str(support),
                                     '{:.4g}'.format(
                                         100 * support / len(data))])
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the
                                # support slider was moved, this line
                                # excepted with:
                                #     RuntimeError: wrapped C/C++ object of
                                #     type TreeWidgetItem has been deleted
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Remove the nodes this itemset added.  If it added
                        # none (its whole path already existed) there is
                        # nothing to prune.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()
                    if not self._is_running or nItemsets >= self.maxItemsets:
                        break

                    # Keep the GUI responsive (and let "Cancel" through).
                    qApp.processEvents()

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
            self.button.button.setText('Find Itemsets')

    @Inputs.data
    def set_data(self, data):
        """Accept new input data, validate it and optionally start a search."""
        self.data = data
        is_error = False
        # Messages describe the previous input; drop them even when the
        # input is removed.
        self.Warning.cont_attrs.clear()
        self.Error.no_disc_features.clear()
        if data is not None:
            self.button.setDisabled(False)

            self.X = data.X
            if issparse(data.X):
                # Column-oriented storage for the column slicing done in
                # selectionChanged().
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.Error.no_disc_features()
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.Warning.cont_attrs()
        else:
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
class OWWordEnrichment(OWWidget):
    """Widget listing words enriched in a selected subset of documents.

    It takes two tables ("Data" and "Selected Data"), keeps only the
    attributes marked as bag-of-words features, computes a p-value and an
    FDR value per word and shows the words that pass the enabled filters.
    """
    # Basic widget info
    name = "Word Enrichment"
    description = "Word enrichment analysis for selected documents."
    icon = "icons/SetEnrichment.svg"
    priority = 600

    # Input/output
    class Inputs:
        selected_data = Input("Selected Data", Table)
        data = Input("Data", Table)

    want_main_area = True

    class Error(OWWidget.Error):
        no_bow_features = Msg('No bag-of-words features!')
        no_words_overlap = Msg('No words overlap!')
        empty_selection = Msg('Selected data is empty!')
        all_selected = Msg('All examples can not be selected!')

    # Settings
    filter_by_p = Setting(False)
    filter_p_value = Setting(0.01)
    filter_by_fdr = Setting(True)
    filter_fdr_value = Setting(0.2)

    def __init__(self):
        """Create the info labels, the filter controls and the word list."""
        super().__init__()

        # Init data
        self.data = None
        self.selected_data = None
        # used for transforming the 'selected data' into the 'data' domain
        self.selected_data_transformed = None

        self.words = []
        self.p_values = []
        self.fdr_values = []

        # Info section
        fbox = gui.widgetBox(self.controlArea, "Info")
        self.info_all = gui.label(fbox, self, 'Cluster words:')
        self.info_sel = gui.label(fbox, self, 'Selected words:')
        self.info_fil = gui.label(fbox, self, 'After filtering:')

        # Filtering settings
        fbox = gui.widgetBox(self.controlArea, "Filter")
        hbox = gui.widgetBox(fbox, orientation=0)

        self.chb_p = gui.checkBox(hbox, self, "filter_by_p", "p-value",
                                  callback=self.filter_and_display,
                                  tooltip="Filter by word p-value")
        self.spin_p = gui.doubleSpin(hbox, self, 'filter_p_value',
                                     1e-4, 1, step=1e-4, labelWidth=15,
                                     callback=self.filter_and_display,
                                     callbackOnReturn=True,
                                     tooltip="Max p-value for word")
        self.spin_p.setEnabled(self.filter_by_p)

        hbox = gui.widgetBox(fbox, orientation=0)
        self.chb_fdr = gui.checkBox(hbox, self, "filter_by_fdr", "FDR",
                                    callback=self.filter_and_display,
                                    tooltip="Filter by word FDR")
        # Tooltip describes the FDR threshold (it used to repeat the
        # p-value text).
        self.spin_fdr = gui.doubleSpin(hbox, self, 'filter_fdr_value',
                                       1e-4, 1, step=1e-4, labelWidth=15,
                                       callback=self.filter_and_display,
                                       callbackOnReturn=True,
                                       tooltip="Max FDR for word")
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        gui.rubber(self.controlArea)

        # Word's list view
        self.cols = ['Word', 'p-value', 'FDR']
        self.sig_words = QTreeWidget()
        self.sig_words.setColumnCount(len(self.cols))
        self.sig_words.setHeaderLabels(self.cols)
        self.sig_words.setSortingEnabled(True)
        self.sig_words.setSelectionMode(QTreeView.ExtendedSelection)
        self.sig_words.sortByColumn(2, 0)   # 0 is ascending order
        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)
        self.mainArea.layout().addWidget(self.sig_words)

    def sizeHint(self):
        """Return the preferred initial widget size."""
        return QSize(450, 240)

    @Inputs.data
    def set_data(self, data=None):
        """Store the full data set; processing happens in handleNewSignals."""
        self.data = data

    @Inputs.selected_data
    def set_data_selected(self, data=None):
        """Store the selected subset; processing happens in handleNewSignals."""
        self.selected_data = data

    def handleNewSignals(self):
        """Re-validate the inputs once all pending signals were delivered."""
        self.check_data()

    def get_bow_domain(self):
        """Return the data domain restricted to bag-of-words attributes.

        An attribute is kept when its ``attributes`` dict has a truthy
        ``'bow-feature'`` entry; class variables and metas are kept as is.
        """
        domain = self.data.domain
        return Domain(
            attributes=[a for a in domain.attributes
                        if a.attributes.get('bow-feature', False)],
            class_vars=domain.class_vars,
            metas=domain.metas,
            source=domain)

    def check_data(self):
        """Validate both inputs and run the analysis when they are usable.

        Shows an error and clears the view when the selection is empty, the
        data has no bag-of-words features, the selection shares no words
        with them, or the selection is as long as the whole data set.
        """
        self.Error.clear()
        if isinstance(self.data, Table) and \
                isinstance(self.selected_data, Table):
            if len(self.selected_data) == 0:
                self.Error.empty_selection()
                self.clear()
                return

            # keep only BoW features
            bow_domain = self.get_bow_domain()
            if len(bow_domain.attributes) == 0:
                self.Error.no_bow_features()
                self.clear()
                return
            self.data = Corpus.from_table(bow_domain, self.data)
            self.selected_data_transformed = Corpus.from_table(
                bow_domain, self.selected_data)

            if np_sp_sum(self.selected_data_transformed.X) == 0:
                self.Error.no_words_overlap()
                self.clear()
            elif len(self.data) == len(self.selected_data):
                self.Error.all_selected()
                self.clear()
            else:
                self.apply()
        else:
            self.clear()

    def clear(self):
        """Empty the word list, reset the info labels and drop old results.

        The result lists are reset as well so that toggling a filter after
        an input error cannot re-display words from an earlier run.
        """
        self.words = []
        self.p_values = []
        self.fdr_values = []
        self.sig_words.clear()
        self.info_all.setText('Cluster words:')
        self.info_sel.setText('Selected words:')
        self.info_fil.setText('After filtering:')

    def filter_enabled(self, b):
        """Enable or disable the filter controls.

        A threshold spin box is only enabled when its check box is ticked,
        matching the state set up in ``__init__`` and ``filter_and_display``.
        """
        self.chb_p.setEnabled(b)
        self.chb_fdr.setEnabled(b)
        self.spin_p.setEnabled(bool(b and self.filter_by_p))
        self.spin_fdr.setEnabled(bool(b and self.filter_by_fdr))

    def filter_and_display(self):
        """Refill the word list with the words passing the enabled filters."""
        self.spin_p.setEnabled(self.filter_by_p)
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        self.sig_words.clear()

        if self.selected_data_transformed is None:  # do nothing when no Data
            return

        count = 0
        if self.words:
            for word, pval, fval in zip(self.words, self.p_values,
                                        self.fdr_values):
                if (not self.filter_by_p or pval <= self.filter_p_value) and \
                        (not self.filter_by_fdr or
                         fval <= self.filter_fdr_value):
                    it = EATreeWidgetItem(word, pval, fval, self.sig_words)
                    self.sig_words.addTopLevelItem(it)
                    count += 1

        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)

        self.info_all.setText(
            'Cluster words: {}'.format(
                len(self.selected_data_transformed.domain.attributes)))
        self.info_sel.setText(
            'Selected words: {}'.format(
                np.count_nonzero(
                    np_sp_sum(self.selected_data_transformed.X, axis=0))))
        if not self.filter_by_p and not self.filter_by_fdr:
            self.info_fil.setText('After filtering:')
            self.info_fil.setEnabled(False)
        else:
            self.info_fil.setEnabled(True)
            self.info_fil.setText('After filtering: {}'.format(count))

    def progress(self, p):
        """Progress callback handed to the p-value computation."""
        self.progressBarSet(p)

    def apply(self):
        """Compute p-values and FDR values for all words and display them.

        The filter controls are disabled for the duration of the
        computation; they are re-enabled and the progress bar is closed even
        if the computation raises.
        """
        self.clear()
        self.progressBarInit()
        self.filter_enabled(False)
        try:
            words = [i.name
                     for i in self.selected_data_transformed.domain.attributes]
            p_values = hypergeom_p_values(
                self.data.X, self.selected_data_transformed.X,
                callback=self.progress)
            fdr_values = false_discovery_rate(p_values)
            # Publish the results only once all of them are available.
            self.words = words
            self.p_values = p_values
            self.fdr_values = fdr_values
            self.filter_and_display()
        finally:
            self.filter_enabled(True)
            self.progressBarFinished()

    def tree_to_table(self):
        """Return the displayed words as rows of text, header row first."""
        view = [self.cols]
        for i in range(self.sig_words.topLevelItemCount()):
            item = self.sig_words.topLevelItem(i)
            view.append([item.text(j) for j in range(len(self.cols))])
        return view

    def send_report(self):
        """Add the table of displayed words to the report, if there are any."""
        if self.words:
            self.report_table("Enriched words", self.tree_to_table())
def __init__(self):
    """Set up widget state, the itemset tree and all control-area widgets."""
    # --- state -------------------------------------------------------------
    self.data = None
    self.output = None
    self._is_running = False
    # Accept-everything predicate until filter_change() installs a real one.
    self.isRegexMatch = lambda x: True

    # --- main area: tree of itemsets ----------------------------------------
    self.tree = QTreeWidget(
        self.mainArea,
        columnCount=2,
        allColumnsShowFocus=True,
        alternatingRowColors=True,
        selectionMode=QTreeWidget.ExtendedSelection,
        uniformRowHeights=True)
    self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
    self.tree.header().setStretchLastSection(True)
    self.tree.itemSelectionChanged.connect(self.selectionChanged)
    self.mainArea.layout().addWidget(self.tree)

    # --- control area: info --------------------------------------------------
    info_box = gui.widgetBox(self.controlArea, "Info")
    self.nItemsets = ''
    self.nSelectedExamples = ''
    self.nSelectedItemsets = ''
    for template in ("Number of itemsets: %(nItemsets)s",
                     "Selected itemsets: %(nSelectedItemsets)s",
                     "Selected examples: %(nSelectedExamples)s"):
        gui.label(info_box, self, template)
    fold_row = gui.widgetBox(info_box, orientation='horizontal')
    gui.button(fold_row, self, "Expand all", callback=self.tree.expandAll)
    gui.button(fold_row, self, "Collapse all", callback=self.tree.collapseAll)

    # --- control area: search ------------------------------------------------
    find_box = gui.widgetBox(self.controlArea, 'Find itemsets')
    support_values = [.0001, .0005, .001, .005, .01, .05, .1, .5]
    support_values = support_values + list(range(1, 101))
    gui.valueSlider(
        find_box, self, 'minSupport',
        values=support_values,
        label='Minimal support:',
        labelFormat="%g%%",
        callback=lambda: self.find_itemsets())
    gui.hSlider(
        find_box, self, 'maxItemsets',
        minValue=10000,
        maxValue=100000,
        step=10000,
        label='Max. number of itemsets:',
        labelFormat="%d",
        callback=lambda: self.find_itemsets())
    self.button = gui.auto_commit(
        find_box, self, 'autoFind', 'Find Itemsets',
        commit=self.find_itemsets,
        callback=lambda: self.autoFind and self.find_itemsets())

    # --- control area: filter ------------------------------------------------
    filter_box = gui.widgetBox(self.controlArea, 'Filter itemsets')
    gui.lineEdit(
        filter_box, self, 'filterKeywords', 'Contains:',
        callback=self.filter_change,
        orientation='horizontal',
        tooltip='A comma or space-separated list of regular '
                'expressions.')
    size_row = gui.widgetBox(filter_box, orientation='horizontal')
    gui.spin(size_row, self, 'filterMinItems', 1, 998,
             label='Min. items:', callback=self.filter_change)
    gui.spin(size_row, self, 'filterMaxItems', 2, 999,
             label='Max. items:', callback=self.filter_change)
    search_tooltip = (
        'If checked, the itemsets are filtered according '
        'to these filter conditions already in the search '
        'phase. \nIf unchecked, the only filters applied '
        'during search are the ones above, '
        'and the itemsets are \nfiltered afterwards only for '
        'display, i.e. only the matching itemsets are shown.')
    gui.checkBox(filter_box, self, 'filterSearch',
                 label='Apply these filters in search',
                 tooltip=search_tooltip)
    gui.rubber(size_row)

    # --- control area: output ------------------------------------------------
    gui.rubber(self.controlArea)
    gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

    # Apply the stored filter settings to the (still empty) tree.
    self.filter_change()
def __init__(self, parent=None):
    """Build the GO enrichment widget GUI and start loading its resources.

    Creates the "Input", "Filter" and "Select" tabs in the control area, the
    GO term tree and the table of significant terms in the main area, and
    finally submits an ``EnsureDownloaded`` task to a thread executor;
    ``__initialize_finish`` is called when that task is done.

    :param parent: optional parent object, forwarded to the base class.
    """
    # super() already binds `self`; passing it again would shift `parent`
    # into the wrong positional slot.
    super().__init__(parent)

    self.clusterDataset = None
    self.referenceDataset = None
    self.ontology = None
    self.annotations = None
    self.loadedAnnotationCode = "---"
    self.treeStructRootKey = None
    self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
    self.selectedTerms = []

    # Re-entrancy guard shared by the two selection-changed handlers.
    self.selectionChanging = 0
    self.__state = OWGOEnrichmentAnalysis.Initializing

    self.annotationCodes = []

    #############
    ## GUI
    #############

    self.tabs = gui.tabWidget(self.controlArea)
    ## Input tab
    self.inputTab = gui.createTabPage(self.tabs, "Input")
    box = gui.widgetBox(self.inputTab, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    gui.button(
        box, self, "Ontology/Annotation Info",
        callback=self.ShowInfo,
        tooltip="Show information on loaded ontology and annotations")

    box = gui.widgetBox(self.inputTab, "Organism")
    self.annotationComboBox = gui.comboBox(
        box, self, "annotationIndex",
        items=self.annotationCodes,
        callback=self._updateEnrichment,
        tooltip="Select organism")

    genebox = gui.widgetBox(self.inputTab, "Gene Names")
    self.geneAttrIndexCombo = gui.comboBox(
        genebox, self, "geneAttrIndex",
        callback=self._updateEnrichment,
        tooltip="Use this attribute to extract gene names from input data")
    self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

    cb = gui.checkBox(genebox, self, "useAttrNames", "Use column names",
                      tooltip="Use column names for gene names",
                      callback=self._updateEnrichment)
    # The attribute combo is irrelevant while column names are used.
    cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

    gui.button(genebox, self, "Gene matcher settings",
               callback=self.UpdateGeneMatcher,
               tooltip="Open gene matching settings dialog")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.inputTab, self, "useReferenceDataset",
        ["Entire genome", "Reference set (input)"],
        tooltips=[
            "Use entire genome for reference",
            "Use genes from Reference Examples input signal as reference"
        ],
        box="Reference",
        callback=self._updateEnrichment)

    # The reference-set option starts out disabled.
    self.referenceRadioBox.buttons[1].setDisabled(True)
    gui.radioButtonsInBox(
        self.inputTab, self, "aspectIndex",
        ["Biological process", "Cellular component", "Molecular function"],
        box="Aspect",
        callback=self._updateEnrichment)

    ## Filter tab
    self.filterTab = gui.createTabPage(self.tabs, "Filter")
    box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
    gui.checkBox(
        box, self, "filterByNumOfInstances", "Genes",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by number of input genes mapped to a term")
    ibox = gui.indentedBox(box)
    gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
             step=1, label='#:', labelWidth=15,
             callback=self.FilterAndDisplayGraph,
             callbackOnReturn=True,
             tooltip="Min. number of input genes mapped to a term")

    gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                 callback=self.FilterAndDisplayGraph,
                 tooltip="Filter by term p-value")

    gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr',
                   1e-8, 1, step=1e-8, label='p:', labelWidth=15,
                   callback=self.FilterAndDisplayGraph,
                   callbackOnReturn=True,
                   tooltip="Max term p-value")

    #use filterByPValue for FDR, as it was the default in prior versions
    gui.checkBox(box, self, "filterByPValue", "FDR",
                 callback=self.FilterAndDisplayGraph,
                 tooltip="Filter by term FDR")
    gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue',
                   1e-8, 1, step=1e-8, label='p:', labelWidth=15,
                   callback=self.FilterAndDisplayGraph,
                   callbackOnReturn=True,
                   tooltip="Max term p-value")

    box = gui.widgetBox(box, "Significance test")

    gui.radioButtonsInBox(box, self, "probFunc",
                          ["Binomial", "Hypergeometric"],
                          tooltips=[
                              "Use binomial distribution test",
                              "Use hypergeometric distribution test"
                          ],
                          callback=self._updateEnrichment)
    box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                        addSpace=True)
    # One check box per evidence code, kept in a dict keyed by the code.
    self.evidenceCheckBoxDict = {}
    for etype in go.evidenceTypesOrdered:
        ecb = QCheckBox(etype,
                        toolTip=go.evidenceTypes[etype],
                        checked=self.useEvidenceType[etype])
        ecb.toggled.connect(self.__on_evidenceChanged)
        box.layout().addWidget(ecb)
        self.evidenceCheckBoxDict[etype] = ecb

    ## Select tab
    self.selectTab = gui.createTabPage(self.tabs, "Select")
    box = gui.radioButtonsInBox(self.selectTab, self,
                                "selectionDirectAnnotation",
                                ["Directly or Indirectly", "Directly"],
                                box="Annotated genes",
                                callback=self.ExampleSelection)

    box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
    gui.radioButtonsInBox(
        box, self, "selectionDisjoint",
        btnLabels=[
            "All selected genes",
            "Term-specific genes",
            "Common term genes"
        ],
        tooltips=[
            "Outputs genes annotated to all selected GO terms",
            "Outputs genes that appear in only one of selected GO terms",
            "Outputs genes common to all selected GO terms"
        ],
        callback=[self.ExampleSelection, self.UpdateAddClassButton])

    self.addClassCB = gui.checkBox(box, self, "selectionAddTermAsClass",
                                   "Add GO Term as class",
                                   callback=self.ExampleSelection)

    # ListView for DAG, and table for significant GOIDs
    self.DAGcolumns = [
        'GO term', 'Cluster', 'Reference', 'p-value', 'FDR', 'Genes',
        'Enrichment'
    ]

    self.splitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(self.splitter)

    # list view
    self.listView = GOTreeWidget(self.splitter)
    self.listView.setSelectionMode(QTreeView.ExtendedSelection)
    self.listView.setAllColumnsShowFocus(1)
    self.listView.setColumnCount(len(self.DAGcolumns))
    self.listView.setHeaderLabels(self.DAGcolumns)

    self.listView.header().setSectionsClickable(True)
    self.listView.header().setSortIndicatorShown(True)
    self.listView.setSortingEnabled(True)
    # Column 6 ('Enrichment') is painted by a custom delegate.
    self.listView.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))
    self.listView.setRootIsDecorated(True)

    self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

    # table of significant GO terms
    self.sigTerms = QTreeWidget(self.splitter)
    self.sigTerms.setColumnCount(len(self.DAGcolumns))
    self.sigTerms.setHeaderLabels(self.DAGcolumns)
    self.sigTerms.setSortingEnabled(True)
    self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
    self.sigTerms.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))

    self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

    self.sigTableTermsSorted = []
    self.graph = {}

    # Push the controls of each tab to the top.
    self.inputTab.layout().addStretch(1)
    self.filterTab.layout().addStretch(1)
    self.selectTab.layout().addStretch(1)

    # Block the widget and run the download check on the executor;
    # __initialize_finish is connected to the task's `finished` signal.
    self.setBlocking(True)
    self._executor = ThreadExecutor()
    self._init = EnsureDownloaded(
        [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
         ("GO", "taxonomy.pickle")])
    self._init.finished.connect(self.__initialize_finish)
    self._executor.submit(self._init)
class OWGenExpress(widget.OWWidget): name = "GenExpress" description = "Expression data from GenExpress." icon = "../widgets/icons/GenCloud.svg" priority = 36 inputs = [] outputs = [("Data", Orange.data.Table)] username = settings.Setting("anonymous") password = settings.Setting("") log2 = settings.Setting(False) transpose = settings.Setting(False) rtypei = settings.Setting(0) projecti = settings.Setting(0) serveri = settings.Setting(0) exnamei = settings.Setting(6) excludeconstant = settings.Setting(False) joinreplicates = settings.Setting(False) currentSelection = settings.Setting(None) experimentsHeaderState = settings.Setting({ name: False for _, name in HEADER[:ID_INDEX + 1]} ) storedSortOrder = settings.Setting([]) storedSelections = settings.Setting([]) def __init__(self, parent=None): super().__init__(parent) self.servers = [ ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'), ('https://cloud.genialis.com/', 'Genialis'), ] self.selectedExperiments = [] self.buffer = dicty.CacheSQLite(bufferfile) self.searchString = "" self.items = [] self.result_types = [] self.controlArea.setMaximumWidth(250) self.controlArea.setMinimumWidth(250) box = gui.widgetBox(self.controlArea, 'Project') self.projectCB = gui.comboBox( box, self, "projecti", items=[], callback=self.ProjectChosen) self.projects = [] b = gui.widgetBox(self.controlArea, "Selection bookmarks") self.selectionSetsWidget = SelectionSetsWidget(self) self.selectionSetsWidget.setSizePolicy( QSizePolicy.Preferred, QSizePolicy.Maximum) def store_selections(modified): if not modified: self.storedSelections = self.selectionSetsWidget.selections self.selectionSetsWidget.selectionModified.connect(store_selections) b.layout().addWidget(self.selectionSetsWidget) gui.separator(self.controlArea) b = gui.widgetBox(self.controlArea, "Sort output columns") self.columnsSortingWidget = SortedListWidget(self) self.columnsSortingWidget.setSizePolicy( QSizePolicy.Preferred, QSizePolicy.Maximum) box = 
gui.widgetBox(self.controlArea, 'Experiment name') self.experimentNameCB = gui.comboBox( box, self, "exnamei", items=SORTING_MODEL_LIST) b.layout().addWidget(self.columnsSortingWidget) sorting_model = QStringListModel(SORTING_MODEL_LIST) self.columnsSortingWidget.setModel(sorting_model) self.columnsSortingWidget.sortingOrder = self.storedSortOrder def store_sort_order(): self.storedSortOrder = self.columnsSortingWidget.sortingOrder self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order) gui.separator(self.controlArea) box = gui.widgetBox(self.controlArea, 'Expression Type') self.expressionTypesCB = gui.comboBox( box, self, "rtypei", items=[], callback=self.UpdateResultsList) gui.checkBox(self.controlArea, self, "excludeconstant", "Exclude labels with constant values") gui.checkBox(self.controlArea, self, "joinreplicates", "Average replicates (use median)") gui.checkBox(self.controlArea, self, "log2", "Logarithmic (base 2) transformation") gui.checkBox(self.controlArea, self, "transpose", "Genes as columns") self.commit_button = gui.button(self.controlArea, self, "&Commit", callback=self.Commit) self.commit_button.setDisabled(True) gui.rubber(self.controlArea) box = gui.widgetBox(self.controlArea, 'Server') gui.comboBox(box, self, "serveri", items=[title for url, title in self.servers], callback=self.ServerChosen) gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Clear cache", callback=self.clear_cache) gui.lineEdit(self.mainArea, self, "searchString", "Search", callbackOnType=True, callback=self.SearchUpdate) self.headerLabels = [t[1] for t in HEADER] self.experimentsWidget = QTreeWidget() self.experimentsWidget.setHeaderLabels(self.headerLabels) self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection) self.experimentsWidget.setRootIsDecorated(False) self.experimentsWidget.setSortingEnabled(True) contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.experimentsWidget, 
self.experimentsWidget) self.experimentsWidget.header().installEventFilter(contextEventFilter) self.experimentsWidget.setItemDelegateForColumn( 0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole)) self.experimentsWidget.setAlternatingRowColors(True) self.experimentsWidget.selectionModel().selectionChanged.connect( self.onSelectionChanged) self.selectionSetsWidget.setSelectionModel( self.experimentsWidget.selectionModel()) self.selectionSetsWidget.setSelections(self.storedSelections) self.mainArea.layout().addWidget(self.experimentsWidget) self.restoreHeaderState() self.experimentsWidget.header().geometriesChanged.connect( self.saveHeaderState) self.dbc = None self.AuthSet() QTimer.singleShot(100, self.ConnectAndUpdate) def sizeHint(self): return QSize(800, 600) def AuthSet(self): if len(self.username): self.passf.setDisabled(False) else: self.passf.setDisabled(True) def AuthChanged(self): self.AuthSet() self.ConnectAndUpdate() def ConnectAndUpdate(self): self.Connect() if self.dbc: def get_data_count(project_id): # XXX: is there a better way? 
# Note: limit 0 would return all objects return self.dbc.gen.api.data.get(case_ids__contains=project_id, type__startswith='data:expression:', limit=1)['meta']['total_count'] self.projects = sorted([p for p in self.dbc.projects().items() if get_data_count(p[0]) > 0], key=lambda x: x[1]) self.UpdateProjects() self.ProjectChosen() self.UpdateExperimentTypes() def Connect(self): self.error(1) self.warning(1) username = '******' password = '******' url = self.servers[self.serveri][0] if self.username: username = self.username password = self.password if username.lower() in ['*****@*****.**', 'anonymous']: username = '******' password = '******' self.dbc = None self.projects = [] self.result_types = [] try: self.dbc = Genesis( address=url, username=username, password=password, cache=self.buffer) except requests.exceptions.ConnectionError: self.dbc = Genesis( address=url, username=username, password=password, connect=False, cache=self.buffer) self.warning(1, "Could not connect to server, working from cache.") except Exception: self.error(1, "Wrong username or password.") self.UpdateProjects() self.UpdateExperimentTypes() # clear lists def Reload(self): self.UpdateExperiments(reload=True) def clear_cache(self): self.buffer.clear() self.Reload() def rtype(self): """Return selected result template type """ if self.result_types: return self.result_types[self.rtypei] else: return None def UpdateExperimentTypes(self): self.expressionTypesCB.clear() items = [self.result_types_labels[desc] for desc in self.result_types] self.expressionTypesCB.addItems(items) #do not update anything if the list is empty if len(self.result_types): self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1)) def UpdateProjects(self): self.projectCB.clear() items = [desc for pid, desc in self.projects] self.projectCB.addItems(items) #do not update anything if the list if empty if len(self.projects) > 0: self.projecti = max(0, min(self.projecti, len(self.projects) - 1)) def 
UpdateExperiments(self, reload=False): self.experimentsWidget.clear() if not self.dbc or not self.dbc.projectid: # the connection did not succeed return self.items = [] self.progressBarInit() result_types = [] result_types_labels = [] sucind = False # success indicator for database index try: result_types, result_types_labels = self.dbc.result_types(reload=reload) sucind = True except Exception: try: result_types, result_types_labels = self.dbc.result_types() self.warning(0, "Can not access database - using cached data.") sucind = True except Exception: self.error(0, "Can not access database.") if sucind: self.warning(0) self.error(0) self.result_types = result_types self.result_types_labels = result_types_labels self.UpdateExperimentTypes() self.UpdateResultsList(reload=reload) self.progressBarFinished() if self.currentSelection: self.currentSelection.select(self.experimentsWidget.selectionModel()) self.handle_commit_button() def ProjectChosen(self, reload=False): if self.projects: self.dbc.projectid = self.projects[self.projecti][0] else: self.dbc.projectid = None self.UpdateExperiments(reload=reload) def ServerChosen(self): self.ConnectAndUpdate() def UpdateResultsList(self, reload=False): results_list = self.dbc.results_list(self.rtype(), reload=reload) try: results_list = self.dbc.results_list(self.rtype(), reload=reload) except Exception: try: results_list = self.dbc.results_list(self.rtype()) except Exception: self.error(0, "Can not access database.") self.results_list = results_list #softly change the view so that the selection stays the same items_shown = {} for i, item in enumerate(self.items): c = str(item.text(ID_INDEX)) items_shown[c] = i items_to_show = set(id_ for id_ in self.results_list) add_items = set(items_to_show) - set(items_shown) delete_items = set(items_shown) - set(items_to_show) i = 0 while i < self.experimentsWidget.topLevelItemCount(): it = self.experimentsWidget.topLevelItem(i) if str(it.text(ID_INDEX)) in delete_items: 
self.experimentsWidget.takeTopLevelItem(i) else: i += 1 delete_ind = set([items_shown[i] for i in delete_items]) self.items = [it for i, it in enumerate(self.items) if i not in delete_ind] for r_annot in add_items: d = defaultdict(lambda: "?", self.results_list[r_annot]) row_items = [""] + [to_text(d.get(key, "?")) for key, _ in HEADER[1:]] row_items[ID_INDEX] = r_annot ci = MyTreeWidgetItem(self.experimentsWidget, row_items) self.items.append(ci) for i in range(len(self.headerLabels)): self.experimentsWidget.resizeColumnToContents(i) self.wantbufver = lambda x: self.results_list[x]["date_modified"] self.UpdateCached() def UpdateCached(self): if self.wantbufver and self.dbc: for item in self.items: id = str(item.text(ID_INDEX)) version = self.dbc._in_buffer(id + "|||" + self.rtype()) value = " " if version == self.wantbufver(id) else "" item.setData(0, Qt.DisplayRole, value) def SearchUpdate(self, string=""): for item in self.items: item.setHidden( not all(s in item for s in self.searchString.split())) def Commit(self): pb = gui.ProgressBar(self, iterations=100) table = None ids = [] for item in self.experimentsWidget.selectedItems(): unique_id = str(item.text(ID_INDEX)) ids.append(unique_id) transfn = None if self.log2: transfn = lambda x: math.log(x + 1.0, 2) reverse_header_dict = {name: name for key, name in HEADER} reverse_header_dict["ID"] = "id" allowed_labels = None def namefn(a): name = SORTING_MODEL_LIST[self.exnamei] name = reverse_header_dict.get(name, "id") return dict(a)[name] if len(ids): table = self.dbc.get_data( ids=ids, result_type=self.rtype(), callback=pb.advance, exclude_constant_labels=self.excludeconstant, bufver=self.wantbufver, transform=transfn, allowed_labels=allowed_labels, namefn=namefn) if self.joinreplicates: table = dicty.join_replicates(table, ignorenames=self.dbc.IGNORE_REPLICATE, namefn="name", avg=dicty.median, fnshow=lambda x: " | ".join(map(str, x))) # Sort attributes sortOrder = self.columnsSortingWidget.sortingOrder 
all_values = defaultdict(set) for at in table.domain.attributes: atts = at.attributes for name in sortOrder: all_values[name].add(atts.get(reverse_header_dict[name], "")) isnum = {} for at, vals in all_values.items(): vals = filter(None, vals) try: for a in vals: float(a) isnum[at] = True except ValueError: isnum[at] = False def optfloat(x, at): if x == "": return "" else: return float(x) if isnum[at] else x def sorting_key(attr): atts = attr.attributes return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) for name in sortOrder]) attributes = sorted(table.domain.attributes, key=sorting_key) domain = Orange.data.Domain( attributes, table.domain.class_vars, table.domain.metas) table = Orange.data.Table.from_table(domain, table) table = Orange.data.Table(domain, table) if self.transpose: experiments = [at for at in table.domain.variables] attr = [compat.ContinuousVariable.make(ex['DDB'].value) for ex in table] metavars = sorted(table.domain.variables[0].attributes.keys()) metavars = [compat.StringVariable.make(name) for name in metavars] domain = compat.create_domain(attr, None, metavars) metas = [[exp.attributes[var.name] for var in metavars] for exp in experiments] table = compat.create_table(domain, table.X.transpose(), None, metas) data_hints.set_hint(table, "taxid", "352472") data_hints.set_hint(table, "genesinrows", False) self.send("Data", table) self.UpdateCached() pb.finish() def onSelectionChanged(self, selected, deselected): self.handle_commit_button() def handle_commit_button(self): self.currentSelection = \ SelectionByKey(self.experimentsWidget.selectionModel().selection(), key=(ID_INDEX,)) self.commit_button.setDisabled(not len(self.currentSelection)) def saveHeaderState(self): hview = self.experimentsWidget.header() for i, label in enumerate(self.headerLabels): self.experimentsHeaderState[label] = hview.isSectionHidden(i) def restoreHeaderState(self): hview = self.experimentsWidget.header() state = self.experimentsHeaderState for i, 
label in enumerate(self.headerLabels): hview.setSectionHidden(i, state.get(label, True)) self.experimentsWidget.resizeColumnToContents(i)
class OWGEODatasets(OWWidget):
    """Widget that lists GEO data sets and outputs the selected one.

    The data set index is built by ``get_gds_model`` in a background task
    started from ``__init__``; the widget stays disabled and blocking until
    ``_initializemodel`` receives the result. Committing downloads the
    selected data set in a second background task and sends the filtered
    table on the "Expression Data" output from ``_on_dataready``.
    """

    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    # index of the "Rows" radio button; also passed as ``transpose`` to
    # ``GDS.getdata`` and stored as the "genesinrows" hint
    outputRows = Setting(True)
    # passed as ``report_genes`` to ``GDS.getdata``
    mergeSpots = Setting(True)
    # check state per annotation key: (dataset_id, type[, subset])
    gdsSelectionStates = Setting({})
    # info dict of the selected data set, or None
    currentGds = Setting(None)
    # user-chosen output name per dataset_id
    datasetNames = Setting({})
    # saved QSplitter states (vertical, horizontal)
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        """Build the GUI and start loading the data set index."""
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box, self, "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # NOTE(review): presumably gui.auto_commit takes over the
            # auto/manual gating, so commitIf is rebound -- confirm.
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        # the source model is attached later, in _initializemodel
        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        # guards annotationSelectionChanged while setAnnotations rebuilds
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        # data set fields whose words feed the filter completer
        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # block the widget until the index has been loaded
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Slot: set the progress bar value reported by the init task."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded model and restore the previous selection.

        Connected to the init task's ``finished`` signal. Also fills the
        filter completer with words (longer than 3 characters) taken from
        the ``searchKeys`` fields of every data set.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(gds[key] for gds in self.gds
                                for key in self.searchKeys)
        # replace punctuation by spaces so that only bare words remain
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            # re-select the row whose column 1 holds the stored dataset id
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        # cap the width of columns 1 and 2
        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label (total, cached and filtered counts)."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """React to a selection change in the data set view.

        Updates ``currentGds``, the annotation tree, the description and the
        name editor, then calls ``commitIf``. Signal arguments are ignored.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """Rebuild the sample annotation tree for data set ``gds``.

        One top-level item is created per subset type and one child per
        subset description; check states come from ``gdsSelectionStates``
        and default to checked.
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for subset_type, subsets in annotations.items():
            key = (gds["dataset_id"], subset_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [subset_type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], subset_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check state of every annotation item.

        Connected to ``itemChanged``; ``item`` and ``column`` are not used
        because the whole tree is re-read. Ignored while ``setAnnotations``
        is rebuilding the tree.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            type_item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[type_item.key] = type_item.checkState(0)
            for j in range(type_item.childCount()):
                child = type_item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit's words to the proxy model."""
        # NOTE(review): ``unicode`` must be defined elsewhere in the file on
        # Python 3 -- confirm.
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        Returns a ``(samples, used_types)`` tuple: ``samples`` is a list of
        selected (sample type, sample value) tuples and ``used_types`` is
        the list of sample types that have at least one selected value.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                # keys never stored count as selected
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit now when auto-commit is on, else mark a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Slot: show download progress as ``value`` out of ``total``."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """Start a background download of the selected data set.

        Does nothing when no data set is selected. The widget is disabled
        and blocking until ``_on_dataready`` runs.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            # a sample type is passed on only when exactly one type is used
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(report_genes=report_genes,
                                   transpose=transpose,
                                   sample_type=sample_type)
                data.name = title
                return data

            # bind the current settings now; the task runs later
            get_data = partial(get_data,
                               self.currentGds["dataset_id"],
                               report_genes=self.mergeSpots,
                               transpose=self.outputRows,
                               sample_type=sample_type,
                               title=self.datasetName or
                               self.currentGds["title"])

            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """Filter the downloaded table by selected samples and send it.

        Connected to the data task's ``finished`` signal. A ``URLError``
        from the task is shown as widget error 0 and reported through
        ``sys.excepthook``; nothing is sent in that case.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            # default of None: the attribute is absent on Python 2
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__", None))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                # (annotation name, value) pairs carried by one row
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            # keep rows whose annotations are all among the selected ones
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            # keep columns whose annotations are all among the selected ones
            domain = Orange.data.Domain([
                attr for attr in data.domain.attributes
                if samples.issuperset(attr.attributes.items())
            ], data.domain.class_var, data.domain.metas)
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes)
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        # flag the data set's row (column 0) in the source model
        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Persist the state of both splitters; arguments are ignored."""
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        """Add the selected data set's details to the report.

        Nothing is reported when no data set is selected.
        """
        if not self.currentGds:
            # updateSelection sets currentGds to None on empty selection;
            # subscripting it below would raise TypeError
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for subset_type in subsets:
            self.report_html += "<b>" + subset_type + ":</b></br>"
            for desc, count in subsets[subset_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks, disconnect them and stop the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWGEODatasets(OWWidget):
    """Widget that lists GEO data sets and outputs the selected one.

    The data set index is built by ``get_gds_model`` in a background task
    started from ``__init__``; the widget stays disabled and blocking until
    ``_initializemodel`` receives the result. Committing downloads the
    selected data set in a second background task and sends the filtered
    table on the "Expression Data" output from ``_on_dataready``.
    """

    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    # index of the "Rows" radio button; also passed as ``transpose`` to
    # ``GDS.getdata`` and stored as the "genesinrows" hint
    outputRows = Setting(True)
    # passed as ``report_genes`` to ``GDS.getdata``
    mergeSpots = Setting(True)
    # check state per annotation key: (dataset_id, type[, subset])
    gdsSelectionStates = Setting({})
    # info dict of the selected data set, or None
    currentGds = Setting(None)
    # user-chosen output name per dataset_id
    datasetNames = Setting({})
    # saved QSplitter states (vertical, horizontal)
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        """Build the GUI and start loading the data set index."""
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # NOTE(review): presumably gui.auto_commit takes over the
            # auto/manual gating, so commitIf is rebound -- confirm.
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive
        )
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        # the source model is attached later, in _initializemodel
        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # guards annotationSelectionChanged while setAnnotations rebuilds
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        # data set fields whose words feed the filter completer
        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # block the widget until the index has been loaded
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Slot: set the progress bar value reported by the init task."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded model and restore the previous selection.

        Connected to the init task's ``finished`` signal. Also fills the
        filter completer with words (longer than 3 characters) taken from
        the ``searchKeys`` fields of every data set.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        # replace punctuation by spaces so that only bare words remain
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            # re-select the row whose column 1 holds the stored dataset id
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        # cap the width of columns 1 and 2
        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label (total, cached and filtered counts)."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """React to a selection change in the data set view.

        Updates ``currentGds``, the annotation tree, the description and the
        name editor, then calls ``commitIf``. Signal arguments are ignored.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """Rebuild the sample annotation tree for data set ``gds``.

        One top-level item is created per subset type and one child per
        subset description; check states come from ``gdsSelectionStates``
        and default to checked.
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for subset_type, subsets in annotations.items():
            key = (gds["dataset_id"], subset_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [subset_type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], subset_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check state of every annotation item.

        Connected to ``itemChanged``; ``item`` and ``column`` are not used
        because the whole tree is re-read. Ignored while ``setAnnotations``
        is rebuilding the tree.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            type_item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[type_item.key] = type_item.checkState(0)
            for j in range(type_item.childCount()):
                child = type_item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit's words to the proxy model."""
        # NOTE(review): ``unicode`` must be defined elsewhere in the file on
        # Python 3 -- confirm.
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        Returns a ``(samples, used_types)`` tuple: ``samples`` is a list of
        selected (sample type, sample value) tuples and ``used_types`` is
        the list of sample types that have at least one selected value.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                # keys never stored count as selected
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit now when auto-commit is on, else mark a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Slot: show download progress as ``value`` out of ``total``."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """Start a background download of the selected data set.

        Does nothing when no data set is selected. The widget is disabled
        and blocking until ``_on_dataready`` runs.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            # a sample type is passed on only when exactly one type is used
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            # bind the current settings now; the task runs later
            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )

            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """Filter the downloaded table by selected samples and send it.

        Connected to the data task's ``finished`` signal. A ``URLError``
        from the task is shown as widget error 0 and reported through
        ``sys.excepthook``; nothing is sent in that case.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            # default of None: the attribute is absent on Python 2
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__", None))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                # (annotation name, value) pairs carried by one row
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            # keep rows whose annotations are all among the selected ones
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            # keep columns whose annotations are all among the selected ones
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        # flag the data set's row (column 0) in the source model
        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Persist the state of both splitters; arguments are ignored."""
        self.splitterSettings = [bytes(sp.saveState())
                                 for sp in self.splitters]

    def send_report(self):
        """Add the selected data set's details to the report.

        Nothing is reported when no data set is selected.
        """
        if not self.currentGds:
            # updateSelection sets currentGds to None on empty selection;
            # subscripting it below would raise TypeError
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append((subset['description'],
                                            len(subset['sample_id'])))
        self.report_html += "<ul>"
        for subset_type in subsets:
            self.report_html += "<b>" + subset_type + ":</b></br>"
            for desc, count in subsets[subset_type]:
                self.report_html += 9 * " " + \
                    "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks, disconnect them and stop the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWDatabasesUpdate(OWWidget):
    """Widget for inspecting, downloading and updating local database files.

    The file list is computed by ``evaluate_files_state`` in a ``Worker``
    started on a ``QThreadPool`` and displayed in a ``QTreeWidget``.
    Downloads are also ``Worker`` tasks; they report progress, results and
    errors back through the worker's signals.
    """

    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/OWDatabasesUpdate.svg"
    priority = 1

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        """Build the GUI and start the initial evaluation of file states."""
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.search_update)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")
        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(header_labels)
        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(header.Title, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        # Re-apply the text filter whenever the view is re-laid out
        # (e.g. after sorting).
        self.filesView.model().layoutChanged.connect(self.search_update)

        box.layout().addWidget(self.filesView)

        layout = QHBoxLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)

        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.update_all,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.download_filtered,
            tooltip="Download all filtered files shown")

        self.cancelButton = gui.button(
            box, self, "Cancel",
            callback=self.cancel_active_threads,
            tooltip="Cancel scheduled downloads/updates.")

        self.addButton = gui.button(
            box, self, "Add ...",
            callback=self.__handle_dialog,
            tooltip="Add files for personal use.")

        layout.addWidget(self.updateButton)
        layout.addWidget(self.downloadButton)
        layout.addWidget(self.cancelButton)
        layout.addStretch()
        layout.addWidget(self.addButton)

        # TODO: add a "Reconnect" button (calling initialize_files_view)
        # that is shown when the initial listing fails.

        self.resize(800, 600)

        self.update_items = []
        self._dialog = None
        self.progress_bar = None

        # threads
        self.threadpool = QThreadPool(self)
        # Download workers queued by submit_download_task and not yet
        # handed to the thread pool.
        self.workers = list()

        self.initialize_files_view()

    def __handle_dialog(self):
        """Show the file upload dialog, creating it on first use."""
        if not self._dialog:
            self._dialog = FileUploadHelper(self)
        self._dialog.show()

    def __progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling
        # the advance method here (from a signal) and not in the worker.
        if self.progress_bar:
            self.progress_bar.advance()

    def handle_worker_exception(self, ex):
        """Handle a failure of the initial file state evaluation."""
        self.progress_bar.finish()
        self.setStatusMessage('')

        if isinstance(ex, ConnectionError):
            # TODO: set warning messages
            pass

        # NOTE(review): initialize_files_view() disabled the widget and
        # nothing re-enables it on this path - confirm this is intended.
        print(ex)

    def initialize_files_view(self):
        """Clear the view and evaluate file states in a worker thread."""
        # clear view
        self.filesView.clear()
        # init progress bar
        self.progress_bar = gui.ProgressBar(self, iterations=3)
        # status message
        self.setStatusMessage('initializing')

        worker = Worker(evaluate_files_state, progress_callback=True)
        worker.signals.progress.connect(self.__progress_advance)
        worker.signals.result.connect(self.set_files_list)
        worker.signals.error.connect(self.handle_worker_exception)

        # move download process to worker thread
        self.threadpool.start(worker)
        # Re-enabled in set_files_list once the result arrives.
        self.setEnabled(False)

    def __create_action_button(self, fs, retry=None):
        """Place an action button in the ``header.Update`` column of `fs`.

        With `retry` set the button reads 'Retry'; otherwise its label and
        action depend on ``fs.state`` (and, for user files, on whether
        ``fs.info_server`` is set).
        """
        # Equivalent to the original double negation
        # `not fs.state not in [OUTDATED, USER_FILE] or not retry`.
        if fs.state in (OUTDATED, USER_FILE) or not retry:
            self.filesView.setItemWidget(fs.tree_item, header.Update, None)

        download = partial(
            self.submit_download_task, fs.domain, fs.filename, True)
        remove = partial(self.submit_remove_task, fs.domain, fs.filename)

        button = QToolButton(None)
        if retry:
            button.setText('Retry')
            button.clicked.connect(download)
        elif fs.state == OUTDATED:
            button.setText('Update')
            button.clicked.connect(download)
        elif fs.state == USER_FILE:
            if not fs.info_server:
                button.setText('Remove')
                button.clicked.connect(remove)
            else:
                button.setText('Use server version')
                button.clicked.connect(download)

        button.setMaximumWidth(120)
        button.setMaximumHeight(20)
        button.setMinimumHeight(20)

        if sys.platform == "darwin":
            button.setAttribute(Qt.WA_MacSmallSize)

        self.filesView.setItemWidget(fs.tree_item, header.Update, button)

    def set_files_list(self, result):
        """ Set the files to show.
        """
        assert threading.current_thread() == threading.main_thread()
        self.progress_bar.finish()
        self.setStatusMessage('')
        self.setEnabled(True)

        self.update_items = result
        all_tags = set()

        for fs in self.update_items:
            fs.tree_item = FileStateItem(fs)
            fs.download_option = DownloadOption(state=fs.state)

            fs.download_option.download_clicked.connect(
                partial(self.submit_download_task, fs.domain, fs.filename))
            fs.download_option.remove_clicked.connect(
                partial(self.submit_remove_task, fs.domain, fs.filename))

        # add widget items to the QTreeWidget
        self.filesView.addTopLevelItems(
            [fs.tree_item for fs in self.update_items])

        # add action widgets to tree items
        for fs in self.update_items:
            self.filesView.setItemWidget(
                fs.tree_item, header.Download, fs.download_option)
            if fs.state in [USER_FILE, OUTDATED]:
                self.__create_action_button(fs)

            all_tags.update(fs.tags)

        self.filesView.setColumnWidth(
            header.Download,
            self.filesView.sizeHintForColumn(header.Download))

        for column in range(1, len(header_labels)):
            self.filesView.resizeColumnToContents(column)

        # Tags starting with '#' are not offered as completions.
        hints = [hint for hint in sorted(all_tags)
                 if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.search_update()
        self.toggle_action_buttons()
        self.cancelButton.setEnabled(False)

    def toggle_action_buttons(self):
        """Enable 'Update all' / 'Download all' only if they would act.

        A button is enabled when at least one visible (not filtered out)
        file is in the matching state, and disabled otherwise - including
        when no file is visible at all.
        """
        visible_states = [
            fs.state for fs in self.update_items
            if not fs.tree_item.isHidden()
        ]

        self.updateButton.setEnabled(
            any(state == OUTDATED for state in visible_states))
        self.downloadButton.setEnabled(
            any(state == AVAILABLE for state in visible_states))

    def search_update(self, searchString=None):
        """Hide files that do not match every word of the filter text."""
        strings = str(self.lineEditFilter.text()).split()
        for fs in self.update_items:
            hide = not all(UpdateItem_match(fs, string)
                           for string in strings)
            fs.tree_item.setHidden(hide)
        self.toggle_action_buttons()

    def update_all(self):
        """Submit a download for every visible outdated file."""
        for fs in self.update_items:
            if fs.state == OUTDATED and not fs.tree_item.isHidden():
                self.submit_download_task(fs.domain, fs.filename)

    def download_filtered(self):
        """Queue all visible available/outdated files, then start them."""
        for fs in self.update_items:
            if not fs.tree_item.isHidden() and fs.state in [
                    AVAILABLE, OUTDATED]:
                self.submit_download_task(
                    fs.domain, fs.filename, start=False)

        self.run_download_tasks()

    def submit_download_task(self, domain, filename, start=True):
        """
        Submit the (domain, filename) to be downloaded/updated.

        With `start` false the worker is only queued; call
        `run_download_tasks` to hand queued workers to the thread pool.
        Raises ValueError (from `tree_item_index`) for an unknown file.
        """
        # get selected tree item
        index = self.tree_item_index(domain, filename)
        fs = self.update_items[index]

        worker = Worker(download_server_file, fs, index,
                        progress_callback=True)
        worker.signals.progress.connect(self.__progress_advance)
        worker.signals.result.connect(self.on_download_finished)
        worker.signals.error.connect(self.on_download_exception)

        self.workers.append(worker)

        if start:
            self.run_download_tasks()

    def run_download_tasks(self):
        """Start all queued download workers and lock the files view."""
        if not self.workers:
            # Nothing queued: do not disable the view, since no worker
            # would ever report back to re-enable it.
            return

        self.cancelButton.setEnabled(True)
        # init progress bar
        self.progress_bar = gui.ProgressBar(
            self, iterations=len(self.workers) * 100)

        # status message
        self.setStatusMessage('downloading')

        # move workers to threadpool
        for worker in self.workers:
            self.threadpool.start(worker)
        self.filesView.setDisabled(True)
        # reset list of workers
        self.workers = list()

    def on_download_exception(self, ex):
        """Handle a failed download; offer 'Retry' for ValueError."""
        assert threading.current_thread() == threading.main_thread()
        self.progress_bar.finish()
        self.setStatusMessage('')
        # The view was disabled in run_download_tasks; a disabled view also
        # disables its item widgets, so re-enable it or the 'Retry' button
        # created below could never be clicked.
        self.filesView.setDisabled(False)
        print(ex)
        if isinstance(ex, ValueError):
            fs, index = ex.args

            # restore state and retry
            fs.refresh_state()
            fs.tree_item.update_data(fs)
            fs.download_option.state = fs.state
            self.__create_action_button(fs, retry=True)

    def on_download_finished(self, result):
        """Refresh the row of a finished download; `result` is (fs, index)."""
        assert threading.current_thread() == threading.main_thread()

        # We check if all workers have completed. If not, continue
        if (self.progress_bar.count == 100
                or self.threadpool.activeThreadCount() == 0):
            self.filesView.setDisabled(False)
            self.progress_bar.finish()
            self.setStatusMessage('')

        fs, index = result
        # re-evaluate File State
        info = serverfiles.info(fs.domain, fs.filename)
        fs.refresh_state(info_local=info, info_server=info)
        # reinitialize treeWidgetItem
        fs.tree_item.update_data(fs)
        # reinitialize OptionWidget
        fs.download_option.state = fs.state

        self.filesView.setItemWidget(fs.tree_item, header.Update, None)

        self.toggle_action_buttons()
        for column in range(1, len(header_labels)):
            self.filesView.resizeColumnToContents(column)

    def submit_remove_task(self, domain, filename):
        """Remove the local copy of (domain, filename) and update its row.

        User files are dropped from the view; other files stay listed with
        their state refreshed.
        """
        serverfiles.LOCALFILES.remove(domain, filename)

        index = self.tree_item_index(domain, filename)
        fs = self.update_items[index]

        if fs.state == USER_FILE:
            self.filesView.takeTopLevelItem(
                self.filesView.indexOfTopLevelItem(fs.tree_item))
            self.update_items.remove(fs)
        else:
            # refresh item state
            fs.info_local = None
            fs.refresh_state()
            # reinitialize treeWidgetItem
            fs.tree_item.update_data(fs)
            # reinitialize OptionWidget
            fs.download_option.state = fs.state

        self.toggle_action_buttons()

    def cancel_active_threads(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        if self.threadpool:
            self.threadpool.clear()

    def tree_item_index(self, domain, filename):
        """Return the index of (domain, filename) in ``update_items``.

        Raises ValueError when there is no such entry.
        """
        for i, fs in enumerate(self.update_items):
            if fs.domain == domain and fs.filename == filename:
                return i

        raise ValueError("%r, %r not in update list" % (domain, filename))

    def onDeleteWidget(self):
        """Drop pending tasks before the widget is deleted."""
        self.cancel_active_threads()
        OWWidget.onDeleteWidget(self)
def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
    """Build the GEO data sets widget and start loading the GDS model.

    The control area gets the info label, the output options and the
    commit controls; the main area gets the filter line edit, the data
    set tree view and the description / sample annotation panes.  The
    widget stays blocked and disabled while `get_gds_model` runs on a
    `ThreadExecutor`; `_initializemodel` is connected to the task's
    `finished` signal.
    """
    OWWidget.__init__(self, parent, signalManager, name)

    self.selectionChanged = False
    self.filterString = ""
    self.datasetName = ""

    # ---- Control area -------------------------------------------------
    info_box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
    self.infoBox = gui.widgetLabel(info_box, "Initializing\n\n")

    output_box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
    row_choices = ["Genes in rows", "Samples in rows"]
    gui.radioButtonsInBox(
        output_box, self, "outputRows", row_choices, "Rows",
        callback=self.commitIf)
    gui.checkBox(
        output_box, self, "mergeSpots", "Merge spots of same gene",
        callback=self.commitIf)

    gui.separator(output_box)
    self.nameEdit = gui.lineEdit(
        output_box, self, "datasetName", "Data set name",
        tooltip="Override the default output data set name",
        callback=self.onNameEdited
    )
    self.nameEdit.setPlaceholderText("")

    if sys.version_info >= (3, ):
        gui.auto_commit(
            self.controlArea, self, "autoCommit", "Commit", box="Commit")
        # From here on `commitIf` is simply `commit`.
        self.commitIf = self.commit
    else:
        commit_box = gui.widgetBox(
            self.controlArea, "Commit", addSpace=True)
        self.commitButton = gui.button(
            commit_box, self, "Commit", callback=self.commit)
        auto_check = gui.checkBox(
            commit_box, self, "autoCommit", "Commit on any change")
        gui.setStopper(
            self, self.commitButton, auto_check, "selectionChanged",
            self.commit)

    gui.rubber(self.controlArea)

    # ---- Main area: filter line ---------------------------------------
    gui.widgetLabel(self.mainArea, "Filter")
    self.filterLineEdit = QLineEdit(
        textChanged=self.filter
    )
    self.completer = TokenListCompleter(
        self, caseSensitivity=Qt.CaseInsensitive
    )
    self.filterLineEdit.setCompleter(self.completer)
    self.mainArea.layout().addWidget(self.filterLineEdit)

    # ---- Main area: data set list -------------------------------------
    vsplitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(vsplitter)

    self.treeWidget = QTreeView(vsplitter)
    tree = self.treeWidget
    tree.setSelectionMode(QTreeView.SingleSelection)
    tree.setRootIsDecorated(False)
    tree.setSortingEnabled(True)
    tree.setAlternatingRowColors(True)
    tree.setUniformRowHeights(True)
    tree.setEditTriggers(QTreeView.NoEditTriggers)

    link_delegate = LinkStyledItemDelegate(tree)
    for link_column in (1, 8):
        tree.setItemDelegateForColumn(link_column, link_delegate)

    indicator_delegate = gui.IndicatorItemDelegate(
        tree, role=Qt.DisplayRole)
    tree.setItemDelegateForColumn(0, indicator_delegate)

    proxy = MySortFilterProxyModel(tree)
    tree.setModel(proxy)
    tree.selectionModel().selectionChanged.connect(
        self.updateSelection
    )
    tree.viewport().setMouseTracking(True)

    # ---- Main area: description and sample annotations ----------------
    hsplitter = QSplitter(Qt.Horizontal, vsplitter)

    description_box = gui.widgetBox(hsplitter, "Description")
    self.infoGDS = gui.widgetLabel(description_box, "")
    self.infoGDS.setWordWrap(True)
    gui.rubber(description_box)

    annotations_box = gui.widgetBox(hsplitter, "Sample Annotations")
    self.annotationsTree = QTreeWidget(annotations_box)
    self.annotationsTree.setHeaderLabels(
        ["Type (Sample annotations)", "Sample count"]
    )
    self.annotationsTree.setRootIsDecorated(True)
    annotations_box.layout().addWidget(self.annotationsTree)
    self.annotationsTree.itemChanged.connect(
        self.annotationSelectionChanged
    )
    self._annotationsUpdating = False

    # Track splitter moves and restore the saved splitter states.
    self.splitters = vsplitter, hsplitter
    for current, saved_state in zip(self.splitters, self.splitterSettings):
        current.splitterMoved.connect(self.splitterMoved)
        current.restoreState(saved_state)

    self.searchKeys = ["dataset_id", "title", "platform_organism",
                       "description"]

    self.gds = []
    self.gds_info = None

    self.resize(1000, 600)

    # Keep the widget blocked and disabled while the model loads.
    self.setBlocking(True)
    self.setEnabled(False)
    self.progressBarInit()

    self._executor = ThreadExecutor()

    progress_callback = methodinvoke(self, "_setProgress", (float,))
    load_model = partial(get_gds_model, progress_callback)
    self._inittask = Task(function=load_model)
    self._inittask.finished.connect(self._initializemodel)
    self._executor.submit(self._inittask)

    self._datatask = None
class OWPIPAx(widget.OWWidget):
    """Widget for selecting and fetching experiments from PIPAx.

    Experiments known to the ``dicty.PIPAx`` connection are listed in a
    tree view; the selected ones are downloaded, optionally transformed
    and averaged, sorted by the chosen annotation columns and sent on the
    "Data" output.
    """

    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)
    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])
    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    experimentsHeaderState = settings.Setting(
        {name: False for _, name in HEADER[:ID_INDEX + 1]}
    )

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        """Build the GUI and schedule the first experiments update."""
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}
        # Tree items currently shown; initialised here so SearchUpdate and
        # UpdateResultsList are safe before the first UpdateExperiments.
        self.items = []

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload",
                   callback=self.Reload)
        gui.button(self.controlArea, self, "Clear cache",
                   callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_selections(modified):
            # Persist the presets once the pending modification is done.
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(
            store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        # NOTE(review): the next three line-edit calls were corrupted in
        # the source (text between the label strings was replaced by
        # '******').  They are reconstructed from what the rest of the
        # class needs: `self.passf` (used by AuthSet) and the AuthChanged /
        # SearchUpdate callbacks.  Any lost layout arguments, the parent of
        # the search box and the password echo mode are assumptions -
        # confirm against version control.
        gui.lineEdit(box, self, "username", "Username:",
                     callback=self.AuthChanged)

        self.passf = gui.lineEdit(box, self, "password", "Password:",
                                  callback=self.AuthChanged)
        self.passf.setEchoMode(self.passf.Password)

        gui.lineEdit(self.mainArea, self, "searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget
        )

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel()
        )

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        # Populate the view shortly after construction.
        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        """Enable the password field only when a username is entered."""
        self.passf.setDisabled(not len(self.username))

    def AuthChanged(self):
        """React to edited credentials: reconnect and reload."""
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        """Create the database connection and, if a username is set,
        verify the credentials.

        On failure ``self.dbc`` is reset to None and an error is shown in
        message slot 1; if only cached data is reachable a warning is
        shown instead.
        """
        self.error(1)
        self.warning(1)

        def en(x):
            # Empty string -> None
            return x if len(x) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=en(self.username),
                               password=self.password)

        # check password
        if en(self.username) is not None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(
                        1, "Can not access database - using cached data.")
                except Exception:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        """Refill the expression type combo box from ``result_types``."""
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        # Clamp the stored index into the valid range.
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        """Reload mappings and result types, then rebuild the view.

        Falls back to the non-reloading calls (cached data, per the
        warning text) when the first attempt fails.
        """
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        # Reset message slot 0 *before* the attempt; clearing it afterwards
        # (as was done previously) wiped the "using cached data" warning
        # immediately after it had been set.
        self.warning(0)
        self.error(0)

        mappings = {}
        result_types = []

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
        except Exception:
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(
                    0, "Can not access database - using cached data.")
            except Exception:
                self.error(0, "Can not access database.")

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):
        """Synchronise the experiments view with the current result type.

        Rows that are no longer available are removed and new ones are
        added, leaving the remaining rows (and so their selection) alone.
        """
        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key)
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        # softly change the view so that the selection stays the same

        # Column 10 is read back as the unique id that is written to
        # row_items[ID_INDEX] below (presumably ID_INDEX == 10 - confirm).
        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(10))
            items_shown[c] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                # Taking the item shifts the following ones down, so the
                # index is advanced only when nothing was removed.
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = {items_shown[i] for i in delete_items}
        self.items = [it for i, it in enumerate(self.items)
                      if i not in delete_ind]

        for r_annot in [items_to_show[i] for i in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(time_dict["fullYearUTC"],
                                time_dict["monthUTC"] + 1,  # month is 0 based
                                time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                # Best effort: show no date if it cannot be parsed.
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)
            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version?
        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        """Mark (in column 0) the rows whose data is already buffered."""
        if not (self.wantbufver and self.dbc):
            return

        fn = self.dbc.download_key_function()
        result_id_key = dict(((m["data_id"], m["mappings_id"]), key)
                             for key, m in self.results_list.items())

        for item in self.items:
            c = str(item.text(10))
            mapping = self.mappings[c]
            data_id, mappings_id = mapping["data_id"], mapping["id"]
            r_id = result_id_key[data_id, mappings_id]
            # Get the buffered version
            buffered = self.dbc.inBuffer(fn(r_id))
            value = " " if buffered == self.wantbufver(r_id) else ""
            item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        """Hide rows that do not contain every word of the search string."""
        words = self.searchString.split()
        for item in self.items:
            item.setHidden(not all(s in item for s in words))

    def Commit(self):
        """Download the selected experiments and send them as a table.

        Nothing is sent when no experiment is selected.
        """
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)
        try:
            ids = []
            for item in self.experimentsWidget.selectedItems():
                unique_id = str(item.text(10))
                annots = self.mappings[unique_id]
                ids.append((annots["data_id"], annots["id"]))

            transfn = None
            if self.log2:
                transfn = lambda x: math.log(x + 1.0, 2)

            reverse_header_dict = dict((name, key) for key, name in HEADER)

            hview = self.experimentsWidget.header()
            shownHeaders = [label for i, label in
                            list(enumerate(self.headerLabels))[1:]
                            if not hview.isSectionHidden(i)]

            allowed_labels = [reverse_header_dict.get(label, label)
                              for label in shownHeaders]

            if self.joinreplicates and "id" not in allowed_labels:
                # need 'id' labels in join_replicates for attribute names
                allowed_labels.append("id")

            if ids:
                table = self.dbc.get_data(
                    ids=ids, result_type=self.rtype(),
                    callback=pb.advance,
                    exclude_constant_labels=self.excludeconstant,
                    transform=transfn,
                    allowed_labels=allowed_labels)

                if self.joinreplicates:
                    table = dicty.join_replicates(
                        table,
                        ignorenames=["replicate", "data_id", "mappings_id",
                                     "data_name", "id", "unique_id"],
                        namefn=None,
                        avg=dicty.median
                    )

                # Sort attributes
                sortOrder = self.columnsSortingWidget.sortingOrder

                all_values = defaultdict(set)
                for at in table.domain.attributes:
                    atts = at.attributes
                    for name in sortOrder:
                        all_values[name].add(
                            atts.get(reverse_header_dict[name], ""))

                def is_number(value):
                    try:
                        float(value)
                    except (TypeError, ValueError):
                        return False
                    return True

                # A sort column is numeric when all of its non-empty values
                # parse as floats.
                isnum = {name: all(is_number(v) for v in vals if v)
                         for name, vals in all_values.items()}

                def optfloat(x, at):
                    # Return a (missing, value) pair so that missing and
                    # numeric entries stay comparable on Python 3 (a bare
                    # "" next to floats raises TypeError when sorting).
                    # Missing values in numeric columns sort last.
                    if not isnum[at]:
                        return (False, x)
                    if x == "":
                        return (True, 0.0)
                    return (False, float(x))

                def sorting_key(attr):
                    atts = attr.attributes
                    return tuple(
                        optfloat(atts.get(reverse_header_dict[name], ""),
                                 name)
                        for name in sortOrder)

                attributes = sorted(table.domain.attributes,
                                    key=sorting_key)

                domain = Orange.data.Domain(
                    attributes, table.domain.class_var, table.domain.metas)
                table = table.from_table(domain, table)

                data_hints.set_hint(table, "taxid", "352472")
                data_hints.set_hint(table, "genesinrows", False)

                self.send("Data", table)

                self.UpdateCached()
        finally:
            # Always close the progress bar, even if the download fails.
            pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        """Store the current selection and enable Commit if non-empty."""
        self.currentSelection = SelectionByKey(
            self.experimentsWidget.selectionModel().selection(),
            key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        """Remember which header sections are hidden."""
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        """Apply stored section visibility; unknown labels are hidden."""
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox(box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox(box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # callback=self.updateGeneMatcherSettings, 
# tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=[ "Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference" ], box="Reference", callback=self.updateAnnotations) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox(fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) 
# NOTE: the statements up to the first ``def`` are the tail end of the
# widget constructor whose beginning precedes this span.
self.filterLineEdit = QLineEdit(self, placeholderText="Filter ...")
self.filterCompleter = QCompleter(self.filterLineEdit)
self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
self.filterLineEdit.setCompleter(self.filterCompleter)
hLayout.addWidget(self.filterLineEdit)
self.mainArea.layout().addWidget(hWidget)
self.filterLineEdit.textChanged.connect(
    self.filterAnnotationsChartView)
self.annotationsChartView = QTreeView(
    alternatingRowColors=True,
    sortingEnabled=True,
    selectionMode=QTreeView.ExtendedSelection,
    rootIsDecorated=False,
    editTriggers=QTreeView.NoEditTriggers,
)
self.annotationsChartView.viewport().setMouseTracking(True)
self.mainArea.layout().addWidget(self.annotationsChartView)
contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
    self.annotationsChartView)
self.annotationsChartView.header().installEventFilter(
    contextEventFilter)
self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
gui.auto_commit(self.controlArea, self, "autocommit", "Commit")
# Block the widget until the required database files are available;
# __initialize_finish lifts the block.
self.setBlocking(True)
task = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN,
                          taxonomy.Taxonomy.FILENAME),
                         (geneset.sfdomain, "index.pck")])
task.finished.connect(self.__initialize_finish)
self.setStatusMessage("Initializing")
self._executor = ThreadExecutor(parent=self,
                                threadPool=QThreadPool(self))
self._executor.submit(task)


def sizeHint(self):
    """Return the preferred initial size of the widget."""
    return QSize(1024, 600)


def __initialize_finish(self):
    # Finalize the widget's initialization (preferably after
    # ensuring all required databases have been downloaded).
    sets = geneset.list_all()
    taxids = set(taxonomy.common_taxids() +
                 list(filter(None, [tid for _, tid, _ in sets])))
    organisms = [(tid, name_or_none(tid)) for tid in taxids]
    organisms = [(tid, name) for tid, name in organisms
                 if name is not None]
    # The first entry stands for "no organism".
    organisms = [(None, "None")] + sorted(organisms)
    taxids = [tid for tid, _ in organisms]
    names = [name for _, name in organisms]

    self.taxid_list = taxids
    self.speciesComboBox.clear()
    self.speciesComboBox.addItems(names)
    self.genesets = sets

    if self.taxid in self.taxid_list:
        taxid = self.taxid
    else:
        taxid = self.taxid_list[0]

    # Reset so that setCurrentOrganism always sees a change.
    self.taxid = None
    self.setCurrentOrganism(taxid)
    self.setBlocking(False)
    self.__state = OWSetEnrichment.Ready
    self.setStatusMessage("")


def setCurrentOrganism(self, taxid):
    """Set the current organism `taxid`.

    A `taxid` that is not in ``self.taxid_list`` is replaced by the entry
    at ``self.speciesIndex`` (clamped to the list length).
    """
    if taxid not in self.taxid_list:
        taxid = self.taxid_list[min(self.speciesIndex,
                                    len(self.taxid_list) - 1)]
    if self.taxid != taxid:
        self.taxid = taxid
        self.speciesIndex = self.taxid_list.index(taxid)
        self.refreshHierarchy()
        self._invalidateGeneMatcher()
        self._invalidate()


def currentOrganism(self):
    """Return the current organism taxid"""
    return self.taxid


def __on_speciesIndexChanged(self):
    """React to a change of the species combo box selection."""
    taxid = self.taxid_list[self.speciesIndex]
    # Placeholder value that never equals a real taxid, so that
    # setCurrentOrganism treats the selection as changed.
    self.taxid = "< Do not look >"
    self.setCurrentOrganism(taxid)
    if self.__invalidated and self.data is not None:
        self.updateAnnotations()


def clear(self):
    """Clear/reset the widget state."""
    self._cancelPending()
    self.state = None
    self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment
    self._clearView()
    self.geneAttrComboBox.clear()
    self.geneAttrs = []
    self._updatesummary()


def _cancelPending(self):
    """Cancel pending tasks."""
    if self.state is not None:
        self.state.results.cancel()
        self.state.namematcher.cancel()
        # Checked by the enrichment worker loop between gene sets.
        self.state.cancelled = True


def _clearView(self):
    """Clear the enrichment report view (main area)."""
    model = self.annotationsChartView.model()
    if model is not None:
        model.clear()


def setData(self, data=None):
    """Set the input dataset with query gene names"""
    if self.__state & OWSetEnrichment.Initializing:
        self.__initialize_finish()

    self.error(0)
    self.closeContext()
    self.clear()
    self.groupsWidget.clear()
    self.data = data

    if data is not None:
        # Only string variables are offered as gene name columns.
        varlist = [
            var for var in data.domain.variables + data.domain.metas
            if isinstance(var, Orange.data.StringVariable)
        ]
        self.geneAttrs = varlist
        for var in varlist:
            self.geneAttrComboBox.addItem(*gui.attributeItem(var))

        oldtaxid = self.taxid
        self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1)

        taxid = data_hints.get_hint(data, "taxid", "")
        if taxid in self.taxid_list:
            self.speciesIndex = self.taxid_list.index(taxid)
            self.taxid = taxid

        self.genesinrows = data_hints.get_hint(data, "genesinrows",
                                               self.genesinrows)
        self.openContext(data)

        if oldtaxid != self.taxid:
            # Force setCurrentOrganism to register the change.
            self.taxid = "< Do not look >"
            self.setCurrentOrganism(taxid)

        self.refreshHierarchy()
        self._invalidate()


def setReference(self, data=None):
    """Set the (optional) input dataset with reference gene names."""
    self.referenceData = data
    self.referenceRadioBox.setEnabled(bool(data))
    if self.useReferenceData:
        self._invalidate()


def handleNewSignals(self):
    """Recompute the annotations if any input invalidated them."""
    if self.__invalidated:
        self.updateAnnotations()


def _invalidateGeneMatcher(self):
    """Cancel the current gene matcher future and reset it to an empty one."""
    _, f = self.__genematcher
    f.cancel()
    self.__genematcher = (None, fulfill(gene.matcher([])))


def _invalidate(self):
    """Mark the current results as out of date."""
    self.__invalidated = True


def genesFromTable(self, table):
    """Return the list of gene names extracted from `table`.

    Names are the attribute (column) names when ``self.genesinrows`` is
    set, otherwise the string values of the selected gene attribute.
    """
    if self.genesinrows:
        genes = [attr.name for attr in table.domain.attributes]
    else:
        geneattr = self.geneAttrs[self.geneattr]
        genes = [str(ex[geneattr]) for ex in table]
    return genes


def getHierarchy(self, taxid):
    """Return ``((taxid, tree), (None, tree))`` of gene set hierarchies.

    Each tree is a nested ``defaultdict`` keyed by the hierarchy path
    components of the sets in ``self.genesets``.
    """
    def recursive_dict():
        return defaultdict(recursive_dict)

    collection = recursive_dict()

    def collect(col, hier):
        # Indexing the defaultdict creates the nested node as a side effect.
        if hier:
            collect(col[hier[0]], hier[1:])

    for hierarchy, t_id, _ in self.genesets:
        collect(collection[t_id], hierarchy)

    return (taxid, collection[taxid]), (None, collection[None])


def setHierarchy(self, hierarchy, hierarchy_noorg):
    """Rebuild the category tree from two ``(organism, tree)`` pairs."""
    self.groupsWidgetItems = {}

    def fill(col, parent, full=(), org=""):
        for key, value in sorted(col.items()):
            full_cat = full + (key, )
            item = QTreeWidgetItem(parent, [key])
            item.setFlags(item.flags() | Qt.ItemIsUserCheckable |
                          Qt.ItemIsSelectable | Qt.ItemIsEnabled)
            if value:
                # Nodes with children get a tri-state check box.
                item.setFlags(item.flags() | Qt.ItemIsTristate)

            # Restore the stored check state; default is checked.
            checked = self.categoriesCheckState.get((full_cat, org),
                                                    Qt.Checked)
            item.setData(0, Qt.CheckStateRole, checked)
            item.setExpanded(True)
            item.category = full_cat
            item.organism = org
            self.groupsWidgetItems[full_cat] = item
            fill(value, item, full_cat, org=org)

    self.groupsWidget.clear()
    fill(hierarchy[1], self.groupsWidget, org=hierarchy[0])
    fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0])


def refreshHierarchy(self):
    """Rebuild the category tree for the currently selected species."""
    self.setHierarchy(*self.getHierarchy(
        taxid=self.taxid_list[self.speciesIndex]))


def selectedCategories(self):
    """
    Return a list of currently selected hierarchy keys.

    A key is a tuple of identifiers from the root to the leaf of
    the hierarchy tree.
    """
    return [
        key for key, check in self.getHierarchyCheckState().items()
        if check == Qt.Checked
    ]


def getHierarchyCheckState(self):
    """Return a dict mapping ``(category path, organism)`` to check state."""
    def collect(item, full=()):
        checked = item.checkState(0)
        name = str(item.data(0, Qt.DisplayRole))
        full_cat = full + (name, )
        result = [((full_cat, item.organism), checked)]
        for i in range(item.childCount()):
            result.extend(collect(item.child(i), full_cat))
        return result

    items = [
        self.groupsWidget.topLevelItem(i)
        for i in range(self.groupsWidget.topLevelItemCount())
    ]
    states = itertools.chain(*(collect(item) for item in items))
    return dict(states)


def subsetSelectionChanged(self, item, column):
    # The selected geneset (hierarchy) subset has been changed by the
    # user. Update the displayed results.
    # Update the stored state (persistent settings)
    self.categoriesCheckState = self.getHierarchyCheckState()
    categories = self.selectedCategories()

    if self.data is not None:
        # Re-run the enrichment unless the selection is a subset of what
        # was already annotated; in that case filtering is enough.
        if self._nogenematching() or \
                not (set(categories) <=
                     set(self.currentAnnotatedCategories)):
            self.updateAnnotations()
        else:
            self.filterAnnotationsChartView()


def updateGeneMatcherSettings(self):
    # The settings dialog is disabled: the method raises immediately and
    # the statements below are unreachable.
    raise NotImplementedError
    from .OWGOEnrichmentAnalysis import GeneMatcherDialog
    dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings,
                               enabled=[True] * 4, modal=True)
    if dialog.exec_():
        self.geneMatcherSettings = [
            getattr(dialog, item[0]) for item in dialog.items
        ]
        self._invalidateGeneMatcher()
        if self.data is not None:
            self.updateAnnotations()


def _genematcher(self):
    """
    Return a Future[gene.SequenceMatcher]
    """
    taxid = self.taxid_list[self.speciesIndex]
    current, matcher_f = self.__genematcher

    # Reuse the cached future when it is for the same organism.
    if taxid == current and not matcher_f.cancelled():
        return matcher_f

    self._invalidateGeneMatcher()

    if taxid is None:
        self.__genematcher = (None, fulfill(gene.matcher([])))
        return self.__genematcher[1]

    matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy]
    matchers = [
        m for m, use in zip(matchers, self.geneMatcherSettings) if use
    ]

    def create():
        return gene.matcher([m(taxid) for m in matchers])

    matcher_f = self._executor.submit(create)
    self.__genematcher = (taxid, matcher_f)
    return self.__genematcher[1]


def _nogenematching(self):
    """Return True when no organism or no gene matcher is selected."""
    return self.taxid is None or not any(self.geneMatcherSettings)


def updateAnnotations(self):
    """Start a new enrichment computation for the current input data.

    Pending work is cancelled, the view is cleared and the computation
    is submitted to ``self._executor``; results are delivered to
    ``__on_enrichment_finished`` / ``__on_enrichment_failed``.
    """
    if self.data is None:
        return

    assert not self.__state & OWSetEnrichment.Initializing
    self._cancelPending()
    self._clearView()

    self.information(0)
    self.warning(0)
    self.error(0)

    if not self.genesinrows and not self.geneAttrs:
        self.error(0, "Input data contains no columns with gene names")
        return

    self.__state = OWSetEnrichment.RunningEnrichment

    taxid = self.taxid_list[self.speciesIndex]
    self.taxid = taxid

    categories = self.selectedCategories()
    clusterGenes = self.genesFromTable(self.data)

    if self.referenceData is not None and self.useReferenceData:
        referenceGenes = self.genesFromTable(self.referenceData)
    else:
        referenceGenes = None

    self.currentAnnotatedCategories = categories
    genematcher = self._genematcher()
    self.progressBarInit()

    ## Load collections in a worker thread
    # TODO: Use cached collections if already loaded and
    # use ensure_genesetsdownloaded with progress report (OWSelectGenes)
    collections = self._executor.submit(geneset.collections, *categories)

    def refset_null():
        """Return the default background reference set"""
        col = collections.result()
        return reduce(operator.ior, (set(g.genes) for g in col), set())

    def refset_ncbi():
        """Return all NCBI gene names"""
        geneinfo = gene.NCBIGeneInfo(taxid)
        return set(geneinfo.keys())

    def namematcher():
        matcher = genematcher.result()
        match = matcher.set_targets(ref_set.result())
        match.umatch = memoize(match.umatch)
        return match

    def map_unames():
        # NOTE: by the time this runs, `namematcher` has been rebound
        # (below) to the future returned by the executor.
        matcher = namematcher.result()
        query = list(filter(None, map(matcher.umatch, querynames)))
        reference = list(
            filter(None, map(matcher.umatch, ref_set.result())))
        return query, reference

    if referenceGenes is not None:
        ref_set = fulfill(referenceGenes)
    elif self._nogenematching():
        ref_set = self._executor.submit(refset_null)
    else:
        ref_set = self._executor.submit(refset_ncbi)

    # Rebind the name to the future; the closures above rely on this.
    namematcher = self._executor.submit(namematcher)
    querynames = clusterGenes

    state = types.SimpleNamespace()
    state.query_set = clusterGenes
    state.reference_set = referenceGenes
    state.namematcher = namematcher
    state.query_count = len(set(clusterGenes))
    state.reference_count = (len(set(referenceGenes))
                             if referenceGenes is not None else None)
    state.cancelled = False

    progress = methodinvoke(self, "_setProgress", (float, ))
    info = methodinvoke(self, "_setRunInfo", (str, ))

    @withtraceback
    def run():
        info("Loading data")
        match = namematcher.result()
        query, reference = map_unames()
        gscollections = collections.result()

        results = []
        info("Running enrichment")
        p = 0
        for i, gset in enumerate(gscollections):
            genes = set(filter(None, map(match.umatch, gset.genes)))
            enr = set_enrichment(genes, reference, query)
            results.append((gset, enr))

            if state.cancelled:
                raise UserInteruptException

            # Only report progress when the integer percentage changes.
            pnew = int(100 * i / len(gscollections))
            if pnew != p:
                progress(pnew)
                p = pnew

        progress(100)
        info("")
        return query, reference, results

    task = Task(function=run)
    task.resultReady.connect(self.__on_enrichment_finished)
    task.exceptionReady.connect(self.__on_enrichment_failed)
    result = self._executor.submit(task)
    state.results = result

    self.state = state
    self._updatesummary()


def __on_enrichment_failed(self, exception):
    """Report a failed enrichment run and reset the running state."""
    if not isinstance(exception, UserInteruptException):
        print("ERROR:", exception, file=sys.stderr)
        # `_traceback` may be absent; do not let that raise a second
        # error inside the failure handler.
        print(getattr(exception, "_traceback", ""), file=sys.stderr)

    self.progressBarFinished()
    self.setStatusMessage("")
    self.__state &= ~OWSetEnrichment.RunningEnrichment


def __on_enrichment_finished(self, results):
    """Populate the results view from a ``(query, reference, results)`` tuple."""
    assert QThread.currentThread() is self.thread()
    self.__state &= ~OWSetEnrichment.RunningEnrichment

    query, reference, results = results

    old_model = self.annotationsChartView.model()
    if old_model is not None:
        old_model.clear()

    nquery = len(query)
    nref = len(reference)
    maxcount = max((len(e.query_mapped) for _, e in results),
                   default=1)
    maxrefcount = max((len(e.reference_mapped) for _, e in results),
                      default=1)
    # Pad the counts to the number of digits of the largest count. Both
    # columns use the same rule (the reference column previously used
    # the natural logarithm by mistake).
    nspaces = len(str(maxcount))
    refspaces = len(str(maxrefcount))
    query_fmt = "%" + str(nspaces) + "s (%.2f%%)"
    ref_fmt = "%" + str(refspaces) + "s (%.2f%%)"

    def fmt_count(fmt, count, total):
        return fmt % (count, 100.0 * count / (total or 1))

    fmt_query_count = partial(fmt_count, query_fmt)
    fmt_ref_count = partial(fmt_count, ref_fmt)

    linkFont = QFont(self.annotationsChartView.viewOptions().font)
    linkFont.setUnderline(True)

    def item(value=None, tooltip=None, user=None):
        # Qt.UserRole is the model's sort role; it falls back to the
        # display value when no explicit user value is given.
        si = QStandardItem()
        if value is not None:
            si.setData(value, Qt.DisplayRole)
        if tooltip is not None:
            si.setData(tooltip, Qt.ToolTipRole)
        if user is not None:
            si.setData(user, Qt.UserRole)
        else:
            si.setData(value, Qt.UserRole)
        return si

    model = QStandardItemModel()
    model.setSortRole(Qt.UserRole)
    model.setHorizontalHeaderLabels([
        "Category", "Term", "Count", "Reference count", "p-value", "FDR",
        "Enrichment"
    ])

    for gset, enrich in results:
        if len(enrich.query_mapped) == 0:
            continue
        nquery_mapped = len(enrich.query_mapped)
        nref_mapped = len(enrich.reference_mapped)

        row = [
            item(", ".join(gset.hierarchy)),
            item(gsname(gset), tooltip=gset.link),
            item(fmt_query_count(nquery_mapped, nquery),
                 tooltip=nquery_mapped, user=nquery_mapped),
            item(fmt_ref_count(nref_mapped, nref),
                 tooltip=nref_mapped, user=nref_mapped),
            item(fmtp(enrich.p_value), user=enrich.p_value),
            item(),  # column 5, FDR, is computed in filterAnnotationsChartView
            item(enrich.enrichment_score,
                 tooltip="%.3f" % enrich.enrichment_score,
                 user=enrich.enrichment_score)
        ]
        row[0].geneset = gset
        row[0].enrichment = enrich
        row[1].setData(gset.link, gui.LinkRole)
        row[1].setFont(linkFont)
        row[1].setForeground(QColor(Qt.blue))

        model.appendRow(row)

    self.annotationsChartView.setModel(model)
    self.annotationsChartView.selectionModel().selectionChanged.connect(
        self.commit)

    if not model.rowCount():
        self.warning(0, "No enriched sets found.")
    else:
        self.warning(0)

    # Completion candidates: set names plus their individual words.
    allnames = set(
        gsname(gs) for gs, (count, _, _, _) in results if count)
    allnames |= reduce(operator.ior,
                       (set(word_split(name)) for name in allnames),
                       set())

    self.filterCompleter.setModel(None)
    self.completerModel = QStringListModel(sorted(allnames))
    self.filterCompleter.setModel(self.completerModel)

    if results:
        max_score = max(
            (e.enrichment_score for _, e in results
             if np.isfinite(e.enrichment_score)),
            default=1)
        self.annotationsChartView.setItemDelegateForColumn(
            6, BarItemDelegate(self, scale=(0.0, max_score)))

    self.annotationsChartView.setItemDelegateForColumn(
        1, gui.LinkStyledItemDelegate(self.annotationsChartView))

    header = self.annotationsChartView.header()
    for i in range(model.columnCount()):
        sh = self.annotationsChartView.sizeHintForColumn(i)
        sh = max(sh, header.sectionSizeHint(i))
        # Clamp the column width to the range [30, 300].
        self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30))
        # self.annotationsChartView.resizeColumnToContents(i)

    self.filterAnnotationsChartView()
    self.progressBarFinished()
    self.setStatusMessage("")


def _updatesummary(self):
    """Update the info box text from the current run state."""
    state = self.state
    if state is None:
        self.error(0, )
        self.warning(0)
        self.infoBox.setText("No data on input.\n")
        return

    text = "{.query_count} unique names on input\n".format(state)

    if state.results.done() and not state.results.exception():
        mapped, _, _ = state.results.result()
        ratio_mapped = (len(mapped) / state.query_count
                        if state.query_count else 0)
        text += ("%i (%.1f%%) gene names matched" %
                 (len(mapped), 100.0 * ratio_mapped))
    elif not state.results.done():
        text += "..."
    else:
        text += "<Error {}>".format(str(state.results.exception()))
    self.infoBox.setText(text)
    # TODO: warn on no enriched sets found (i.e no query genes
    # mapped to any set)


def filterAnnotationsChartView(self, filterString=""):
    """Apply the category/count/p-value/FDR filters to the results view.

    The FDR column is recomputed over the rows of the selected
    categories. `filterString` is currently unused.
    """
    if self.__state & OWSetEnrichment.RunningEnrichment:
        return

    # TODO: Move filtering to a filter proxy model
    # TODO: Re-enable string search

    categories = set(", ".join(cat)
                     for cat, _ in self.selectedCategories())

    # filterString = str(self.filterLineEdit.text()).lower()

    model = self.annotationsChartView.model()
    if model is None:
        # The filter controls can fire before any enrichment has run,
        # when the view has no model yet; there is nothing to filter.
        return

    def ishidden(index):
        # Is item at index (row) hidden
        item = model.item(index)
        item_cat = item.data(Qt.DisplayRole)
        return item_cat not in categories

    hidemask = [ishidden(i) for i in range(model.rowCount())]

    # compute FDR according the selected categories
    pvals = [
        model.item(i, 4).data(Qt.UserRole)
        for i, hidden in enumerate(hidemask) if not hidden
    ]
    fdrs = utils.stats.FDR(pvals)

    # update FDR for the selected collections and apply filtering rules
    itemsHidden = []
    fdriter = iter(fdrs)
    for index, hidden in enumerate(hidemask):
        if not hidden:
            fdr = next(fdriter)
            pval = model.index(index, 4).data(Qt.UserRole)
            count = model.index(index, 2).data(Qt.ToolTipRole)

            hidden = (self.useMinCountFilter and count < self.minClusterCount) or \
                     (self.useMaxPValFilter and pval > self.maxPValue) or \
                     (self.useMaxFDRFilter and fdr > self.maxFDR)

            if not hidden:
                fdr_item = model.item(index, 5)
                fdr_item.setData(fmtpdet(fdr), Qt.ToolTipRole)
                fdr_item.setData(fmtp(fdr), Qt.DisplayRole)
                fdr_item.setData(fdr, Qt.UserRole)

        self.annotationsChartView.setRowHidden(index, QModelIndex(), hidden)
        itemsHidden.append(hidden)

    if model.rowCount() and all(itemsHidden):
        self.information(0, "All sets were filtered out.")
    else:
        self.information(0)

    self._updatesummary()


@Slot(float)
def _setProgress(self, value):
    """Set the progress bar value (invoked from the worker via methodinvoke)."""
    assert QThread.currentThread() is self.thread()
    self.progressBarSet(value, processEvents=None)


@Slot(str)
def _setRunInfo(self, text):
    """Show `text` as the widget status message."""
    self.setStatusMessage(text)


def commit(self):
    """Send the subset of the input data matching the selected gene sets."""
    if self.data is None or \
            self.__state & OWSetEnrichment.RunningEnrichment:
        return

    model = self.annotationsChartView.model()
    if model is None or self.state is None:
        # No enrichment has produced results yet, so there is neither a
        # selection model nor a name matcher to work with.
        return

    rows = self.annotationsChartView.selectionModel().selectedRows(0)
    selected = [model.item(index.row(), 0) for index in rows]
    mapped = reduce(operator.ior,
                    (set(item.enrichment.query_mapped)
                     for item in selected),
                    set())

    assert self.state.namematcher.done()
    matcher = self.state.namematcher.result()

    if self.genesinrows:
        # Genes are columns: keep only the matching attributes.
        mapped = [
            attr for attr in self.data.domain.attributes
            if matcher.umatch(attr.name) in mapped
        ]
        newdomain = Orange.data.Domain(mapped,
                                       self.data.domain.class_vars,
                                       self.data.domain.metas)
        data = self.data.from_table(newdomain, self.data)
    else:
        # Genes are rows: keep only the matching instances.
        geneattr = self.geneAttrs[self.geneattr]
        selected = [
            i for i, ex in enumerate(self.data)
            if matcher.umatch(str(ex[geneattr])) in mapped
        ]
        data = self.data[selected]

    self.send("Data subset", data)


def onDeleteWidget(self):
    """Cancel pending work and shut down the executor without waiting."""
    if self.state is not None:
        self._cancelPending()
        self.state = None
    self._executor.shutdown(wait=False)
class OWDatabasesUpdate(OWWidget):
    """Widget listing local/server database files with update controls."""

    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/Databases.svg"
    priority = 10

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        # State read by the slots connected below; it must exist before
        # any signal can fire or the initial task is started.
        self.updateItems = []
        self._tasks = []
        self._haveProgress = False

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)
        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

    def RetrieveFilesList(self):
        """Submit a task that fetches the server file list.

        The widget is disabled until ``SetFilesList`` or ``HandleError``
        re-enables it.
        """
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)

        task = Task(function=partial(retrieveFilesList,
                                     methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        self.setEnabled(False)

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.
        """
        self.setEnabled(True)

        localInfo = serverfiles.allinfo()
        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                self._setDownloadButton(item, tree_item, "Update")

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            # Cap the content width at 400 but never go below the header.
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        # Tags starting with '#' are not offered as completions.
        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def _setDownloadButton(self, item, tree_item, text):
        # Place a small tool button labelled `text` in column 2 of
        # `tree_item`; clicking it submits a download for `item`.
        button = QToolButton(
            None, text=text,
            maximumWidth=120,
            minimumHeight=20,
            maximumHeight=20
        )

        if sys.platform == "darwin":
            button.setAttribute(Qt.WA_MacSmallSize)

        button.clicked.connect(
            partial(self.SubmitDownloadTask, item.domain, item.filename)
        )

        self.filesView.setItemWidget(tree_item, 2, button)

    def buttonCheck(self, selected_items, state, button):
        """Enable `button` iff any of `selected_items` has the given `state`.

        An empty `selected_items` disables the button (previously the
        button silently kept whatever state it had before).
        """
        button.setEnabled(
            any(item.state == state for item in selected_items))

    def toggleButtons(self):
        """Update the "Update all"/"Download all" buttons for visible items."""
        selected_items = [item for item, tree_item, _ in self.updateItems
                          if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        """Handle an exception raised while retrieving the file list."""
        if isinstance(exception, ConnectionError):
            self.warning(0,
                         "Could not connect to server! Check your connection "
                         "and try to reconnect.")
            # Show the local files only and offer a reconnect.
            self.SetFilesList({})
            self.retryButton.show()
        else:
            # Forward the traceback as well so the hook can display it.
            sys.excepthook(type(exception), exception,
                           getattr(exception, "__traceback__", None))
            self.progress.setRange(0, 0)
            self.setEnabled(True)

    def UpdateInfoLabel(self):
        """Refresh the summary label with counts/sizes of the visible items."""
        local = [item for item, tree_item, _ in self.updateItems
                 if item.state != AVAILABLE and not tree_item.isHidden()]
        size = sum(float(item.size) for item in local)

        onServer = [item for item, tree_item, _ in self.updateItems
                    if not tree_item.isHidden()]
        sizeOnServer = sum(float(item.size) for item in onServer)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 sizeof_fmt(size),
                 len(onServer),
                 sizeof_fmt(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        """Submit a download for every visible outdated item."""
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        """Submit a download for every visible available or outdated item."""
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    [AVAILABLE, OUTDATED]:
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        """Hide the items that do not match every filter token.

        The tokens are read from the filter line edit; `searchString` is
        accepted for signal compatibility and is not used.
        """
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        sf = LocalFiles(serverfiles.PATH, serverfiles.ServerFiles())
        task = DownloadTask(domain, filename, sf)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        """Update the row of a finished, failed or cancelled download `task`."""
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            self._setDownloadButton(item, tree_item, "Retry")

        else:
            # get the new updated info dict and replace the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

            self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        """Remove the local copy of (domain, filename) and update its row."""
        serverfiles.LOCALFILES.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                     info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down without waiting."""
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        # Iterate over a copy: finished tasks are removed inside the loop.
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        """Report a download error given as an ``exc_info`` triple."""
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        """Return the index of (domain, filename) in ``self.updateItems``.

        Raises:
            ValueError: if no such item is in the list.
        """
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        """Mirror ``self.progress`` onto the widget's progress bar.

        An empty range (min == max) means there is no progress to show.
        """
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        if rmin == rmax:
            self._haveProgress = False
            self.progressBarFinished()
def __init__(self, parent=None):
    """Build the set-enrichment widget GUI and start initialization.

    The widget stays blocked until the submitted ``EnsureDownloaded``
    task finishes and ``__initialize_finish`` runs.
    """
    super().__init__(parent)

    self.geneMatcherSettings = [False, False, True, False]

    self.data = None
    self.referenceData = None
    self.taxid_list = []

    # (taxid, future) pair; starts as an already fulfilled empty matcher.
    self.__genematcher = (None, fulfill(gene.matcher([])))
    self.__invalidated = False

    self.currentAnnotatedCategories = []
    self.state = None
    self.__state = OWSetEnrichment.Initializing

    box = gui.widgetBox(self.controlArea, "Info")
    self.infoBox = gui.widgetLabel(box, "Info")
    self.infoBox.setText("No data on input.\n")

    self.speciesComboBox = gui.comboBox(
        self.controlArea, self, "speciesIndex", "Species",
        callback=self.__on_speciesIndexChanged)

    box = gui.widgetBox(self.controlArea, "Entity names")
    self.geneAttrComboBox = gui.comboBox(
        box, self, "geneattr", "Entity feature", sendSelectedValue=0,
        callback=self.updateAnnotations)

    cb = gui.checkBox(
        box, self, "genesinrows", "Use feature names",
        callback=self.updateAnnotations,
        disables=[(-1, self.geneAttrComboBox)])
    cb.makeConsistent()

    # NOTE: the gene matcher settings button is intentionally disabled.
    # gui.button(box, self, "Gene matcher settings",
    #            callback=self.updateGeneMatcherSettings,
    #            tooltip="Open gene matching settings dialog")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.controlArea, self, "useReferenceData",
        ["All entities", "Reference set (input)"],
        tooltips=[
            "Use entire genome (for gene set enrichment) or all " +
            "available entities for reference",
            "Use entities from Reference Examples input signal " +
            "as reference"
        ],
        box="Reference", callback=self.updateAnnotations)

    box = gui.widgetBox(self.controlArea, "Entity Sets")
    self.groupsWidget = QTreeWidget(self)
    self.groupsWidget.setHeaderLabels(["Category"])
    box.layout().addWidget(self.groupsWidget)

    hLayout = QHBoxLayout()
    hLayout.setSpacing(10)
    hWidget = gui.widgetBox(self.mainArea, orientation=hLayout)

    gui.spin(hWidget, self, "minClusterCount", 0, 100,
             label="Entities",
             tooltip="Minimum entity count",
             callback=self.filterAnnotationsChartView,
             callbackOnReturn=True,
             checked="useMinCountFilter",
             checkCallback=self.filterAnnotationsChartView)

    pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
    cb = gui.checkBox(
        pvalfilterbox, self, "useMaxPValFilter", "p-value",
        callback=self.filterAnnotationsChartView)
    sp = gui.doubleSpin(
        pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001,
        tooltip="Maximum p-value",
        callback=self.filterAnnotationsChartView,
        callbackOnReturn=True,
    )
    # The initial enabled state must follow the p-value check box (it
    # was previously taken from the FDR setting by mistake).
    sp.setEnabled(self.useMaxPValFilter)
    cb.toggled[bool].connect(sp.setEnabled)

    pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight)
    pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

    fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal")
    cb = gui.checkBox(
        fdrfilterbox, self, "useMaxFDRFilter", "FDR",
        callback=self.filterAnnotationsChartView)
    sp = gui.doubleSpin(
        fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001,
        tooltip="Maximum False discovery rate",
        callback=self.filterAnnotationsChartView,
        callbackOnReturn=True,
    )
    sp.setEnabled(self.useMaxFDRFilter)
    cb.toggled[bool].connect(sp.setEnabled)

    fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight)
    fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft)

    self.filterLineEdit = QLineEdit(self, placeholderText="Filter ...")

    self.filterCompleter = QCompleter(self.filterLineEdit)
    self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive)
    self.filterLineEdit.setCompleter(self.filterCompleter)

    hLayout.addWidget(self.filterLineEdit)
    self.mainArea.layout().addWidget(hWidget)

    self.filterLineEdit.textChanged.connect(
        self.filterAnnotationsChartView)

    self.annotationsChartView = QTreeView(
        alternatingRowColors=True,
        sortingEnabled=True,
        selectionMode=QTreeView.ExtendedSelection,
        rootIsDecorated=False,
        editTriggers=QTreeView.NoEditTriggers,
    )
    self.annotationsChartView.viewport().setMouseTracking(True)
    self.mainArea.layout().addWidget(self.annotationsChartView)

    contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
        self.annotationsChartView)
    self.annotationsChartView.header().installEventFilter(
        contextEventFilter)

    self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged)
    gui.auto_commit(self.controlArea, self, "autocommit", "Commit")

    # Block the widget until the required database files are available.
    self.setBlocking(True)

    task = EnsureDownloaded([(taxonomy.Taxonomy.DOMAIN,
                              taxonomy.Taxonomy.FILENAME),
                             (geneset.sfdomain, "index.pck")])
    task.finished.connect(self.__initialize_finish)
    self.setStatusMessage("Initializing")
    self._executor = ThreadExecutor(parent=self,
                                    threadPool=QThreadPool(self))
    self._executor.submit(task)
def __init__(self, parent=None):
    """Build the widget's controls and schedule the initial resource task.

    Creates the "Input", "Filter" and "Select" tab pages in the control
    area and, in the main area, a vertical splitter that parents the GO
    tree view and the table of significant terms.  The widget is then
    marked as blocking and an ``EnsureDownloaded`` task is submitted to a
    ``ThreadExecutor``; ``__initialize_finish`` is connected to the task's
    ``finished`` signal.

    :param parent: forwarded to the base-class initializer.
    """
    # The bound ``super()`` proxy already supplies ``self``; the previous
    # ``super().__init__(self, parent)`` passed the instance a second time
    # as the first positional argument.
    super().__init__(parent)

    # Plain state, populated later by the input/ontology handlers.
    self.clusterDataset = None
    self.referenceDataset = None
    self.ontology = None
    self.annotations = None
    self.loadedAnnotationCode = "---"
    self.treeStructRootKey = None
    self.probFunctions = [stats.Binomial(), stats.Hypergeometric()]
    self.selectedTerms = []
    self.selectionChanging = 0
    self.__state = OWGOEnrichmentAnalysis.Initializing

    self.annotationCodes = []

    #############
    ## GUI
    #############

    self.tabs = gui.tabWidget(self.controlArea)

    ## Input tab
    self.inputTab = gui.createTabPage(self.tabs, "Input")
    box = gui.widgetBox(self.inputTab, "Info")
    self.infoLabel = gui.widgetLabel(box, "No data on input\n")

    gui.button(
        box, self, "Ontology/Annotation Info",
        callback=self.ShowInfo,
        tooltip="Show information on loaded ontology and annotations")

    box = gui.widgetBox(self.inputTab, "Organism")
    self.annotationComboBox = gui.comboBox(
        box, self, "annotationIndex", items=self.annotationCodes,
        callback=self._updateEnrichment, tooltip="Select organism")

    genebox = gui.widgetBox(self.inputTab, "Gene Names")
    self.geneAttrIndexCombo = gui.comboBox(
        genebox, self, "geneAttrIndex", callback=self._updateEnrichment,
        tooltip="Use this attribute to extract gene names from input data")
    # The attribute combo is only usable while "Use column names" is off.
    self.geneAttrIndexCombo.setDisabled(self.useAttrNames)

    cb = gui.checkBox(
        genebox, self, "useAttrNames", "Use column names",
        tooltip="Use column names for gene names",
        callback=self._updateEnrichment)
    cb.toggled[bool].connect(self.geneAttrIndexCombo.setDisabled)

    gui.button(
        genebox, self, "Gene matcher settings",
        callback=self.UpdateGeneMatcher,
        tooltip="Open gene matching settings dialog")

    self.referenceRadioBox = gui.radioButtonsInBox(
        self.inputTab, self, "useReferenceDataset",
        ["Entire genome", "Reference set (input)"],
        tooltips=["Use entire genome for reference",
                  "Use genes from Referece Examples input signal as reference"],
        box="Reference", callback=self._updateEnrichment)
    # The "Reference set (input)" button starts out disabled.
    self.referenceRadioBox.buttons[1].setDisabled(True)

    gui.radioButtonsInBox(
        self.inputTab, self, "aspectIndex",
        ["Biological process", "Cellular component", "Molecular function"],
        box="Aspect", callback=self._updateEnrichment)

    ## Filter tab
    self.filterTab = gui.createTabPage(self.tabs, "Filter")
    box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")

    gui.checkBox(
        box, self, "filterByNumOfInstances", "Genes",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by number of input genes mapped to a term")
    ibox = gui.indentedBox(box)
    gui.spin(
        ibox, self, 'minNumOfInstances', 1, 100,
        step=1, label='#:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Min. number of input genes mapped to a term")

    gui.checkBox(
        box, self, "filterByPValue_nofdr", "p-value",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by term p-value")
    gui.doubleSpin(
        gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
        step=1e-8, label='p:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Max term p-value")

    #use filterByPValue for FDR, as it was the default in prior versions
    gui.checkBox(
        box, self, "filterByPValue", "FDR",
        callback=self.FilterAndDisplayGraph,
        tooltip="Filter by term FDR")
    # NOTE(review): label and tooltip below still say "p"/"p-value" although
    # this spin box is paired with the FDR check box -- confirm the wording.
    gui.doubleSpin(
        gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
        step=1e-8, label='p:', labelWidth=15,
        callback=self.FilterAndDisplayGraph,
        callbackOnReturn=True,
        tooltip="Max term p-value")

    box = gui.widgetBox(box, "Significance test")
    gui.radioButtonsInBox(
        box, self, "probFunc", ["Binomial", "Hypergeometric"],
        tooltips=["Use binomial distribution test",
                  "Use hypergeometric distribution test"],
        callback=self._updateEnrichment)

    box = gui.widgetBox(
        self.filterTab, "Evidence codes in annotation", addSpace=True)
    # One check box per evidence type, kept in a dict keyed by the type.
    self.evidenceCheckBoxDict = {}
    for etype in go.evidenceTypesOrdered:
        ecb = QCheckBox(
            etype, toolTip=go.evidenceTypes[etype],
            checked=self.useEvidenceType[etype])
        ecb.toggled.connect(self.__on_evidenceChanged)
        box.layout().addWidget(ecb)
        self.evidenceCheckBoxDict[etype] = ecb

    ## Select tab
    self.selectTab = gui.createTabPage(self.tabs, "Select")
    box = gui.radioButtonsInBox(
        self.selectTab, self, "selectionDirectAnnotation",
        ["Directly or Indirectly", "Directly"],
        box="Annotated genes",
        callback=self.ExampleSelection)

    box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
    gui.radioButtonsInBox(
        box, self, "selectionDisjoint",
        btnLabels=["All selected genes",
                   "Term-specific genes",
                   "Common term genes"],
        tooltips=["Outputs genes annotated to all selected GO terms",
                  "Outputs genes that appear in only one of selected GO terms",
                  "Outputs genes common to all selected GO terms"],
        callback=[self.ExampleSelection,
                  self.UpdateAddClassButton])

    self.addClassCB = gui.checkBox(
        box, self, "selectionAddTermAsClass", "Add GO Term as class",
        callback=self.ExampleSelection)

    # ListView for DAG, and table for significant GOIDs
    self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                       'FDR', 'Genes', 'Enrichment']

    self.splitter = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(self.splitter)

    # list view
    self.listView = GOTreeWidget(self.splitter)
    self.listView.setSelectionMode(QTreeView.ExtendedSelection)
    self.listView.setAllColumnsShowFocus(1)
    self.listView.setColumnCount(len(self.DAGcolumns))
    self.listView.setHeaderLabels(self.DAGcolumns)

    self.listView.header().setSectionsClickable(True)
    self.listView.header().setSortIndicatorShown(True)
    self.listView.setSortingEnabled(True)
    # Column 6 is 'Enrichment' in ``DAGcolumns``.
    self.listView.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))
    self.listView.setRootIsDecorated(True)

    self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

    # table of significant GO terms
    self.sigTerms = QTreeWidget(self.splitter)
    self.sigTerms.setColumnCount(len(self.DAGcolumns))
    self.sigTerms.setHeaderLabels(self.DAGcolumns)
    self.sigTerms.setSortingEnabled(True)
    self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
    self.sigTerms.setItemDelegateForColumn(
        6, EnrichmentColumnItemDelegate(self))

    self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

    self.sigTableTermsSorted = []
    self.graph = {}

    self.inputTab.layout().addStretch(1)
    self.filterTab.layout().addStretch(1)
    self.selectTab.layout().addStretch(1)

    # Block the widget until the EnsureDownloaded task reports ``finished``
    # (handled by ``__initialize_finish``).
    self.setBlocking(True)
    self._executor = ThreadExecutor()
    self._init = EnsureDownloaded(
        [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME),
         ("GO", "taxonomy.pickle")]
    )
    self._init.finished.connect(self.__initialize_finish)
    self._executor.submit(self._init)
class OWdictyExpress(OWWidget): name = "dictyExpress" description = "Time-course gene expression data" icon = "../widgets/icons/OWdictyExpress.png" want_main_area = True priority = 3 class Inputs: pass class Outputs: etc_data = Output("Data", Table) class Error(OWWidget.Error): unreachable_host = Msg('Host not reachable') invalid_credentials = Msg('Invalid credentials') username = settings.Setting('') # password = settings.Setting('') gene_as_attr_name = settings.Setting(0) selected_item = settings.Setting(None, schema_only=True) auto_commit = settings.Setting(False, schema_only=True) def __init__(self): super().__init__() self.res = None self.organism = '44689' self.server = 'https://dictyexpress.research.bcm.edu' self.headerLabels = [x[1] for x in Labels] self.searchString = "" self.items = [] self.progress_bar = None # threads self.threadpool = QThreadPool() # Login Section box = gui.widgetBox(self.controlArea, 'Login') self.namefield = gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Output", addSpace=True) gui.radioButtonsInBox(box, self, "gene_as_attr_name", ["Genes in rows", "Genes in columns"], callback=self.invalidate) self.controlArea.layout().addWidget(h_line()) self.refresh_button = gui.button(self.controlArea, self, "Refresh", callback=self.refresh) self.handle_cache_button(True) gui.rubber(self.controlArea) self.commit_button = gui.auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False) # Experiment Section label = QLabel("Available projects:") my_font = QFont() my_font.setBold(True) label.setFont(my_font) self.mainArea.layout().addWidget(label) self.mainArea.layout().addWidget(h_line()) self.filter = gui.lineEdit(self.mainArea, self, "searchString", "Filter:", callbackOnType=True, callback=self.search_update) self.experimentsWidget = QTreeWidget(alternatingRowColors=True, rootIsDecorated=False, uniformRowHeights=True, sortingEnabled=True) self.experimentsWidget.setItemDelegateForColumn( 0, 
gui.IndicatorItemDelegate(self, role=Qt.DisplayRole)) self.experimentsWidget.selectionModel().selectionChanged.connect( self.on_selection_changed) self.experimentsWidget.setHeaderLabels(self.headerLabels) self.mainArea.layout().addWidget(self.experimentsWidget) self.auth_set() self.connect() self.sizeHint() def sizeHint(self): return QSize(1400, 680) def auth_set(self): self.passfield.setDisabled(not self.username) def auth_changed(self): self.auth_set() self.connect() def refresh(self): self.reset() self.load_experiments() def reset(self): self.experimentsWidget.clear() # clear QTreeWidget self.items = [] # self.lastSelected = None self.searchString = "" def search_update(self): parts = self.searchString.split() for item in self.items: item.setHidden(not all(s in item for s in parts)) def progress_advance(self): # GUI should be updated in main thread. That's why we are calling advance method here assert threading.current_thread() == threading.main_thread() if self.progress_bar: self.progress_bar.advance() def handle_error(self, ex): self.progress_bar.finish() self.setStatusMessage('') if isinstance(ex, ConnectionError) or isinstance(ex, ValueError): self.Error.unreachable_host() print(ex) def load_experiments_result(self, experiments): self.load_tree_items(experiments) self.progress_bar.finish() self.setStatusMessage('') def connect(self): self.res = None self.Error.clear() self.reset() self.handle_cache_button(False) user, password = resolwe.DEFAULT_EMAIL, resolwe.DEFAULT_PASSWD if self.username or self.password: user, password = self.username, self.password try: self.res = resolwe.connect(user, password, self.server, 'genesis') except resolwe.ResolweAuthException: self.Error.invalid_credentials() else: self.load_experiments() self.handle_cache_button(True) def load_experiments(self): if self.res: # init progress bar self.progress_bar = gui.ProgressBar(self, iterations=2) # status message self.setStatusMessage('downloading experiments') worker = 
Worker(self.res.fetch_etc_objects, progress_callback=True) worker.signals.progress.connect(self.progress_advance) worker.signals.result.connect(self.load_experiments_result) worker.signals.error.connect(self.handle_error) # move download process to worker thread self.threadpool.start(worker) def load_tree_items(self, list_of_exp): self.items = [ CustomTreeItem(self.experimentsWidget, item) for item in list_of_exp ] for i in range(len(self.headerLabels)): self.experimentsWidget.resizeColumnToContents(i) self.set_cached_indicator() self.set_selected() def set_selected(self): for item in self.items: if self.selected_item and item.gen_data_id == self.selected_item: self.experimentsWidget.setCurrentItem(item) def on_selection_changed(self): self.invalidate() def invalidate(self): self.commit() def handle_cache_button(self, handle): self.refresh_button.setEnabled(handle) def send_to_output(self, result): self.progress_bar.finish() self.setStatusMessage('') etc_json, table_name = result # convert to table data = etc_to_table(etc_json, bool(self.gene_as_attr_name)) # set table name data.name = table_name # match genes gene_matcher = GeneMatcher(str(self.organism)) if not bool(self.gene_as_attr_name): if 'Gene' in data.domain: gene_column = data.domain['Gene'] gene_names = data.get_column_view(gene_column)[0] gene_matcher.genes = gene_names domain_ids = Domain([], metas=[StringVariable(ENTREZ_ID)]) data_ids = [[str(gene.gene_id) if gene.gene_id else '?'] for gene in gene_matcher.genes] table_ids = Table(domain_ids, data_ids) data = Table.concatenate([data, table_ids]) data.attributes[GENE_ID_COLUMN] = ENTREZ_ID else: gene_matcher.match_table_attributes(data) data.attributes[GENE_ID_ATTRIBUTE] = ENTREZ_ID # add table attributes data.attributes[TAX_ID] = str(self.organism) data.attributes[GENE_AS_ATTRIBUTE_NAME] = bool(self.gene_as_attr_name) # reset cache indicators self.set_cached_indicator() # send data to the output signal self.Outputs.etc_data.send(data) def 
commit(self): self.Error.clear() selected_item = self.experimentsWidget.currentItem( ) # get selected TreeItem self.selected_item = selected_item.gen_data_id if selected_item: # init progress bar self.progress_bar = gui.ProgressBar(self, iterations=1) # status message self.setStatusMessage('downloading experiment data') worker = Worker( self.res.download_etc_data, selected_item.gen_data_id, table_name=selected_item.data_name, progress_callback=True, ) worker.signals.progress.connect(self.progress_advance) worker.signals.result.connect(self.send_to_output) worker.signals.error.connect(self.handle_error) # move download process to worker thread self.threadpool.start(worker) def set_cached_indicator(self): cached = self.res.get_cached_ids() for item in self.items: if item.gen_data_id in cached: item.setData(0, Qt.DisplayRole, " ") else: item.setData(0, Qt.DisplayRole, "")
class OWWordEnrichment(OWWidget):
    """Widget that lists words enriched in the selected documents.

    For every bag-of-words feature a p-value (``hypergeom_p_values``) and an
    FDR value (``false_discovery_rate``) is computed from the "Data" and
    "Selected Data" inputs; the words passing the enabled filters are shown
    in a sortable tree widget.
    """

    # Basic widget info
    name = "Word Enrichment"
    description = "Word enrichment analysis for selected documents."
    icon = "icons/SetEnrichment.svg"
    priority = 600

    # Input/output
    class Inputs:
        selected_data = Input("Selected Data", Table)
        data = Input("Data", Table)

    want_main_area = True

    class Error(OWWidget.Error):
        no_bow_features = Msg('No bag-of-words features!')
        no_words_overlap = Msg('No words overlap!')
        empty_selection = Msg('Selected data is empty!')
        all_selected = Msg('All examples can not be selected!')

    # Settings
    filter_by_p = Setting(False)
    filter_p_value = Setting(0.01)
    filter_by_fdr = Setting(True)
    filter_fdr_value = Setting(0.2)

    def __init__(self):
        """Create the info labels, the filter controls and the word list."""
        super().__init__()

        # Init data
        self.data = None
        self.selected_data = None
        # used for transforming the 'selected data' into the 'data' domain
        self.selected_data_transformed = None

        self.words = []
        self.p_values = []
        self.fdr_values = []

        # Info section
        fbox = gui.widgetBox(self.controlArea, "Info")
        self.info_all = gui.label(fbox, self, 'Cluster words:')
        self.info_sel = gui.label(fbox, self, 'Selected words:')
        self.info_fil = gui.label(fbox, self, 'After filtering:')

        # Filtering settings
        fbox = gui.widgetBox(self.controlArea, "Filter")
        hbox = gui.widgetBox(fbox, orientation=0)

        self.chb_p = gui.checkBox(hbox, self, "filter_by_p", "p-value",
                                  callback=self.filter_and_display,
                                  tooltip="Filter by word p-value")
        self.spin_p = gui.doubleSpin(hbox, self, 'filter_p_value',
                                     1e-4, 1, step=1e-4, labelWidth=15,
                                     callback=self.filter_and_display,
                                     callbackOnReturn=True,
                                     tooltip="Max p-value for word")
        self.spin_p.setEnabled(self.filter_by_p)

        hbox = gui.widgetBox(fbox, orientation=0)
        self.chb_fdr = gui.checkBox(hbox, self, "filter_by_fdr", "FDR",
                                    callback=self.filter_and_display,
                                    tooltip="Filter by word FDR")
        self.spin_fdr = gui.doubleSpin(hbox, self, 'filter_fdr_value',
                                       1e-4, 1, step=1e-4, labelWidth=15,
                                       callback=self.filter_and_display,
                                       callbackOnReturn=True,
                                       tooltip="Max p-value for word")
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        gui.rubber(self.controlArea)

        # Word's list view
        self.cols = ['Word', 'p-value', 'FDR']
        self.sig_words = QTreeWidget()
        self.sig_words.setColumnCount(len(self.cols))
        self.sig_words.setHeaderLabels(self.cols)
        self.sig_words.setSortingEnabled(True)
        self.sig_words.setSelectionMode(QTreeView.ExtendedSelection)
        self.sig_words.sortByColumn(2, 0)   # 0 is ascending order
        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)
        self.mainArea.layout().addWidget(self.sig_words)

    @Inputs.data
    def set_data(self, data=None):
        """Store the "Data" input."""
        self.data = data

    @Inputs.selected_data
    def set_data_selected(self, data=None):
        """Store the "Selected Data" input."""
        self.selected_data = data

    def handleNewSignals(self):
        """Validate the inputs and recompute."""
        self.check_data()

    def get_bow_domain(self):
        """Return a copy of the data domain keeping only 'bow-feature' attributes."""
        domain = self.data.domain
        return Domain(
            attributes=[a for a in domain.attributes
                        if a.attributes.get('bow-feature', False)],
            class_vars=domain.class_vars,
            metas=domain.metas,
            source=domain)

    def check_data(self):
        """Validate both inputs; run ``apply`` when usable, else clear the view.

        Sets the matching widget error and clears the view for an empty
        selection, a domain without bag-of-words features, a selection with
        no word counts, or a selection as long as the whole data set.
        """
        self.Error.clear()
        if isinstance(self.data, Table) and \
                isinstance(self.selected_data, Table):
            if len(self.selected_data) == 0:
                self.Error.empty_selection()
                self.clear()
                return

            # keep only BoW features
            bow_domain = self.get_bow_domain()
            if len(bow_domain.attributes) == 0:
                self.Error.no_bow_features()
                self.clear()
                return
            self.data = Corpus.from_table(bow_domain, self.data)
            self.selected_data_transformed = Corpus.from_table(
                bow_domain, self.selected_data)

            if np_sp_sum(self.selected_data_transformed.X) == 0:
                self.Error.no_words_overlap()
                self.clear()
            elif len(self.data) == len(self.selected_data):
                self.Error.all_selected()
                self.clear()
            else:
                self.apply()
        else:
            self.clear()

    def clear(self):
        """Empty the word list and reset the three info labels."""
        self.sig_words.clear()
        self.info_all.setText('Cluster words:')
        self.info_sel.setText('Selected words:')
        self.info_fil.setText('After filtering:')

    def filter_enabled(self, b):
        """Enable or disable the filter controls.

        The check boxes follow ``b`` directly. Each spin box is enabled only
        when ``b`` is true and its check box is ticked, matching the state
        set up in ``__init__`` and ``filter_and_display``.
        """
        self.chb_p.setEnabled(b)
        self.chb_fdr.setEnabled(b)
        self.spin_p.setEnabled(bool(b and self.filter_by_p))
        self.spin_fdr.setEnabled(bool(b and self.filter_by_fdr))

    def filter_and_display(self):
        """Rebuild the word list from the stored results and update the labels."""
        self.spin_p.setEnabled(self.filter_by_p)
        self.spin_fdr.setEnabled(self.filter_by_fdr)
        self.sig_words.clear()

        if self.selected_data_transformed is None:  # do nothing when no Data
            return

        count = 0
        if self.words:
            for word, pval, fval in zip(self.words, self.p_values,
                                        self.fdr_values):
                # A disabled filter lets every word through.
                if (not self.filter_by_p or pval <= self.filter_p_value) and \
                        (not self.filter_by_fdr
                         or fval <= self.filter_fdr_value):
                    it = EATreeWidgetItem(word, pval, fval, self.sig_words)
                    self.sig_words.addTopLevelItem(it)
                    count += 1

        for i in range(len(self.cols)):
            self.sig_words.resizeColumnToContents(i)

        self.info_all.setText('Cluster words: {}'.format(
            len(self.selected_data_transformed.domain.attributes)))
        self.info_sel.setText('Selected words: {}'.format(
            np.count_nonzero(
                np_sp_sum(self.selected_data_transformed.X, axis=0))))
        if not self.filter_by_p and not self.filter_by_fdr:
            self.info_fil.setText('After filtering:')
            self.info_fil.setEnabled(False)
        else:
            self.info_fil.setEnabled(True)
            self.info_fil.setText('After filtering: {}'.format(count))

    def progress(self, p):
        """Progress callback: forward ``p`` to the widget's progress bar."""
        self.progressBarSet(p)

    def apply(self):
        """Compute p-values and FDR values for all words and display them.

        The filter controls are disabled and the progress bar is shown for
        the duration of the computation; both are restored even when the
        computation raises.
        """
        self.clear()
        self.progressBarInit()
        self.filter_enabled(False)

        try:
            self.words = [
                i.name
                for i in self.selected_data_transformed.domain.attributes
            ]
            self.p_values = hypergeom_p_values(
                self.data.X, self.selected_data_transformed.X,
                callback=self.progress)
            self.fdr_values = false_discovery_rate(self.p_values)
            self.filter_and_display()
        finally:
            # Without this, an exception above would leave the controls
            # disabled and the progress bar unfinished.
            self.filter_enabled(True)
            self.progressBarFinished()
def __init__(self):
    """Create the login/output controls and the experiment list, then connect.

    Sets the connection defaults (organism id, server URL), builds the
    control-area widgets and the experiment tree in the main area, and
    finally calls ``auth_set`` and ``connect``.
    """
    super().__init__()

    self.res = None
    self.organism = '44689'
    self.server = 'https://dictyexpress.research.bcm.edu'
    # Column headers are the second element of each ``Labels`` entry.
    self.headerLabels = [x[1] for x in Labels]
    self.searchString = ""
    self.items = []

    self.progress_bar = None
    # threads
    self.threadpool = QThreadPool()

    # Login Section
    box = gui.widgetBox(self.controlArea, 'Login')

    # NOTE(review): the next statement reached this review with part of its
    # text masked (the "******" runs). It is reproduced verbatim, is not
    # valid Python as shown, and must be restored from version control.
    # Presumably the masked span also creates the password field and the
    # "Output" box used by the radio buttons below -- TODO confirm.
    self.namefield = gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Output", addSpace=True)
    gui.radioButtonsInBox(box, self, "gene_as_attr_name",
                          ["Genes in rows", "Genes in columns"],
                          callback=self.invalidate)

    self.controlArea.layout().addWidget(h_line())

    self.refresh_button = gui.button(self.controlArea, self, "Refresh",
                                     callback=self.refresh)
    self.handle_cache_button(True)

    gui.rubber(self.controlArea)

    self.commit_button = gui.auto_commit(self.controlArea, self,
                                         "auto_commit", "&Commit",
                                         box=False)

    # Experiment Section
    label = QLabel("Available projects:")
    my_font = QFont()
    my_font.setBold(True)
    label.setFont(my_font)
    self.mainArea.layout().addWidget(label)

    self.mainArea.layout().addWidget(h_line())

    # Typing in the filter box calls ``search_update`` on every change.
    self.filter = gui.lineEdit(self.mainArea, self, "searchString",
                               "Filter:", callbackOnType=True,
                               callback=self.search_update)

    self.experimentsWidget = QTreeWidget(alternatingRowColors=True,
                                         rootIsDecorated=False,
                                         uniformRowHeights=True,
                                         sortingEnabled=True)

    # Column 0 is drawn by an indicator delegate reading Qt.DisplayRole.
    self.experimentsWidget.setItemDelegateForColumn(
        0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

    self.experimentsWidget.selectionModel().selectionChanged.connect(
        self.on_selection_changed)

    self.experimentsWidget.setHeaderLabels(self.headerLabels)
    self.mainArea.layout().addWidget(self.experimentsWidget)

    self.auth_set()
    self.connect()

    # NOTE(review): the return value is discarded here -- confirm whether
    # this call is needed.
    self.sizeHint()