class OWGEODatasets(OWWidget):
    """
    Widget for browsing GEO data sets (GDS) and sending one as a table.

    The main area shows a filterable list of data sets, the description
    of the selected one and a checkable tree of its sample annotations.
    The selected data set is retrieved in a background task and sent on
    the "Expression Data" output.
    """
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates",
        "splitterSettings", "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    # Check states of the annotation tree items, keyed by
    # (dataset_id, type) and (dataset_id, type, subset) tuples.
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    # User supplied output names, keyed by dataset id.
    datasetNames = Setting({})
    # Saved QSplitter states (vertical, horizontal); restored in __init__.
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))
    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit",
                              "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # The instance attribute shadows the commitIf method for all
            # later ``self.commitIf()`` lookups; the callbacks created
            # above already hold the bound method.
            # NOTE(review): presumably gui.auto_commit itself gates
            # self.commit on autoCommit -- confirm.
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        # Set while setAnnotations rebuilds the tree so that the
        # itemChanged handler ignores programmatic check state changes.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # The widget stays blocked and disabled until the model built in
        # the background task is installed by _initializemodel.
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Set the progress bar value (invoked from the init task)."""
        self.progressBarValue = value

    def _initializemodel(self):
        """
        Install the model produced by the init task and restore selection.

        Connected to the init task's `finished` signal. Also fills the
        filter completer with tokens taken from the data set fields
        listed in `self.searchKeys`.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys)
        # Replace punctuation with spaces so the text splits into tokens.
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            # Re-select the row whose column 1 value matches the stored
            # data set id.
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss
                       if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label with total/cached/filtered counts."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """
        Update the widget state after the selected data set changed.

        Stores the selected entry in `currentGds`, refreshes the
        description, annotations and name edit, then calls `commitIf`.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """
        Rebuild the sample annotations tree for the data set `gds`.

        `gds` must provide "dataset_id" and "subsets"; each subset
        provides "type", "description" and "sample_id". Check states are
        restored from `gdsSelectionStates` (default: checked).
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for subset_type, subsets in annotations.items():
            key = (gds["dataset_id"], subset_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree,
                                         [subset_type])
            subsetItem.setFlags(subsetItem.flags() |
                                Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], subset_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """
        Store the check state of every annotation tree item.

        `item` and `column` come from the `itemChanged` signal and are
        not used; the whole tree is re-read on each change.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            top_item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[top_item.key] = top_item.checkState(0)
            for j in range(top_item.childCount()):
                child = top_item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit text to the data set list."""
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple: `samples`
        is a list of selected (sample type, sample value) tuples and
        `used_types` is a list of the sample types that have at least
        one selected value.

        .. note:: if some Sample annotation type has no selected values,
                  `samples` will contain all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                # Items without a stored state count as selected.
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if `autoCommit` is set, else flag a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Update the progress bar from a (value, total) progress pair."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """
        Start a background task retrieving the current data set.

        Does nothing when no data set is selected. The widget is
        disabled and blocked until `_on_dataready` runs.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type,
                         title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"])

            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """
        Filter and send the data retrieved by the task started in commit.

        Connected to the data task's `finished` signal. A URLError from
        the task is reported as a widget error and nothing is sent.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            # Exceptions carry `__traceback__` only on Python 3; default
            # to None so the handler itself cannot raise AttributeError.
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__", None))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()
        samples = set(samples)

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            # Keep only the rows whose annotations are all selected.
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            # Keep only the columns whose annotations are all selected.
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas)

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = {s[0] for s in samples}
            for attr in domain.attributes:
                attr.attributes = {
                    key: value for key, value in attr.attributes.items()
                    if key in stypes
                }
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid",
                            self.currentGds.get("taxid", ""), 10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        # Column 0 is drawn by the indicator delegate set up in __init__.
        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Store the current state of both splitters in the settings."""
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        """
        Add the current data set's summary and annotations to the report.

        Nothing is reported when no data set is selected.
        """
        if not self.currentGds:
            # Subscripting None below would raise TypeError.
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for subset_type in subsets:
            self.report_html += "<b>" + subset_type + ":</b></br>"
            for desc, count in subsets[subset_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWGEODatasets(OWWidget):
    """
    Widget for browsing GEO data sets (GDS) and sending one as a table.

    The main area shows a filterable list of data sets, the description
    of the selected one and a checkable tree of its sample annotations.
    The selected data set is retrieved in a background task and sent on
    the "Expression Data" output.
    """
    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.outputs = [("Expression Data", ExampleTable)]

        ## Settings
        self.selectedAnnotation = 0
        self.includeIf = False
        self.minSamples = 3
        self.autoCommit = False
        self.outputRows = 0
        self.mergeSpots = True
        self.filterString = ""
        self.currentGds = None
        self.selectionChanged = False
        # Check states of the annotation tree items, keyed by
        # (dataset_id, type) and (dataset_id, type, subset) tuples.
        self.gdsSelectionStates = {}
        # Saved QSplitter states (vertical, horizontal); restored below.
        self.splitterSettings = [
            '\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
            '\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
        ]
        # User supplied output names, keyed by dataset id.
        self.datasetNames = {}
        self.loadSettings()

        self.datasetName = ""

        ## GUI
        self.infoBox = OWGUI.widgetLabel(
            OWGUI.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n\n"
        )

        box = OWGUI.widgetBox(self.controlArea, "Output", addSpace=True)
        OWGUI.radioButtonsInBox(box, self, "outputRows",
                                ["Genes or spots", "Samples"], "Rows",
                                callback=self.commitIf)
        OWGUI.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                       callback=self.commitIf)

        OWGUI.separator(box)
        self.nameEdit = OWGUI.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        box = OWGUI.widgetBox(self.controlArea, "Commit", addSpace=True)
        self.commitButton = OWGUI.button(box, self, "Commit",
                                         callback=self.commit)
        cb = OWGUI.checkBox(box, self, "autoCommit", "Commit on any change")
        OWGUI.setStopper(self, self.commitButton, cb, "selectionChanged",
                         self.commit)
        OWGUI.rubber(self.controlArea)

        self.filterLineEdit = OWGUIEx.lineEditHint(
            self.mainArea, self, "filterString", "Filter",
            caseSensitive=False, matchAnywhere=True,
            callback=self.filter, delimiters=" ")

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QAbstractItemView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, OWGUI.IndicatorItemDelegate(self.treeWidget,
                                           role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = OWGUI.widgetBox(splitterH, "Description")
        self.infoGDS = OWGUI.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        OWGUI.rubber(box)

        box = OWGUI.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # Set while setAnnotations rebuilds the tree so that the
        # itemChanged handler ignores programmatic check state changes.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # The widget stays blocked and disabled until the model built in
        # the background task is installed by _initializemodel.
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @pyqtSlot(float)
    def _setProgress(self, value):
        """Set the progress bar value (invoked from the init task)."""
        self.progressBarValue = value

    def _initializemodel(self):
        """
        Install the model produced by the init task and restore selection.

        Connected to the init task's `finished` signal. Also fills the
        filter line edit's hints with tokens taken from the data set
        fields listed in `self.searchKeys`.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        # Replace punctuation with spaces so the text splits into tokens.
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = string.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.filterLineEdit.setItems(filter_items)

        if self.currentGds:
            # Re-select the row whose column 1 value matches the stored
            # data set id.
            gdss = [(i, proxy.data(proxy.index(i, 1), Qt.DisplayRole))
                    for i in range(proxy.rowCount())]
            current = [i for i, variant in gdss
                       if variant.isValid() and
                       str(variant.toString()) ==
                       self.currentGds["dataset_id"]]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(current_index)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label with total/cached/filtered counts."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """
        Update the widget state after the selected data set changed.

        Stores the selected entry in `currentGds`, refreshes the
        description, annotations and name edit, then calls `commitIf`.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """
        Rebuild the sample annotations tree for the data set `gds`.

        `gds` must provide "dataset_id" and "subsets"; each subset
        provides "type", "description" and "sample_id". Check states are
        restored from `gdsSelectionStates` (default: checked).
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for subset_type, subsets in annotations.items():
            key = (gds["dataset_id"], subset_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree,
                                         [subset_type])
            subsetItem.setFlags(subsetItem.flags() |
                                Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], subset_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """
        Store the check state of every annotation tree item.

        `item` and `column` come from the `itemChanged` signal and are
        not used; the whole tree is re-read on each change.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            top_item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[top_item.key] = top_item.checkState(0)
            for j in range(top_item.childCount()):
                child = top_item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit text to the data set list."""
        filter_string = unicode(self.filterLineEdit.text(), errors="ignore")
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple: `samples`
        is a list of selected (sample type, sample value) tuples and
        `used_types` is a list of the sample types that have at least
        one selected value.

        .. note:: if some Sample annotation type has no selected values,
                  `samples` will contain all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                # Items without a stored state count as selected.
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if `autoCommit` is set, else flag a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    def commit(self):
        """
        Start a background task retrieving the current data set.

        Does nothing when no data set is selected. The widget is
        disabled and blocked until `_on_dataready` runs.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit()
            self.progressBarSet(10)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            def get_data(gds_id, report_genes, transpose, sample_type,
                         title):
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )

            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """
        Filter and send the data retrieved by the task started in commit.

        Connected to the data task's `finished` signal. A URLError from
        the task is reported as a widget error and nothing is sent.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarSet(50)

        try:
            data = self._datatask.result()
        except urllib2.URLError as error:
            self.error(0, "Error while connecting to the NCBI ftp server! %r" %
                       error)
            self.progressBarFinished()
            return
        finally:
            # Drop the finished task on every path, including unexpected
            # exceptions, so no stale task reference is kept.
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()
        samples = set(samples)

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for i, a in data.domain.get_metas().items():
                    out.append((a.name, ex[i].value))
                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name, ex[-1].value))
                return out

            # Keep only the rows whose annotations are all selected.
            select = [1 if samples.issuperset(samplesinst(ex)) else 0
                      for ex in data]
            data = data.select(select)
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            # Keep only the columns whose annotations are all selected.
            domain = orange.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.classVar
            )
            domain.addmetas(data.domain.getmetas())
            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = orange.ExampleTable(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid",
                            self.currentGds.get("taxid", ""), 10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        self.progressBarFinished()
        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        # Column 0 is drawn by the indicator delegate set up in __init__.
        model.setData(model.index(row, 0), QVariant(" "), Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Store the current state of both splitters in the settings."""
        self.splitterSettings = [str(sp.saveState())
                                 for sp in self.splitters]

    def onDeleteWidget(self):
        """Cancel pending tasks and shut the executor down."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWDatabasesUpdate(OWWidget): name = "Databases Update" description = "Update local systems biology databases." icon = "../widgets/icons/Databases.svg" priority = 10 inputs = [] outputs = [] want_main_area = False def __init__(self, parent=None, signalManager=None, name="Databases update"): OWWidget.__init__(self, parent, signalManager, name, wantMainArea=False) self.searchString = "" fbox = gui.widgetBox(self.controlArea, "Filter") self.completer = TokenListCompleter( self, caseSensitivity=Qt.CaseInsensitive) self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate) self.lineEditFilter.setCompleter(self.completer) fbox.layout().addWidget(self.lineEditFilter) box = gui.widgetBox(self.controlArea, "Files") self.filesView = QTreeWidget(self) self.filesView.setHeaderLabels( ["", "Data Source", "Update", "Last Updated", "Size"]) self.filesView.setRootIsDecorated(False) self.filesView.setUniformRowHeights(True) self.filesView.setSelectionMode(QAbstractItemView.NoSelection) self.filesView.setSortingEnabled(True) self.filesView.sortItems(1, Qt.AscendingOrder) self.filesView.setItemDelegateForColumn( 0, UpdateOptionsItemDelegate(self.filesView)) self.filesView.model().layoutChanged.connect(self.SearchUpdate) box.layout().addWidget(self.filesView) box = gui.widgetBox(self.controlArea, orientation="horizontal") self.updateButton = gui.button( box, self, "Update all", callback=self.UpdateAll, tooltip="Update all updatable files", ) self.downloadButton = gui.button( box, self, "Download all", callback=self.DownloadFiltered, tooltip="Download all filtered files shown" ) self.cancelButton = gui.button( box, self, "Cancel", callback=self.Cancel, tooltip="Cancel scheduled downloads/updates." 
) self.retryButton = gui.button( box, self, "Reconnect", callback=self.RetrieveFilesList ) self.retryButton.hide() gui.rubber(box) self.warning(0) box = gui.widgetBox(self.controlArea, orientation="horizontal") gui.rubber(box) self.infoLabel = QLabel() self.infoLabel.setAlignment(Qt.AlignCenter) self.controlArea.layout().addWidget(self.infoLabel) self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed) self.updateItems = [] self.resize(800, 600) self.progress = ProgressState(self, maximum=3) self.progress.valueChanged.connect(self._updateProgress) self.progress.rangeChanged.connect(self._updateProgress) self.executor = ThreadExecutor( threadPool=QThreadPool(maxThreadCount=2) ) task = Task(self, function=self.RetrieveFilesList) task.exceptionReady.connect(self.HandleError) task.start() self._tasks = [] self._haveProgress = False def RetrieveFilesList(self): self.retryButton.hide() self.warning(0) self.progress.setRange(0, 3) task = Task(function=partial(retrieveFilesList, methodinvoke(self.progress, "advance"))) task.resultReady.connect(self.SetFilesList) task.exceptionReady.connect(self.HandleError) self.executor.submit(task) self.setEnabled(False) def SetFilesList(self, serverInfo): """ Set the files to show. 
""" self.setEnabled(True) localInfo = serverfiles.allinfo() all_tags = set() self.filesView.clear() self.updateItems = [] for item in join_info_dict(localInfo, serverInfo): tree_item = UpdateTreeWidgetItem(item) options_widget = UpdateOptionsWidget(item.state) options_widget.item = item options_widget.installClicked.connect( partial(self.SubmitDownloadTask, item.domain, item.filename) ) options_widget.removeClicked.connect( partial(self.SubmitRemoveTask, item.domain, item.filename) ) self.updateItems.append((item, tree_item, options_widget)) all_tags.update(item.tags) self.filesView.addTopLevelItems( [tree_item for _, tree_item, _ in self.updateItems] ) for item, tree_item, options_widget in self.updateItems: self.filesView.setItemWidget(tree_item, 0, options_widget) # Add an update button if the file is updateable if item.state == OUTDATED: button = QToolButton( None, text="Update", maximumWidth=120, minimumHeight=20, maximumHeight=20 ) if sys.platform == "darwin": button.setAttribute(Qt.WA_MacSmallSize) button.clicked.connect( partial(self.SubmitDownloadTask, item.domain, item.filename) ) self.filesView.setItemWidget(tree_item, 2, button) self.progress.advance() self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0)) for column in range(1, 4): contents_hint = self.filesView.sizeHintForColumn(column) header_hint = self.filesView.header().sectionSizeHint(column) width = max(min(contents_hint, 400), header_hint) self.filesView.setColumnWidth(column, width) hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")] self.completer.setTokenList(hints) self.SearchUpdate() self.UpdateInfoLabel() self.toggleButtons() self.cancelButton.setEnabled(False) self.progress.setRange(0, 0) def buttonCheck(self, selected_items, state, button): for item in selected_items: if item.state != state: button.setEnabled(False) else: button.setEnabled(True) break def toggleButtons(self): selected_items = [item for item, tree_item, _ in self.updateItems if not 
tree_item.isHidden()] self.buttonCheck(selected_items, OUTDATED, self.updateButton) self.buttonCheck(selected_items, AVAILABLE, self.downloadButton) def HandleError(self, exception): if isinstance(exception, ConnectionError): self.warning(0, "Could not connect to server! Check your connection " "and try to reconnect.") self.SetFilesList({}) self.retryButton.show() else: sys.excepthook(type(exception), exception, None) self.progress.setRange(0, 0) self.setEnabled(True) def UpdateInfoLabel(self): local = [item for item, tree_item, _ in self.updateItems if item.state != AVAILABLE and not tree_item.isHidden()] size = sum(float(item.size) for item in local) onServer = [item for item, tree_item, _ in self.updateItems if not tree_item.isHidden()] sizeOnServer = sum(float(item.size) for item in onServer) text = ("%i items, %s (on server: %i items, %s)" % (len(local), sizeof_fmt(size), len(onServer), sizeof_fmt(sizeOnServer))) self.infoLabel.setText(text) def UpdateAll(self): self.warning(0) for item, tree_item, _ in self.updateItems: if item.state == OUTDATED and not tree_item.isHidden(): self.SubmitDownloadTask(item.domain, item.filename) def DownloadFiltered(self): # TODO: submit items in the order shown. for item, tree_item, _ in self.updateItems: if not tree_item.isHidden() and item.state in \ [AVAILABLE, OUTDATED]: self.SubmitDownloadTask(item.domain, item.filename) def SearchUpdate(self, searchString=None): strings = str(self.lineEditFilter.text()).split() for item, tree_item, _ in self.updateItems: hide = not all(UpdateItem_match(item, string) for string in strings) tree_item.setHidden(hide) self.UpdateInfoLabel() self.toggleButtons() def SubmitDownloadTask(self, domain, filename): """ Submit the (domain, filename) to be downloaded/updated. 
""" self.cancelButton.setEnabled(True) index = self.updateItemIndex(domain, filename) _, tree_item, opt_widget = self.updateItems[index] sf = LocalFiles(serverfiles.PATH, serverfiles.ServerFiles()) task = DownloadTask(domain, filename, sf) self.progress.adjustRange(0, 100) pb = ItemProgressBar(self.filesView) pb.setRange(0, 100) pb.setTextVisible(False) task.advanced.connect(pb.advance) task.advanced.connect(self.progress.advance) task.finished.connect(pb.hide) task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection) task.exception.connect(self.onDownloadError, Qt.QueuedConnection) self.filesView.setItemWidget(tree_item, 2, pb) # Clear the text so it does not show behind the progress bar. tree_item.setData(2, Qt.DisplayRole, "") pb.show() # Disable the options widget opt_widget.setEnabled(False) self._tasks.append(task) self.executor.submit(task) def EndDownloadTask(self, task): future = task.future() index = self.updateItemIndex(task.domain, task.filename) item, tree_item, opt_widget = self.updateItems[index] self.filesView.removeItemWidget(tree_item, 2) opt_widget.setEnabled(True) if future.cancelled(): # Restore the previous state tree_item.setUpdateItem(item) opt_widget.setState(item.state) elif future.exception(): tree_item.setUpdateItem(item) opt_widget.setState(item.state) # Show the exception string in the size column. self.warning(0, "Error while downloading. 
Check your connection " "and retry.") # recreate button for download button = QToolButton( None, text="Retry", maximumWidth=120, minimumHeight=20, maximumHeight=20 ) if sys.platform == "darwin": button.setAttribute(Qt.WA_MacSmallSize) button.clicked.connect( partial(self.SubmitDownloadTask, item.domain, item.filename) ) self.filesView.setItemWidget(tree_item, 2, button) else: # get the new updated info dict and replace the the old item self.warning(0) info = serverfiles.info(item.domain, item.filename) new_item = update_item_from_info(item.domain, item.filename, info, info) self.updateItems[index] = (new_item, tree_item, opt_widget) tree_item.setUpdateItem(new_item) opt_widget.setState(new_item.state) self.UpdateInfoLabel() def SubmitRemoveTask(self, domain, filename): serverfiles.LOCALFILES.remove(domain, filename) index = self.updateItemIndex(domain, filename) item, tree_item, opt_widget = self.updateItems[index] if item.info_server: new_item = item._replace(state=AVAILABLE, local=None, info_local=None) else: new_item = item._replace(local=None, info_local=None) # Disable the options widget. No more actions can be performed # for the item. opt_widget.setEnabled(False) tree_item.setUpdateItem(new_item) opt_widget.setState(new_item.state) self.updateItems[index] = (new_item, tree_item, opt_widget) self.UpdateInfoLabel() def Cancel(self): """ Cancel all pending update/download tasks (that have not yet started). 
""" for task in self._tasks: task.future().cancel() def onDeleteWidget(self): self.Cancel() self.executor.shutdown(wait=False) OWWidget.onDeleteWidget(self) def onDownloadFinished(self): # on download completed/canceled/error assert QThread.currentThread() is self.thread() for task in list(self._tasks): future = task.future() if future.done(): self.EndDownloadTask(task) self._tasks.remove(task) if not self._tasks: # Clear/reset the overall progress self.progress.setRange(0, 0) self.cancelButton.setEnabled(False) def onDownloadError(self, exc_info): sys.excepthook(*exc_info) self.warning(0, "Error while downloading. Check your connection and " "retry.") def updateItemIndex(self, domain, filename): for i, (item, _, _) in enumerate(self.updateItems): if item.domain == domain and item.filename == filename: return i raise ValueError("%r, %r not in update list" % (domain, filename)) def _updateProgress(self, *args): rmin, rmax = self.progress.range() if rmin != rmax: if not self._haveProgress: self._haveProgress = True self.progressBarInit() self.progressBarSet(self.progress.ratioCompleted() * 100, processEvents=None) if rmin == rmax: self._haveProgress = False self.progressBarFinished()
class OWGEODatasets(OWWidget):
    """
    Widget for browsing GEO data sets (GDS) and sending the selected one
    to the "Expression Data" output.

    The data set list is loaded by a task submitted to a `ThreadExecutor`
    when the widget is constructed; the widget stays disabled/blocking
    until `_initializemodel` installs the resulting model.
    """
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    # Serialized QSplitter states (vertical splitter, horizontal splitter).
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        """
        Build the GUI and start loading the GDS model in the background.
        """
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # From here on `commitIf` is an alias of `commit` on this
            # instance (callbacks bound above still use the method).
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive
        )
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # Guards `annotationSelectionChanged` while the tree is rebuilt.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()
        self._datatask = None

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

    @Slot(float)
    def _setProgress(self, value):
        """Set the widget's progress bar value."""
        self.progressBarValue = value

    def _initializemodel(self):
        """
        Install the model produced by the init task, populate the filter
        completer and restore the previously selected data set (if any).
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build completion tokens from the searchable fields: replace
        # punctuation with spaces, split and keep words longer than 3 chars.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """
        Refresh the info label: total data sets, locally cached data sets
        and, when a filter hides rows, the number of rows still shown.
        """
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """
        React to a selection change in the data set view: update
        `currentGds`, the annotations tree, description and name edit,
        then call `commitIf`.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """
        Rebuild the sample annotations tree for `gds`.

        Top level items are subset types, their children the subset
        descriptions with sample counts. Check states are restored from
        `gdsSelectionStates` (default: checked).
        """
        self._annotationsUpdating = True
        try:
            self.annotationsTree.clear()

            annotations = defaultdict(set)
            subsetscount = {}
            for desc in gds["subsets"]:
                annotations[desc["type"]].add(desc["description"])
                subsetscount[desc["description"]] = \
                    str(len(desc["sample_id"]))

            for stype, subsets in annotations.items():
                key = (gds["dataset_id"], stype)
                subsetItem = QTreeWidgetItem(self.annotationsTree, [stype])
                subsetItem.setFlags(subsetItem.flags() |
                                    Qt.ItemIsUserCheckable |
                                    Qt.ItemIsTristate)
                subsetItem.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                subsetItem.key = key
                for subset in subsets:
                    key = (gds["dataset_id"], stype, subset)
                    item = QTreeWidgetItem(
                        subsetItem, [subset, subsetscount.get(subset, "")]
                    )
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked)
                    )
                    item.key = key
        finally:
            # Always re-enable change tracking, even if population failed,
            # otherwise later check state changes would be ignored forever.
            self._annotationsUpdating = False

        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """
        Store the check state of every item in the annotations tree into
        `gdsSelectionStates` (ignored while the tree is being rebuilt).
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            top = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[top.key] = top.checkState(0)
            for j in range(top.childCount()):
                child = top.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """
        Apply the text of the filter line edit to the proxy model as a list
        of lower-cased, whitespace separated fixed strings.
        """
        # `str` (not the Python 2 only `unicode`): this class already
        # depends on Python 3 (`str.maketrans` in `_initializemodel`).
        filter_string = str(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        Returns a `(samples, used_types)` tuple where `samples` is a list
        of (sample type, sample value) tuples and `used_types` is the list
        of sample types that have at least one selected value.

        .. note:: if a sample annotation type has no selected values, all
                  of its values are included in `samples` (i.e. the type
                  is not filtered on) and it is left out of `used_types`.

        """
        samples = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            type_name = str(stype.text(0))
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (type_name, str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(type_name)
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if `autoCommit` is set, else mark the selection changed."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Update the progress bar to `value` out of `total`."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """
        Start a task that downloads (if needed) and loads the current data
        set; `_on_dataready` is connected to the task's `finished` signal.

        Does nothing when no data set is selected.
        """
        if not self.currentGds:
            return

        self.error(0)
        sample_type = None
        self.progressBarInit(processEvents=None)

        _, groups = self.selectedSamples()
        if len(groups) == 1 and self.outputRows:
            sample_type = groups[0]

        self.setEnabled(False)
        self.setBlocking(True)

        progress = methodinvoke(self, "progressCompleted", (int, int))

        def get_data(gds_id, report_genes, transpose, sample_type, title):
            gds_ensure_downloaded(gds_id, progress)
            gds = geo.GDS(gds_id)
            data = gds.getdata(
                report_genes=report_genes, transpose=transpose,
                sample_type=sample_type
            )
            data.name = title
            return data

        get_data = partial(
            get_data, self.currentGds["dataset_id"],
            report_genes=self.mergeSpots,
            transpose=self.outputRows,
            sample_type=sample_type,
            title=self.datasetName or self.currentGds["title"]
        )

        self._datatask = Task(function=get_data)
        self._datatask.finished.connect(self._on_dataready)
        self._executor.submit(self._datatask)

    def _on_dataready(self):
        """
        Collect the result of the data task, restrict it to the selected
        sample annotations and send it to the "Expression Data" output.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        samples = set(samples)
        if self.outputRows:
            metas = data.domain.metas
            class_var = data.domain.class_var

            def samplesinst(ex):
                out = [(meta.name, ex[meta].value) for meta in metas]
                # Guard against a domain without a class variable instead
                # of failing with AttributeError on `None.name`.
                if class_var is not None and class_var.name != 'class':
                    out.append((class_var.name, ex[class_var].value))
                return out

            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
#             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        # Write " " into column 0 of the data set's row in the source model.
        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Save the current state of both splitters to the settings."""
        self.splitterSettings = [bytes(sp.saveState())
                                 for sp in self.splitters]

    def send_report(self):
        """
        Add a description of the current data set to the report.

        Nothing is reported when no data set is selected.
        """
        gds = self.currentGds
        if not gds:
            # Previously this raised TypeError ('NoneType' is not
            # subscriptable) when no data set was selected.
            return

        self.report_items("GEO Dataset",
                          [("ID", gds['dataset_id']),
                           ("Title", gds['title']),
                           ("Organism", gds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", gds['sample_count']),
                           ("Features", gds['feature_count']),
                           ("Genes", gds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for stype in subsets:
            self.report_html += "<b>" + stype + ":</b></br>"
            for desc, count in subsets[stype]:
                self.report_html += \
                    9 * " " + "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel outstanding tasks and shut down the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the user supplied name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            # `str` instead of the Python 2 only `unicode` builtin.
            self.datasetNames[gds_id] = str(self.nameEdit.text())
            self.commitIf()
class OWDatabasesUpdate(OWWidget):
    """
    Widget listing local and server side database files, with controls to
    download, update and remove them.
    """
    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/Databases.svg"
    priority = 10

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update", domains=None):
        """
        Build the GUI and start retrieving the list of files.

        `domains` restricts the listed domains; it defaults to `DOMAINS`.
        """
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""
        self.accessCode = ""
        self.domains = domains or DOMAINS
        self.serverFiles = serverfiles.ServerFiles()

        # State read by the progress/download callbacks. Initialized before
        # any signal is connected or any task is started so that a callback
        # can never observe a half constructed widget.
        self.updateItems = []
        self._tasks = []
        self._haveProgress = False

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)

        gui.lineEdit(box, self, "accessCode", "Access Code",
                     orientation="horizontal",
                     callback=self.RetrieveFilesList)

        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

    def RetrieveFilesList(self):
        """
        Submit a task retrieving the server file list; the widget is
        disabled until `SetFilesList` or `HandleError` is invoked.
        """
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)
        self.serverFiles = serverfiles.ServerFiles(access_code=self.accessCode)

        task = Task(function=partial(retrieveFilesList, self.serverFiles,
                                     self.domains,
                                     methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        self.setEnabled(False)

    def _downloadButton(self, item, text):
        """
        Return a small tool button labelled `text` that submits a download
        task for `item` when clicked.
        """
        button = QToolButton(
            None, text=text,
            maximumWidth=120,
            minimumHeight=20,
            maximumHeight=20
        )

        if sys.platform == "darwin":
            button.setAttribute(Qt.WA_MacSmallSize)

        button.clicked.connect(
            partial(self.SubmitDownloadTask, item.domain, item.filename)
        )
        return button

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.

        `serverInfo` is a mapping keyed by domain. When it is empty the
        widget's configured domains (or all local domains) are listed
        using local information only.
        """
        self.setEnabled(True)

        domains = serverInfo.keys()
        if not domains:
            if self.domains:
                domains = self.domains
            else:
                domains = serverfiles.listdomains()

        localInfo = {dom: serverfiles.allinfo(dom) for dom in domains}

        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                self.filesView.setItemWidget(
                    tree_item, 2, self._downloadButton(item, "Update"))

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def buttonCheck(self, selected_items, state, button):
        """
        Enable `button` iff at least one of `selected_items` is in `state`.

        With no items the button is disabled (it used to keep whatever
        state it had before, e.g. stay enabled after filtering everything
        out).
        """
        button.setEnabled(
            any(item.state == state for item in selected_items))

    def toggleButtons(self):
        """Enable/disable 'Update all' and 'Download all' for shown items."""
        selected_items = [item for item, tree_item, _ in self.updateItems
                          if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        """
        Handle an exception raised while retrieving the file list.

        Connection errors show a warning, fall back to the local file list
        and reveal the 'Reconnect' button; anything else is passed to
        `sys.excepthook`.
        """
        if isinstance(exception, ConnectionError):
            self.warning(0,
                         "Could not connect to server! Check your connection "
                         "and try to reconnect.")
            self.SetFilesList({})
            self.retryButton.show()
        else:
            # The hook expects (type, value, traceback) with the exception
            # *instance* as value, not its `args` tuple.
            sys.excepthook(type(exception), exception,
                           getattr(exception, "__traceback__", None))
            self.progress.setRange(0, 0)
            self.setEnabled(True)

    def UpdateInfoLabel(self):
        """Show item counts and total sizes of the non-hidden items."""
        shown = [item for item, tree_item, _ in self.updateItems
                 if not tree_item.isHidden()]
        local = [item for item in shown if item.state != AVAILABLE]

        size = sum(float(item.size) for item in local)
        sizeOnServer = sum(float(item.size) for item in shown)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 sizeof_fmt(size),
                 len(shown),
                 sizeof_fmt(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        """Submit a download task for every shown outdated item."""
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        """Submit a download task for every shown available/outdated item."""
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    [AVAILABLE, OUTDATED]:
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        """
        Hide every item that does not match all whitespace separated words
        in the filter line edit, then refresh the info label and buttons.
        """
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        if self.accessCode:
            sf = serverfiles.ServerFiles(access_code=self.accessCode)
        else:
            sf = serverfiles.ServerFiles()

        task = DownloadTask(domain, filename, sf)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        """
        Update the row of a finished download `task` according to whether
        it was cancelled, failed or completed.
        """
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            self.filesView.setItemWidget(
                tree_item, 2, self._downloadButton(item, "Retry"))

        else:
            # get the new updated info dict and replace the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

            self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        """Remove the local (domain, filename) file and update its row."""
        serverfiles.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                     info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        """
        Finalize every task whose future is done and reset the overall
        progress once no tasks remain.
        """
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        # Iterate over a copy; finished tasks are removed from `_tasks`.
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        """Report a download exception and show a warning."""
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        """
        Return the index of (domain, filename) in `updateItems`.

        Raises ValueError if there is no such item.
        """
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        """Mirror the state of `self.progress` on the widget progress bar."""
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        else:
            # An empty range means there is nothing in progress.
            self._haveProgress = False
            self.progressBarFinished()
class OWGEODatasets(OWWidget):
    """
    Widget for browsing GEO data sets (GDS) and sending a selected one
    to the "Expression Data" output channel.

    The list of data sets is loaded by a background task started in
    `__init__`; the widget stays disabled until `_initializemodel` runs.
    The selected data set is retrieved by a second background task started
    in `commit` and post-processed in `_on_dataready`.
    """
    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.outputs = [("Expression Data", ExampleTable)]

        ## Settings
        self.selectedAnnotation = 0
        self.includeIf = False
        self.minSamples = 3
        self.autoCommit = False
        self.outputRows = 1
        self.mergeSpots = True
        self.filterString = ""
        self.currentGds = None
        self.selectionChanged = False
        # Maps annotation item keys (see `setAnnotations`) to check states.
        self.gdsSelectionStates = {}
        # Saved states of the two splitters (vertical, horizontal).
        self.splitterSettings = [
            '\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
            '\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
        ]
        # Maps a data set id to the user supplied output name.
        self.datasetNames = {}

        self.loadSettings()

        self.datasetName = ""

        ## GUI
        self.infoBox = OWGUI.widgetLabel(
            OWGUI.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n\n"
        )

        box = OWGUI.widgetBox(self.controlArea, "Output", addSpace=True)
        OWGUI.radioButtonsInBox(box, self, "outputRows",
                                ["Genes in rows", "Samples in rows"], "Rows",
                                callback=self.commitIf)

        OWGUI.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                       callback=self.commitIf)

        OWGUI.separator(box)
        self.nameEdit = OWGUI.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        box = OWGUI.widgetBox(self.controlArea, "Commit", addSpace=True)
        self.commitButton = OWGUI.button(box, self, "Commit",
                                         callback=self.commit)
        cb = OWGUI.checkBox(box, self, "autoCommit", "Commit on any change")
        OWGUI.setStopper(self, self.commitButton, cb, "selectionChanged",
                         self.commit)
        OWGUI.rubber(self.controlArea)

        self.filterLineEdit = OWGUIEx.lineEditHint(
            self.mainArea, self, "filterString", "Filter",
            caseSensitive=False, matchAnywhere=True,
            callback=self.filter, delimiters=" ")

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QAbstractItemView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, OWGUI.IndicatorItemDelegate(self.treeWidget,
                                           role=Qt.DisplayRole))

        # The source model is installed later, in `_initializemodel`.
        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = OWGUI.widgetBox(splitterH, "Description")
        self.infoGDS = OWGUI.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        OWGUI.rubber(box)

        box = OWGUI.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # Set while `setAnnotations` repopulates the tree so that the
        # resulting itemChanged signals are ignored.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        # Keys of a GDS info dict used to build the filter completion list.
        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # Keep the widget blocked/disabled until the model is available.
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @pyqtSlot(float)
    def _setProgress(self, value):
        """Set the widget's progress bar value."""
        self.progressBarValue = value

    def _initializemodel(self):
        """
        Install the model produced by the initialization task, re-enable
        the widget, populate the filter completions and restore the
        previously selected data set (if any).
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build the completion list from the words found under
        # `searchKeys` in all data set descriptions.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = string.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.filterLineEdit.setItems(filter_items)

        if self.currentGds:
            # Find the proxy row whose column 1 text equals the stored
            # data set id and select it.
            gdss = [(i, proxy.data(proxy.index(i, 1), Qt.DisplayRole))
                    for i in range(proxy.rowCount())]
            current = [i for i, variant in gdss
                       if variant.isValid() and
                       str(variant.toString()) == self.currentGds["dataset_id"]]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(current_index)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the info label (total, cached and filtered counts)."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """
        Update the current data set from the tree view selection and
        refresh the dependent controls, then call `commitIf`.
        """
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """
        Populate the sample annotations tree from ``gds["subsets"]``.

        Top level items are annotation types, their children are the
        subset descriptions with the sample count. Check states are
        restored from `gdsSelectionStates` (default is checked).
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for annot_type, subsets in annotations.items():
            key = (gds["dataset_id"], annot_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [annot_type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], annot_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key

        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """
        Store the check state of every item in the annotations tree into
        `gdsSelectionStates`. The `item` and `column` signal arguments are
        not used; the whole tree is re-read.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            top = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[top.key] = top.checkState(0)
            for j in range(top.childCount()):
                child = top.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit contents to the proxy model."""
        filter_string = unicode(self.filterLineEdit.text(), errors="ignore")
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple where
        `samples` is a list of (sample type, sample value) tuples and
        `used_types` is a list of the sample types that have at least
        one selected value.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if auto commit is on, otherwise flag a pending change."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    def commit(self):
        """
        Start a background task retrieving the current data set.

        Does nothing if no data set is selected. The widget is disabled
        until `_on_dataready` is invoked with the finished task.
        """
        if not self.currentGds:
            return

        self.error(0)
        sample_type = None
        self.progressBarInit()
        self.progressBarSet(10)

        _, groups = self.selectedSamples()
        # A sample type is only passed on when exactly one annotation
        # type is in use and samples are output in rows.
        if len(groups) == 1 and self.outputRows:
            sample_type = groups[0]

        self.setEnabled(False)
        self.setBlocking(True)

        def get_data(gds_id, report_genes, transpose, sample_type, title):
            gds = geo.GDS(gds_id)
            data = gds.getdata(
                report_genes=report_genes, transpose=transpose,
                sample_type=sample_type
            )
            data.name = title
            return data

        get_data = partial(
            get_data, self.currentGds["dataset_id"],
            report_genes=self.mergeSpots,
            transpose=self.outputRows,
            sample_type=sample_type,
            title=self.datasetName or self.currentGds["title"]
        )

        self._datatask = Task(function=get_data)
        self._datatask.finished.connect(self._on_dataready)
        self._executor.submit(self._datatask)

    def _on_dataready(self):
        """
        Handle the finished data task: filter the retrieved data by the
        selected sample annotations and send it to the output.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarSet(50)

        try:
            data = self._datatask.result()
        except urllib2.URLError as error:
            self.error(0, "Error while connecting to the NCBI ftp server! %r" %
                       error)
            self.progressBarFinished()
            return
        except Exception:
            # Any other failure is still propagated, but must not leave
            # the progress bar running.
            self.progressBarFinished()
            raise
        finally:
            # The task reference is dropped on success and failure alike.
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for i, a in data.domain.get_metas().items():
                    out.append((a.name, ex[i].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name, ex[-1].value))

                return out
            samples = set(samples)

            # Keep the instances whose annotations are all selected.
            select = [1 if samples.issuperset(samplesinst(ex)) else 0
                      for ex in data]
            data = data.select(select)
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            # Keep the attributes whose annotations are all selected.
            domain = orange.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.classVar
            )
            domain.addmetas(data.domain.getmetas())
            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = orange.ExampleTable(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        self.progressBarFinished()
        data.name = data_name
        self.send("Expression Data", data)

        # Update the first column of the data set's row in the source model.
        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), QVariant(" "), Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Store the current splitter states into `splitterSettings`."""
        self.splitterSettings = [str(sp.saveState()) for sp in self.splitters]

    def onDeleteWidget(self):
        """Cancel/disconnect pending tasks and shut down the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWGeneNetwork(OWWidget.OWWidget):
    """
    Widget retrieving a gene network for the genes in the input data.

    Query genes are taken either from the attribute names or from a
    string variable of the input data; the network is fetched in a
    background task and sent on the "Network" channel.
    """
    settingsList = [
        "taxid", "use_attr_names", "network_source",
        "include_neighborhood", "min_score"
    ]
    contextHandlers = {
        "": OWWidget.DomainContextHandler(
            "", ["taxid", "gene_var_index", "use_attr_names"])
    }

    def __init__(self, parent=None, signalManager=None, title="Gene Network"):
        super(OWGeneNetwork, self).__init__(
            parent, signalManager, title,
            wantMainArea=False, resizingEnabled=False
        )

        self.taxid = "9606"
        self.gene_var_index = -1
        self.use_attr_names = False
        self.network_source = 1
        self.include_neighborhood = True
        self.autocommit = False
        self.min_score = 0.9
        self.loadSettings()

        self.taxids = taxonomy.common_taxids()
        self.current_taxid_index = self.taxids.index(self.taxid)

        self.data = None
        # Future of the gene info retrieval (shared between commits).
        self.geneinfo = None
        # Currently running (or last finished) network retrieval task.
        self.nettask = None
        self._invalidated = False

        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.info = OWGUI.widgetLabel(box, "No data on input\n")

        box = OWGUI.widgetBox(self.controlArea, "Organism")
        self.organism_cb = OWGUI.comboBox(
            box, self, "current_taxid_index",
            items=map(taxonomy.name, self.taxids),
            callback=self._update_organism
        )
        box = OWGUI.widgetBox(self.controlArea, "Genes")
        self.genes_cb = OWGUI.comboBox(
            box, self, "gene_var_index", callback=self._update_query_genes
        )
        self.varmodel = OWItemModels.VariableListModel()
        self.genes_cb.setModel(self.varmodel)

        OWGUI.checkBox(
            box, self, "use_attr_names",
            "Use attribute names",
            callback=self._update_query_genes
        )

        box = OWGUI.widgetBox(self.controlArea, "Network")
        OWGUI.comboBox(
            box, self, "network_source",
            items=[s.name for s in SOURCES],
            callback=self._on_source_db_changed
        )
        OWGUI.checkBox(
            box, self, "include_neighborhood",
            "Include immediate gene neighbors",
            callback=self.invalidate
        )
        self.score_spin = OWGUI.doubleSpin(
            box, self, "min_score", 0.0, 1.0, step=0.001,
            label="Minimal edge score",
            callback=self.invalidate
        )
        self.score_spin.setEnabled(SOURCES[self.network_source].score_filter)

        box = OWGUI.widgetBox(self.controlArea, "Commit")
        OWGUI.button(box, self, "Commit", callback=self.commit, default=True)

        self.executor = ThreadExecutor()

    def set_data(self, data):
        """
        Set the input data.

        With data present the gene variable list, organism and
        `use_attr_names` are updated (using data hints) and a commit is
        started. With `None` the variable list is cleared and `None` is
        sent on the output.
        """
        self.closeContext()
        self.data = data
        if data is not None:
            self.varmodel[:] = string_variables(data.domain)
            taxid = data_hints.get_hint(data, "taxid", default=self.taxid)

            if taxid in self.taxids:
                self.set_organism(self.taxids.index(taxid))

            self.use_attr_names = data_hints.get_hint(
                data, "genesinrows", default=self.use_attr_names
            )

            # Fall back to the last variable if the index is out of range.
            if not (0 <= self.gene_var_index < len(self.varmodel)):
                self.gene_var_index = len(self.varmodel) - 1

            self.openContext("", data)
            self.invalidate()
            self.commit()
        else:
            self.varmodel[:] = []
            self.send("Network", None)

    def set_source_db(self, dbindex):
        """Set the network source (index into `SOURCES`)."""
        self.network_source = dbindex
        self.invalidate()

    def set_organism(self, index):
        """Set the organism by its index into `self.taxids`."""
        self.current_taxid_index = index
        self.taxid = self.taxids[index]
        self.invalidate()

    def set_gene_var(self, index):
        """Set the index of the variable holding the gene names."""
        self.gene_var_index = index
        self.invalidate()

    def query_genes(self):
        """
        Return the list of query genes.

        The genes are the attribute names when `use_attr_names` is set,
        otherwise the unique non-special values of the selected gene
        variable. An empty list is returned when there is no input data
        or when `gene_var_index` does not refer to a variable in
        `varmodel` (e.g. a stale index after the input was removed).
        """
        if self.data is None:
            return []

        if self.use_attr_names:
            return [var.name for var in self.data.domain.attributes]

        if 0 <= self.gene_var_index < len(self.varmodel):
            var = self.varmodel[self.gene_var_index]
            genes = [str(inst[var]) for inst in self.data
                     if not inst[var].isSpecial()]
            return list(unique(genes))

        return []

    def invalidate(self):
        """
        Mark the output as out of date, cancel a running network task
        and schedule a commit if `autocommit` is set.
        """
        self._invalidated = True
        if self.nettask is not None:
            self.nettask.finished.disconnect(self._on_result_ready)
            self.nettask.future().cancel()
            self.nettask = None

        if self.autocommit:
            QTimer.singleShot(10, self._maybe_commit)

    @Slot()
    def _maybe_commit(self):
        """Commit if the output is still invalidated."""
        if self._invalidated:
            self.commit()

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (wraps around at 100)."""
        self.progressBarValue = (self.progressBarValue + 1) % 100

    @Slot(float)
    def set_progress(self, value):
        """Set the progress bar value."""
        self.progressBarValue = value

    def commit(self):
        """
        Start a background task retrieving the gene network for the
        current query genes, organism and network source.

        Raises `ValueError` if the source's taxonomy mapping maps the
        current taxid to `None`.
        """
        include_neighborhood = self.include_neighborhood
        query_genes = self.query_genes()
        source = SOURCES[self.network_source]
        if source.score_filter:
            min_score = self.min_score
            assert source.name == "STRING"
            # The spin box value (0.0-1.0) is scaled by 1000 for the query.
            min_score = min_score * 1000
        else:
            min_score = None

        taxid = self.taxid
        progress = methodinvoke(self, "advance")
        if self.geneinfo is None:
            self.geneinfo = self.executor.submit(
                fetch_ncbi_geneinfo, taxid, progress
            )

        geneinfo_f = self.geneinfo
        taxmap = source.tax_mapping
        db_taxid = taxmap.get(taxid, taxid)

        if db_taxid is None:
            raise ValueError("invalid taxid for this network")

        def fetch_network():
            # Runs in the executor; waits for the gene info first.
            geneinfo = geneinfo_f.result()
            ppidb = fetch_ppidb(source, db_taxid, progress)
            return get_gene_network(ppidb, geneinfo, db_taxid, query_genes,
                                    include_neighborhood=include_neighborhood,
                                    min_score=min_score,
                                    progress=methodinvoke(
                                        self, "set_progress", (float,)))

        self.nettask = Task(function=fetch_network)
        self.nettask.finished.connect(self._on_result_ready)

        self.executor.submit(self.nettask)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()
        self._invalidated = False
        self._update_info()

    @Slot(object)
    def _on_result_ready(self,):
        """Re-enable the widget and send the retrieved network."""
        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)
        net = self.nettask.result()
        self._update_info()
        self.send("Network", net)

    def _on_source_db_changed(self):
        """Update the score spin box state for the new source."""
        source = SOURCES[self.network_source]
        self.score_spin.setEnabled(source.score_filter)
        self.invalidate()

    def _update_organism(self):
        """Handle a change of the organism combo box."""
        self.taxid = self.taxids[self.current_taxid_index]
        # Gene info is fetched per taxid; drop the one being retrieved.
        if self.geneinfo is not None:
            self.geneinfo.cancel()
        self.geneinfo = None
        self.invalidate()

    def _update_query_genes(self):
        """Handle a change of the query gene controls."""
        self.invalidate()

    def _update_info(self):
        """Refresh the info label from the input and the network task."""
        if self.data is None:
            self.info.setText("No data on input\n")
        else:
            names = self.query_genes()
            lines = ["%i unique genes on input" % len(set(names))]
            if self.nettask is not None:
                if not self.nettask.future().done():
                    lines.append("Retrieving ...")
                else:
                    net = self.nettask.result()
                    lines.append("%i nodes %i edges" %
                                 (len(net.nodes()), len(net.edges())))
            else:
                lines.append("")

            self.info.setText("\n".join(lines))
class OWGeneNetwork(OWWidget.OWWidget):
    """
    Widget retrieving a gene network for the genes in the input data.

    Query genes are taken either from the attribute names or from a
    string variable of the input data; the network is fetched in a
    background task and sent on the "Network" channel.
    """
    settingsList = ["taxid", "use_attr_names", "network_source",
                    "include_neighborhood", "min_score"]
    contextHandlers = {
        "": OWWidget.DomainContextHandler(
            "", ["taxid", "gene_var_index", "use_attr_names"]
        )
    }

    def __init__(self, parent=None, signalManager=None, title="Gene Network"):
        super(OWGeneNetwork, self).__init__(
            parent, signalManager, title, wantMainArea=False,
            resizingEnabled=False
        )

        self.taxid = "9606"
        self.gene_var_index = -1
        self.use_attr_names = False
        self.network_source = 1
        self.include_neighborhood = True
        self.autocommit = False
        self.min_score = 0.9
        self.loadSettings()

        self.taxids = taxonomy.common_taxids()
        self.current_taxid_index = self.taxids.index(self.taxid)

        self.data = None
        # Future of the gene info retrieval (shared between commits).
        self.geneinfo = None
        # Currently running (or last finished) network retrieval task.
        self.nettask = None
        self._invalidated = False

        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.info = OWGUI.widgetLabel(box, "No data on input\n")

        box = OWGUI.widgetBox(self.controlArea, "Organism")
        self.organism_cb = OWGUI.comboBox(
            box, self, "current_taxid_index",
            items=map(taxonomy.name, self.taxids),
            callback=self._update_organism
        )
        box = OWGUI.widgetBox(self.controlArea, "Genes")
        self.genes_cb = OWGUI.comboBox(
            box, self, "gene_var_index", callback=self._update_query_genes
        )
        self.varmodel = OWItemModels.VariableListModel()
        self.genes_cb.setModel(self.varmodel)

        OWGUI.checkBox(
            box, self, "use_attr_names",
            "Use attribute names",
            callback=self._update_query_genes
        )

        box = OWGUI.widgetBox(self.controlArea, "Network")
        OWGUI.comboBox(
            box, self, "network_source",
            items=[s.name for s in SOURCES],
            callback=self._on_source_db_changed
        )
        OWGUI.checkBox(
            box, self, "include_neighborhood",
            "Include immediate gene neighbors",
            callback=self.invalidate
        )
        self.score_spin = OWGUI.doubleSpin(
            box, self, "min_score", 0.0, 1.0, step=0.001,
            label="Minimal edge score",
            callback=self.invalidate
        )
        self.score_spin.setEnabled(SOURCES[self.network_source].score_filter)

        box = OWGUI.widgetBox(self.controlArea, "Commit")
        OWGUI.button(box, self, "Commit", callback=self.commit, default=True)

        self.executor = ThreadExecutor()

    def set_data(self, data):
        """
        Set the input data.

        With data present the gene variable list, organism and
        `use_attr_names` are updated (using data hints) and a commit is
        started. With `None` the variable list is cleared and `None` is
        sent on the output.
        """
        self.closeContext()
        self.data = data
        if data is not None:
            self.varmodel[:] = string_variables(data.domain)
            taxid = data_hints.get_hint(data, "taxid", default=self.taxid)

            if taxid in self.taxids:
                self.set_organism(self.taxids.index(taxid))

            self.use_attr_names = data_hints.get_hint(
                data, "genesinrows", default=self.use_attr_names
            )

            # Fall back to the last variable if the index is out of range.
            if not (0 <= self.gene_var_index < len(self.varmodel)):
                self.gene_var_index = len(self.varmodel) - 1

            self.openContext("", data)
            self.invalidate()
            self.commit()
        else:
            self.varmodel[:] = []
            self.send("Network", None)

    def set_source_db(self, dbindex):
        """Set the network source (index into `SOURCES`)."""
        self.network_source = dbindex
        self.invalidate()

    def set_organism(self, index):
        """Set the organism by its index into `self.taxids`."""
        self.current_taxid_index = index
        self.taxid = self.taxids[index]
        self.invalidate()

    def set_gene_var(self, index):
        """Set the index of the variable holding the gene names."""
        self.gene_var_index = index
        self.invalidate()

    def query_genes(self):
        """
        Return the list of query genes.

        The genes are the attribute names when `use_attr_names` is set,
        otherwise the unique non-special values of the selected gene
        variable. An empty list is returned when there is no input data
        or when `gene_var_index` does not refer to a variable in
        `varmodel` (e.g. a stale index after the input was removed).
        """
        if self.data is None:
            return []

        if self.use_attr_names:
            return [var.name for var in self.data.domain.attributes]

        if 0 <= self.gene_var_index < len(self.varmodel):
            var = self.varmodel[self.gene_var_index]
            genes = [str(inst[var]) for inst in self.data
                     if not inst[var].isSpecial()]
            return list(unique(genes))

        return []

    def invalidate(self):
        """
        Mark the output as out of date, cancel a running network task
        and schedule a commit if `autocommit` is set.
        """
        self._invalidated = True
        if self.nettask is not None:
            self.nettask.finished.disconnect(self._on_result_ready)
            self.nettask.future().cancel()
            self.nettask = None

        if self.autocommit:
            QTimer.singleShot(10, self._maybe_commit)

    @Slot()
    def _maybe_commit(self):
        """Commit if the output is still invalidated."""
        if self._invalidated:
            self.commit()

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (wraps around at 100)."""
        self.progressBarValue = (self.progressBarValue + 1) % 100

    @Slot(float)
    def set_progress(self, value):
        """Set the progress bar value."""
        self.progressBarValue = value

    def commit(self):
        """
        Start a background task retrieving the gene network for the
        current query genes, organism and network source.

        Raises `ValueError` if the source's taxonomy mapping maps the
        current taxid to `None`.
        """
        include_neighborhood = self.include_neighborhood
        query_genes = self.query_genes()
        source = SOURCES[self.network_source]
        if source.score_filter:
            min_score = self.min_score
            assert source.name == "STRING"
            # The spin box value (0.0-1.0) is scaled by 1000 for the query.
            min_score = min_score * 1000
        else:
            min_score = None

        taxid = self.taxid
        progress = methodinvoke(self, "advance")
        if self.geneinfo is None:
            self.geneinfo = self.executor.submit(
                fetch_ncbi_geneinfo, taxid, progress
            )

        geneinfo_f = self.geneinfo
        taxmap = source.tax_mapping
        db_taxid = taxmap.get(taxid, taxid)

        if db_taxid is None:
            raise ValueError("invalid taxid for this network")

        def fetch_network():
            # Runs in the executor; waits for the gene info first.
            geneinfo = geneinfo_f.result()
            ppidb = fetch_ppidb(source, db_taxid, progress)
            return get_gene_network(ppidb, geneinfo, db_taxid, query_genes,
                                    include_neighborhood=include_neighborhood,
                                    min_score=min_score,
                                    progress=methodinvoke(self, "set_progress",
                                                          (float,)))

        self.nettask = Task(function=fetch_network)
        self.nettask.finished.connect(self._on_result_ready)

        self.executor.submit(self.nettask)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()
        self._invalidated = False
        self._update_info()

    @Slot(object)
    def _on_result_ready(self,):
        """Re-enable the widget and send the retrieved network."""
        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)
        net = self.nettask.result()
        self._update_info()
        self.send("Network", net)

    def _on_source_db_changed(self):
        """Update the score spin box state for the new source."""
        source = SOURCES[self.network_source]
        self.score_spin.setEnabled(source.score_filter)
        self.invalidate()

    def _update_organism(self):
        """Handle a change of the organism combo box."""
        self.taxid = self.taxids[self.current_taxid_index]
        # Gene info is fetched per taxid; drop the one being retrieved.
        if self.geneinfo is not None:
            self.geneinfo.cancel()
        self.geneinfo = None
        self.invalidate()

    def _update_query_genes(self):
        """Handle a change of the query gene controls."""
        self.invalidate()

    def _update_info(self):
        """Refresh the info label from the input and the network task."""
        if self.data is None:
            self.info.setText("No data on input\n")
        else:
            names = self.query_genes()
            lines = ["%i unique genes on input" % len(set(names))]
            if self.nettask is not None:
                if not self.nettask.future().done():
                    lines.append("Retrieving ...")
                else:
                    net = self.nettask.result()
                    lines.append("%i nodes %i edges" %
                                 (len(net.nodes()), len(net.edges())))
            else:
                lines.append("")

            self.info.setText("\n".join(lines))
class OWGeneInfo(OWWidget): settingsList = ["organismIndex", "geneAttr", "useAttr", "autoCommit", "taxid"] contextHandlers = { "": DomainContextHandler( "", ["organismIndex", "geneAttr", "useAttr", "useAltSource", "taxid"] ) } def __init__(self, parent=None, signalManager=None, name="Gene Info"): OWWidget.__init__(self, parent, signalManager, name) self.inputs = [("Examples", Orange.data.Table, self.setData)] self.outputs = [("Selected Examples", Orange.data.Table)] self.organismIndex = 0 self.taxid = None self.geneAttr = 0 self.useAttr = False self.autoCommit = False self.searchString = "" self.selectionChangedFlag = False self.useAltSource = 0 self.loadSettings() self.__initialized = False self.initfuture = None self.itemsfuture = None self.infoLabel = OWGUI.widgetLabel( OWGUI.widgetBox(self.controlArea, "Info", addSpace=True), "Initializing\n" ) self.organisms = None self.organismBox = OWGUI.widgetBox( self.controlArea, "Organism", addSpace=True) self.organismComboBox = OWGUI.comboBox( self.organismBox, self, "organismIndex", callback=self._onSelectedOrganismChanged, debuggingEnabled=0) # For now only support one alt source, with a checkbox # In the future this can be extended to multiple selections self.altSourceCheck = OWGUI.checkBox(self.organismBox, self, "useAltSource", "Show information from dictyBase", callback=self.onAltSourceChange, # debuggingEnabled=0, ) self.altSourceCheck.hide() box = OWGUI.widgetBox(self.controlArea, "Gene names", addSpace=True) self.geneAttrComboBox = OWGUI.comboBox( box, self, "geneAttr", "Gene atttibute", callback=self.updateInfoItems ) OWGUI.checkBox(box, self, "useAttr", "Use attribute names", callback=self.updateInfoItems, disables=[(-1, self.geneAttrComboBox)]) self.geneAttrComboBox.setDisabled(bool(self.useAttr)) box = OWGUI.widgetBox(self.controlArea, "Commit", addSpace=True) b = OWGUI.button(box, self, "Commit", callback=self.commit) c = OWGUI.checkBox(box, self, "autoCommit", "Commit on change") OWGUI.setStopper(self, b, 
c, "selectionChangedFlag", callback=self.commit) # A label for dictyExpress link self.dictyExpressBox = OWGUI.widgetBox( self.controlArea, "Dicty Express") self.linkLabel = OWGUI.widgetLabel(self.dictyExpressBox, "") self.linkLabel.setOpenExternalLinks(False) self.connect(self.linkLabel, SIGNAL("linkActivated(QString)"), self.onDictyExpressLink) self.dictyExpressBox.hide() OWGUI.rubber(self.controlArea) OWGUI.lineEdit(self.mainArea, self, "searchString", "Filter", callbackOnType=True, callback=self.searchUpdate) self.treeWidget = QTreeView(self.mainArea) self.treeWidget.setRootIsDecorated(False) self.treeWidget.setSelectionMode( QAbstractItemView.ExtendedSelection) self.treeWidget.setItemDelegate( LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.setUniformRowHeights(True) self.treeWidget.viewport().setMouseTracking(True) self.treeWidget.setSortingEnabled(True) self.mainArea.layout().addWidget(self.treeWidget) box = OWGUI.widgetBox(self.mainArea, "", orientation="horizontal") OWGUI.button(box, self, "Select Filtered", callback=self.selectFiltered) OWGUI.button(box, self, "Clear Selection", callback=self.treeWidget.clearSelection) self.resize(1000, 700) self.geneinfo = [] self.cells = [] self.row2geneinfo = {} self.data = None # : (# input genes, # matches genes) self.matchedInfo = 0, 0 self.selectionUpdateInProgress = False self.setBlocking(True) self.executor = ThreadExecutor(self) self.progressBarInit() task = Task( function=partial( taxonomy.ensure_downloaded, callback=methodinvoke(self, "advance", ()) ) ) task.resultReady.connect(self.initialize) task.exceptionReady.connect(self._onInitializeError) self.initfuture = self.executor.submit(task) @Slot() def advance(self): assert self.thread() is QThread.currentThread() self.progressBarSet(self.progressBarValue + 1, processEventsFlags=None) def initialize(self): if self.__initialized: # Already initialized return self.progressBarFinished() self.organisms = sorted( set([name.split(".")[-2] for name in 
serverfiles.listfiles("NCBI_geneinfo")] + gene.NCBIGeneInfo.essential_taxids()) ) self.organismComboBox.addItems( [taxonomy.name(tax_id) for tax_id in self.organisms] ) if self.taxid in self.organisms: self.organismIndex = self.organisms.index(self.taxid) self.infoLabel.setText("No data on input\n") self.__initialized = True self.initfuture = None self.setBlocking(False) def _onInitializeError(self, exc): sys.excepthook(type(exc), exc.args, None) self.error(0, "Could not download the necessary files.") def _onSelectedOrganismChanged(self): self.taxid = self.organisms[self.organismIndex] if self.data is not None: self.updateInfoItems() def setData(self, data=None): if not self.__initialized: self.initfuture.result() self.initialize() if self.itemsfuture is not None: raise Exception("Already processing") self.closeContext() self.data = data if data: self.geneAttrComboBox.clear() self.attributes = \ [attr for attr in (data.domain.variables + data.domain.getmetas().values()) if isinstance(attr, (Orange.feature.String, Orange.feature.Discrete))] self.geneAttrComboBox.addItems( [attr.name for attr in self.attributes] ) self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid) self.useAttr = data_hints.get_hint( self.data, "genesinrows", self.useAttr) self.openContext("", data) self.geneAttr = min(self.geneAttr, len(self.attributes) - 1) if self.taxid in self.organisms: self.organismIndex = self.organisms.index(self.taxid) self.updateInfoItems() else: self.clear() def infoSource(self): """ Return the current selected info source getter function from INFO_SOURCES """ org = self.organisms[min(self.organismIndex, len(self.organisms) - 1)] if org not in INFO_SOURCES: org = "default" sources = INFO_SOURCES[org] name, func = sources[min(self.useAltSource, len(sources) - 1)] return name, func def inputGenes(self): if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.geneAttr] genes = [str(ex[attr]) 
for ex in self.data if not ex[attr].isSpecial()] else: genes = [] return genes def updateInfoItems(self): self.warning(0) if not self.data: return genes = self.inputGenes() if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.geneAttr] genes = [str(ex[attr]) for ex in self.data if not ex[attr].isSpecial()] else: genes = [] if not genes: self.warning(0, "Could not extract genes from input dataset.") self.warning(1) org = self.organisms[min(self.organismIndex, len(self.organisms) - 1)] source_name, info_getter = self.infoSource() self.error(0) self.updateDictyExpressLink(genes, show=org == "352472") self.altSourceCheck.setVisible(org == "352472") self.progressBarInit() self.setBlocking(True) self.setEnabled(False) self.infoLabel.setText("Retrieving info records.\n") self.genes = genes task = Task( function=partial( info_getter, org, genes, advance=methodinvoke(self, "advance", ())) ) self.itemsfuture = self.executor.submit(task) task.finished.connect(self._onItemsCompleted) def _onItemsCompleted(self): self.setBlocking(False) self.progressBarFinished() self.setEnabled(True) try: schema, geneinfo = self.itemsfuture.result() finally: self.itemsfuture = None self.geneinfo = geneinfo = list(zip(self.genes, geneinfo)) self.cells = cells = [] self.row2geneinfo = {} links = [] for i, (_, gi) in enumerate(geneinfo): if gi: row = [] for _, item in zip(schema, gi): if isinstance(item, Link): # TODO: This should be handled by delegates row.append(item.text) links.append(item.link) else: row.append(item) cells.append(row) self.row2geneinfo[len(cells) - 1] = i model = TreeModel(cells, [str(col) for col in schema], None) model.setColumnLinks(0, links) proxyModel = QSortFilterProxyModel(self) proxyModel.setSourceModel(model) self.treeWidget.setModel(proxyModel) self.connect(self.treeWidget.selectionModel(), SIGNAL("selectionChanged(QItemSelection , QItemSelection )"), self.commitIf) for i in range(7): 
self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( i, min(self.treeWidget.columnWidth(i), 200) ) self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" % (len(self.genes), len(cells))) self.matchedInfo = len(self.genes), len(cells) def clear(self): self.infoLabel.setText("No data on input\n") self.treeWidget.setModel( TreeModel([], ["NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description", "Synonyms", "Nomenclature"], self.treeWidget)) self.geneAttrComboBox.clear() self.send("Selected Examples", None) def commitIf(self, *args): if self.autoCommit and not self.selectionUpdateInProgress: self.commit() else: self.selectionChangedFlag = True def commit(self): if not self.data: return model = self.treeWidget.model() mapToSource = model.mapToSource selectedRows = self.treeWidget.selectedIndexes() selectedRows = [mapToSource(index).row() for index in selectedRows] model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row) for row in selectedRows) if self.useAttr: def is_selected(attr): return attr.name in selectedIds attrs = [attr for attr in self.data.domain.attributes if is_selected(attr)] domain = Orange.data.Domain(attrs, self.data.domain.classVar) domain.addmetas(self.data.domain.getmetas()) newdata = Orange.data.Table(domain, self.data) self.send("Selected Examples", newdata) elif self.attributes: attr = self.attributes[self.geneAttr] examples = [ex for ex in self.data if str(ex[attr]) in selectedIds] # Add gene info domain = Orange.data.Domain( self.data.domain, self.data.domain.classVar) domain.addmetas(self.data.domain.getmetas()) n_columns = model.columnCount() headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole) .toString()) for i in range(n_columns)] new_meta_attrs = [(Orange.feature.Descriptor.new_meta_id(), 
Orange.feature.String(name)) for name in headers] domain.addmetas(dict(new_meta_attrs)) examples = [Orange.data.Instance(domain, ex) for ex in examples] for ex in examples: for i, (_, meta) in enumerate(new_meta_attrs): index = model.index(gene2row[str(ex[attr])], i) ex[meta] = str( model.data(index, Qt.DisplayRole).toString() ) if examples: newdata = Orange.data.Table(examples) else: newdata = None self.send("Selected Examples", newdata) else: self.send("Selected Examples", None) def rowFiltered(self, row): searchStrings = self.searchString.lower().split() row = unicode(" ".join(self.cells[row]).lower(), errors="ignore") return not all([s in row for s in searchStrings]) def searchUpdate(self): if not self.data: return searchStrings = self.searchString.lower().split() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): row = unicode(" ".join(row).lower(), errors="ignore") self.treeWidget.setRowHidden( mapFromSource(index(i, 0)).row(), QModelIndex(), not all([s in row for s in searchStrings])) def selectFiltered(self): if not self.data: return itemSelection = QItemSelection() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): if not self.rowFiltered(i): itemSelection.select(mapFromSource(index(i, 0)), mapFromSource(index(i, 0))) self.treeWidget.selectionModel().select( itemSelection, QItemSelectionModel.Select | QItemSelectionModel.Rows) def sendReport(self): from Orange.OrangeWidgets import OWReport genes, matched = self.matchedInfo if self.organisms: org = self.organisms[min(self.organismIndex, len(self.organisms) - 1)] org_name = taxonomy.name(org) else: org = None org_name = None if self.data is not None: self.reportRaw( "<p>Input: %i genes of which %i (%.1f%%) matched NCBI synonyms" "<br>" "Organism: %s" "<br>" "Filter: %s" "</p>" % (genes, matched, 100.0 * matched / genes, 
org_name, self.searchString) ) self.reportSubsection("Gene list") self.reportRaw(reportItemView(self.treeWidget)) else: self.reportRaw("<p>No input</p>") def updateDictyExpressLink(self, genes, show=False): def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None if show: genes = [fix(gene) for gene in genes if fix(gene)] link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>' link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>' self.linkLabel.setText(link1 + "<br/>" + link2) show = any(genes) if show: self.dictyExpressBox.show() else: self.dictyExpressBox.hide() def onDictyExpressLink(self, link): if not self.data: return selectedIndexes = self.treeWidget.selectedIndexes() if not len(selectedIndexes): QMessageBox.information( self, "No gene ids selected", "Please select some genes and try again." ) return model = self.treeWidget.model() mapToSource = model.mapToSource selectedRows = self.treeWidget.selectedIndexes() selectedRows = [mapToSource(index).row() for index in selectedRows] model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None genes = [fix(gene) for gene in selectedIds if fix(gene)] url = str(link) % " ".join(genes) QDesktopServices.openUrl(QUrl(url)) def onAltSourceChange(self): self.updateInfoItems() def onDeleteWidget(self): OWWidget.onDeleteWidget(self) # try to cancel pending tasks if self.initfuture: self.initfuture.cancel() if self.itemsfuture: self.itemsfuture.cancel() self.executor.shutdown()
class OWKEGGPathwayBrowser(OWWidget):
    """
    Widget for browsing KEGG pathways enriched in the input genes.

    Takes "Examples" and (optional) "Reference" data tables on input and
    sends "Selected Examples" / "Unselected Examples" on output.
    """
    # Names of the attributes stored as widget settings.
    settingsList = ["organismIndex", "geneAttrIndex", "autoCommit",
                    "autoResize", "useReference", "useAttrNames",
                    "showOrthology"]

    # Settings stored per input domain.
    contextHandlers = {
        "": DomainContextHandler(
            "",
            [ContextField("organismIndex",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes),
             ContextField("geneAttrIndex",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes),
             ContextField("useAttrNames",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes)]
        )
    }

    def __init__(self, parent=None, signalManager=None, name="KEGG Pathways"):
        """Build the GUI and schedule the organism list initialization."""
        OWWidget.__init__(self, parent, signalManager, name, wantGraph=True)
        self.inputs = [("Examples", Orange.data.Table, self.SetData),
                       ("Reference", Orange.data.Table, self.SetRefData)]
        self.outputs = [("Selected Examples", Orange.data.Table),
                        ("Unselected Examples", Orange.data.Table)]

        self.organismIndex = 0
        self.geneAttrIndex = 0
        self.autoCommit = False
        self.autoResize = True
        self.useReference = False
        self.useAttrNames = 0
        self.showOrthology = True

        self.loadSettings()

        self.organismCodes = []
        self._changedFlag = False

        self.controlArea.setMaximumWidth(250)
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoLabel = OWGUI.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = OWGUI.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = OWGUI.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            debuggingEnabled=0,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = OWGUI.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = VariableListModel(parent=self)
        self.geneAttrCombo = OWGUI.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        OWGUI.checkBox(box, self, "useAttrNames",
                       "Use variable names",
                       disables=[(-1, self.geneAttrCombo)],
                       callback=self.Update)

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        OWGUI.separator(self.controlArea)

        OWGUI.checkBox(self.controlArea, self, "useReference",
                       "From signal",
                       box="Reference",
                       callback=self.Update)

        OWGUI.separator(self.controlArea)

        OWGUI.checkBox(self.controlArea, self, "showOrthology",
                       "Show pathways in full orthology",
                       box="Orthology",
                       callback=self.UpdateListView)

        OWGUI.checkBox(self.controlArea, self, "autoResize",
                       "Resize to fit",
                       box="Image",
                       callback=self.UpdatePathwayViewTransform)

        box = OWGUI.widgetBox(self.controlArea, "Cache Control")

        OWGUI.button(box, self, "Clear cache",
                     callback=self.ClearCache,
                     tooltip="Clear all locally cached KEGG data.")

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Selection")
        cb = OWGUI.checkBox(box, self, "autoCommit", "Commit on update")
        button = OWGUI.button(box, self, "Commit", callback=self.Commit,
                              default=True)
        OWGUI.setStopper(self, button, cb, "_changedFlag", self.Commit)

        OWGUI.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged
        )
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(spliter)
        spliter.addWidget(self.listView)

        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(["Pathway", "P value",
                                       "Genes", "Reference"])

        self.listView.setSelectionMode(QTreeWidget.SingleSelection)

        self.listView.setSortingEnabled(True)

        self.listView.setMaximumHeight(200)

        self.connect(self.listView,
                     SIGNAL("itemSelectionChanged()"),
                     self.UpdatePathwayView)

        self.connect(self.graphButton,
                     SIGNAL("clicked()"),
                     self.saveGraph)

        select = QAction(
            "Select All", self,
            shortcut=QKeySequence.SelectAll
        )
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self.resize(800, 600)

        self.connect(self,
                     SIGNAL("widgetStateChanged(QString, int, QString)"),
                     self.onStateChange)

        self.has_new_data = False
        self.has_new_reference_set = False

        self._executor = ThreadExecutor()
        # The widget stays disabled until the organism list is available.
        self.setEnabled(False)
        self.setBlocking(True)
        QTimer.singleShot(0, self._initialize)
        self.infoLabel.setText("Fetching organism definitions\n")

    def _initialize(self):
        """Start a background task fetching the organism definitions."""
        # First try to import slumber to see if we can even use the
        # kegg module.
        try:
            import slumber
        except ImportError:
            QMessageBox.warning(
                self,
                "'slumber' library required.",
                '<p>Please install '
                '<a href="http://pypi.python.org/pypi/slumber">slumber</a> '
                'library to use KEGG Pathways widget.</p>'
            )
            self.infoLabel.setText(
                '<p>Please install '
                '<a href="http://pypi.python.org/pypi/slumber">slumber</a> '
                'library to use KEGG Pathways widget.</p>'
            )
            self.error(0, "Missing slumber/requests library")
            return

        progress = methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            # A real list: `keys` is iterated by `pre_cache` and once more
            # by the caller, which a lazy `map` object would not survive.
            keys = [genome.org_code_to_entry_key(code)
                    for code in essential + common]

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = Task(function=get_genome)
        task.finished.connect(self._initializeOrganisms)

        self.progressBarInit()
        self._executor.submit(task)

    def _initializeOrganisms(self):
        """Fill the organism combo box from the finished genome task."""
        self.progressBarFinished()

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            return

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.setEnabled(True)
        self.setBlocking(False)
        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Examples", None)
        self.send("Unselected Examples", None)

    def SetData(self, data=None):
        """Set the input data table (or clear the widget on `None`)."""
        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            candidates = (data.domain.variables +
                          data.domain.getmetas().values())
            candidates = [var for var in candidates
                          if isinstance(var, (Orange.feature.String,
                                              Orange.feature.Discrete))]
            self.geneAttrCandidates[:] = candidates

            # Try to guess the gene name variable
            if candidates:
                names_lower = [v.name.lower() for v in candidates]
                scores = [(name == "gene", "gene" in name)
                          for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                # `max` raises ValueError on an empty sequence; fall back
                # to the default index when there are no candidates.
                imax = 0
            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    # Best effort only; keep the current organism selection.
                    print("%s %s" % (ex, taxid))

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)

            self.openContext("", data)
        else:
            self.Clear()

        self.has_new_data = True

    def SetRefData(self, data=None):
        """Set the reference data table."""
        self.refData = data
        self.information(1)

        self.has_new_reference_set = True

    def handleNewSignals(self):
        """Recompute the enrichment if a relevant input has changed."""
        if self.has_new_data or (self.has_new_reference_set and
                                 self.useReference):
            self.Update()

        self.has_new_data = False
        self.has_new_reference_set = False

    def UpdateListView(self):
        """Rebuild the pathway list (flat or as the full orthology tree)."""
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            # The last five characters of the pathway keys.
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                # Collect the entries that are (or contain) a pathway
                # from `path_ids`.
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return ([koEntry] +
                            reduce(lambda li, c: li + _walkCollect(c),
                                   [child for child in koEntry.entries],
                                   []))
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries],
                               [])
                    return c + ([koEntry] if c else [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                # Create a tree item for `koEntry` and, recursively, for
                # its children that were collected in `allClasses`.
                item = QTreeWidgetItem(lvItem)
                path_id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(path_id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[path_id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes),
                                                  len(self.genes)))
                    item.setText(3, "%i of %i" % (ref,
                                                  len(self.referenceGenes)))
                    item.pathway_id = path_id if p is not None else None
                else:
                    if path_id in allPathways:
                        text = kegg_pathways.get_entry(path_id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if path_id in allPathways:
                        item.pathway_id = path_id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            # Order by the p-value (second element of the pathway record).
            pathways = sorted(self.pathways.items(),
                              key=lambda entry: entry[1][1])

            for path_id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(path_id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = path_id
                items.append(item)

        self.bestPValueItem = items[0] if items else None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect
            )

    def UpdatePathwayView(self):
        """Load and display the pathway selected in the list view."""
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.Commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = Task(
            function=lambda: get_kgml_and_image(item.pathway_id)
        )
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        """Show the pathway retrieved by the finished pathway task."""
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(
            self.pathway,
            self.pathways.get(pathway_id, [[]])[0]
        )

    def UpdatePathwayViewTransform(self):
        """Update the pathway view transform."""
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even alow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",")
                                 for gene in genes),
                           [])
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")

        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",")
                                         for gene in reference),
                                   [])
                self.information(1,
                                 "Separators detected in reference gene "
                                 "names. Assuming multiple genes per "
                                 "instance.")

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections(
                (("KEGG", "pathways"), taxid)
            )

            pathways = pathway_enrichment(
                kegg_gs_collections, unique_genes.keys(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress
            )

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = methodinvoke(self, "setProgress", (float,))

        self._enrichTask = Task(
            function=lambda:
                run_enrichment(org_code, genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        """Store the results of the finished enrichment task and show them."""
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished()
        # Any exception raised in the task propagates from `result()`.
        pathways, org, unique_genes, unique_ref_genes = \
            self._enrichTask.result()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = dict([(val, key) for key, val in
                                        self.uniqueGenesDict.items()])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @pyqtSlot(float)
    def setProgress(self, value):
        """Set the progress bar value."""
        self.progressBarValue = value

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.

        Raises `ValueError` if variable names are not used and there is no
        candidate gene variable.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            index = min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1)
            geneAttr = self.geneAttrCandidates[index]
            genes = [str(e[geneAttr]) for e in data
                     if not e[geneAttr].isSpecial()]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        # Suppress the per item notifications; notify once at the end.
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        """Commit (or mark a pending change) on scene selection change."""
        # Item selection in the pathwayView/scene has changed
        if self.autoCommit:
            self.Commit()
        else:
            self._changedFlag = True

    def Commit(self):
        """Send the data matching the genes selected in the pathway view."""
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(set.union,
                                   [item.marked_objects
                                    for item in selectedItems],
                                   set())

            if self.useAttrNames:
                selectedVars = [self.data.domain[self.uniqueGenesDict[gene]]
                                for gene in selectedGenes]
                newDomain = Orange.data.Domain(selectedVars, 0)
                data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Examples", data)
            elif self.geneAttrCandidates:
                geneAttr = self.geneAttrCandidates[
                    min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1)]
                selectedExamples = []
                otherExamples = []
                for ex in self.data:
                    names = [self.revUniqueGenesDict.get(name, None)
                             for name in split_and_strip(str(ex[geneAttr]),
                                                         ",")]
                    if any(name and name in selectedGenes for name in names):
                        selectedExamples.append(ex)
                    else:
                        otherExamples.append(ex)

                if selectedExamples:
                    selectedExamples = Orange.data.Table(selectedExamples)
                else:
                    selectedExamples = None

                if otherExamples:
                    otherExamples = Orange.data.Table(otherExamples)
                else:
                    otherExamples = None

                self.send("Selected Examples", selectedExamples)
                self.send("Unselected Examples", otherExamples)
        else:
            self.send("Selected Examples", None)
            self.send("Unselected Examples", None)

    def ClearCache(self):
        """Clear the local KEGG cache; show a warning box on failure."""
        from ..kegg import caching
        try:
            caching.clear_cache()
        except Exception as ex:
            QMessageBox.warning(self, "Cache clear", ex.args[0])