def list_all(**kwargs):
    """ List the gene sets available in the server files repository.

    The remote listing and the locally stored listing for ``DOMAIN`` are
    concatenated, every file name is converted with ``filename_parse`` and
    duplicate results are collapsed.

    :param kwargs:
        * *organism* (``str``) -- Taxonomy id (NCBI taxonomy database)

    :return:
        * without *organism* (or with a falsy value): a :obj:`set` holding the
          values returned by ``filename_parse``
        * with *organism*: a :obj:`list` with the hierarchy of every parsed
          (hierarchy, organism) pair whose organism equals the given id


    Example
    --------
    The available gene set collection can be listed with:
        >>> list_all(organism='10090')

    """
    taxonomy_id = kwargs.get("organism")

    remote_listing = serverfiles.ServerFiles().listfiles(DOMAIN)
    local_listing = serverfiles.listfiles(DOMAIN)

    # A set is used so that files present both remotely and locally count once.
    parsed = set()
    for _, file_name in remote_listing + local_listing:
        parsed.add(filename_parse(file_name))

    if not taxonomy_id:
        return parsed
    return [hierarchy for hierarchy, org in parsed if org == taxonomy_id]
# Example #2
# 0
def listAvailable():
    """Map organism names to their gene-association files on the server.

    Only taxonomy ids returned by ``taxonomy.common_taxids()`` whose
    ``gene_association.<taxid>`` file is listed under ``DOMAIN`` on the remote
    server are included.

    :return: ``dict`` mapping ``taxonomy.name(taxid)`` to the file name.
    """
    taxids = taxonomy.common_taxids()

    # Fetch the remote listing once instead of once per taxonomy id, and keep
    # it in a set so that each membership test is a constant-time lookup.
    remote_files = set(serverfiles.ServerFiles().listfiles(DOMAIN))

    available = {}
    for taxid in taxids:
        file_name = 'gene_association.{}'.format(taxid)
        if (DOMAIN, file_name) in remote_files:
            available[taxonomy.name(taxid)] = file_name
    return available
def list_all(**kwargs):
    """ List the gene sets found in the remote server files repository.

    Every file name listed under ``DOMAIN`` is converted with
    ``filename_parse``.

    :param kwargs:
        * *organism* (``str``) -- when given (and truthy), only the
          (hierarchy, organism) pairs with this organism are returned

    :return: a list of the parsed (hierarchy, organism) entries
    """
    taxonomy_id = kwargs.get("organism")

    parsed = []
    for _, file_name in serverfiles.ServerFiles().listfiles(DOMAIN):
        parsed.append(filename_parse(file_name))

    if not taxonomy_id:
        return parsed
    return [
        (hierarchy, org)
        for hierarchy, org in parsed
        if org == taxonomy_id
    ]
    def _load_data(self):
        """Collect the available database sources and load the selected one.

        Source information is read from the remote server; if the server
        cannot be reached the locally stored information is used instead and
        the ``using_local_files`` warning is shown.  When at least one source
        is available, the file of ``self.selected_db_source`` is updated
        (network errors ignored), loaded as a table and stored in
        ``self.data`` with its columns arranged as metas in a fixed order.
        """
        self.Warning.using_local_files.clear()

        found_sources = {}
        try:
            remote_info = serverfiles.ServerFiles().allinfo(serverfiles_domain)
            found_sources.update(remote_info)
        except requests.exceptions.ConnectionError:
            # Server unreachable: fall back to what is stored locally.
            local_info = serverfiles.allinfo(serverfiles_domain)
            found_sources.update(local_info)
            self.Warning.using_local_files()

        # Index the sources by the last ': '-separated part of their title.
        sources_by_title = {}
        for info in found_sources.values():
            sources_by_title[info.get('title').split(': ')[-1]] = info
        self.available_db_sources = sources_by_title

        if not self.available_db_sources:
            return

        selected_source = self.available_db_sources[self.selected_db_source]
        file_name = selected_source['filename']

        try:
            serverfiles.update(serverfiles_domain, file_name)
        except requests.exceptions.ConnectionError:
            # try to update file. Ignore network errors.
            pass

        try:
            file_path = serverfiles.localpath_download(
                serverfiles_domain, file_name)
        except requests.exceptions.ConnectionError:
            # Unexpected error.
            raise

        source_table = Table(file_path)

        # enforce order
        source_domain = source_table.domain
        column_order = (
            'Organism',
            'Name',
            'Entrez ID',
            'Cell Type',
            'Function',
            'Reference',
            'URL',
        )
        ordered_domain = Domain(
            [], metas=[source_domain[name] for name in column_order])
        self.data = source_table.transform(ordered_domain)
def retrieveFilesList(advance=lambda: None):
    """
    Retrieve and return the result of ``allinfo()`` from the remote server.

    ``advance`` is called once before the request is made and once after it
    has succeeded.  A ``requests`` timeout or connection error is replaced by
    a ``ConnectionError``.
    """
    import requests.exceptions

    advance()
    try:
        remote_info = serverfiles.ServerFiles().allinfo()
    except (requests.exceptions.Timeout,
            requests.exceptions.ConnectionError):
        raise ConnectionError
    else:
        advance()
        return remote_info
    def _load_data(self) -> None:
        """
        Gather the available data sources (marker genes data sets).

        The remote server is asked first; when it cannot be reached, the
        locally stored information is used and the ``using_local_files``
        warning is shown.  The result is stored in ``self.available_sources``,
        keyed by the last ``': '``-separated part of each source title.
        """
        self.Warning.using_local_files.clear()

        sources = {}
        try:
            remote_info = serverfiles.ServerFiles().allinfo(SERVER_FILES_DOMAIN)
            sources.update(remote_info)
        except requests.exceptions.ConnectionError:
            local_info = serverfiles.allinfo(SERVER_FILES_DOMAIN)
            sources.update(local_info)
            self.Warning.using_local_files()

        by_title = {}
        for info in sources.values():
            title = info.get('title')
            by_title[title.split(': ')[-1]] = info
        self.available_sources = by_title
def get_available_db_sources():
    """Return the known database sources, keyed by their short title.

    Information from the remote server and from the local files is merged;
    local entries are applied last, so they take precedence when both describe
    the same file.  If the server cannot be reached, a warning is logged and
    only the local information is used.

    :return: ``dict`` mapping the last ``': '``-separated part of each
        source's ``title`` to its info record.
    """
    import logging

    found_sources = {}

    try:
        found_sources.update(
            serverfiles.ServerFiles().allinfo(serverfiles_domain))
    except (ConnectionError, OSError):
        # The original raised here and then returned from a ``finally``
        # clause, which silently discarded the exception (and would also have
        # swallowed unrelated ones such as KeyboardInterrupt).  Keep the
        # effective best-effort behaviour, but make it explicit.  OSError is
        # included because the exceptions raised by ``requests`` derive from
        # it rather than from the builtin ConnectionError.
        logging.getLogger(__name__).warning(
            'Can not connect to {}. Using only local files.'.format(
                serverfiles.server_url))

    found_sources.update(serverfiles.allinfo(serverfiles_domain))
    return {
        item.get('title').split(': ')[-1]: item
        for item in found_sources.values()
    }
def evaluate_files_state(progress_callback):
    """Build a ``FileState`` for every file known remotely or locally.

    ``progress_callback.emit()`` is called at the start, after the remote
    information has been fetched and once more before returning.  ``Timeout``
    and ``ConnectionError`` raised while contacting the server propagate to
    the caller.

    :return: list of ``FileState`` objects ordered by (domain, file name);
        the server or local info is ``None`` for files missing on that side.
    """
    progress_callback.emit()

    # fetch remote info
    try:
        remote = serverfiles.ServerFiles().allinfo()
    except (Timeout, ConnectionError):
        raise
    progress_callback.emit()

    # fetch local info
    local = serverfiles.allinfo()

    known_files = set(local.keys()).union(remote.keys())
    states = [
        FileState(domain, file_name,
                  remote.get((domain, file_name)),
                  local.get((domain, file_name)))
        for domain, file_name in sorted(known_files)
    ]

    progress_callback.emit()
    return states
    def __init__(self, parent=None):
        """Set up the widget state and build its user interface.

        Creates the control-area tabs ("Input", "Filter", "Select"), the
        main-area splitter holding the GO term tree and the table of
        significant terms, and collects the annotation files that are
        available remotely and locally into ``self.available_annotations``.
        """
        # NOTE(review): zero-argument super() already binds ``self``, so this
        # passes ``self`` a second time as the first positional argument.
        # The base class is not visible here -- confirm its __init__ signature
        # before changing this to ``super().__init__(parent)``.
        super().__init__(self, parent)

        # Data, ontology and annotation state; nothing is loaded yet.
        self.input_data = None
        self.ref_data = None
        self.ontology = None
        self.annotations = None
        self.loaded_annotation_code = None
        self.treeStructRootKey = None
        # Order matches the "Binomial" / "Hypergeometric" radio buttons below.
        self.probFunctions = [statistics.Binomial(), statistics.Hypergeometric()]
        self.selectedTerms = []

        self.selectionChanging = 0
        self.__state = State.Ready
        # Single-shot timer whose timeout triggers self.__update.
        self.__scheduletimer = QTimer(self, singleShot=True)
        self.__scheduletimer.timeout.connect(self.__update)

        #############
        # GUI
        #############
        self.tabs = gui.tabWidget(self.controlArea)
        # Input tab
        self.inputTab = gui.createTabPage(self.tabs, "Input")
        box = gui.widgetBox(self.inputTab, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.button(box, self, "Ontology/Annotation Info",
                   callback=self.ShowInfo,
                   tooltip="Show information on loaded ontology and annotations")

        self.referenceRadioBox = gui.radioButtonsInBox(
            self.inputTab, self, "useReferenceDataset",
            ["Entire genome", "Reference set (input)"],
            tooltips=["Use entire genome for reference",
                      "Use genes from Referece Examples input signal as reference"],
            box="Reference", callback=self.__invalidate)

        # The "Reference set (input)" option starts out disabled -- presumably
        # it is enabled once reference data is received; confirm in the
        # input handlers.
        self.referenceRadioBox.buttons[1].setDisabled(True)
        gui.radioButtonsInBox(
            self.inputTab, self, "aspectIndex",
            ["Biological process", "Cellular component", "Molecular function"],
            box="Aspect", callback=self.__invalidate)

        # Filter tab
        self.filterTab = gui.createTabPage(self.tabs, "Filter")
        box = gui.widgetBox(self.filterTab, "Filter GO Term Nodes")
        gui.checkBox(box, self, "filterByNumOfInstances", "Genes",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by number of input genes mapped to a term")
        ibox = gui.indentedBox(box)
        gui.spin(ibox, self, 'minNumOfInstances', 1, 100,
                 step=1, label='#:', labelWidth=15,
                 callback=self.FilterAndDisplayGraph,
                 callbackOnReturn=True,
                 tooltip="Min. number of input genes mapped to a term")

        gui.checkBox(box, self, "filterByPValue_nofdr", "p-value",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term p-value")

        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue_nofdr', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        # use filterByPValue for FDR, as it was the default in prior versions
        gui.checkBox(box, self, "filterByPValue", "FDR",
                     callback=self.FilterAndDisplayGraph,
                     tooltip="Filter by term FDR")
        gui.doubleSpin(gui.indentedBox(box), self, 'maxPValue', 1e-8, 1,
                       step=1e-8,  label='p:', labelWidth=15,
                       callback=self.FilterAndDisplayGraph,
                       callbackOnReturn=True,
                       tooltip="Max term p-value")

        # Nested inside the "Filter GO Term Nodes" box created above.
        box = gui.widgetBox(box, "Significance test")

        gui.radioButtonsInBox(box, self, "probFunc", ["Binomial", "Hypergeometric"],
                              tooltips=["Use binomial distribution test",
                                        "Use hypergeometric distribution test"],
                              callback=self.__invalidate)  # TODO: only update the p values
        box = gui.widgetBox(self.filterTab, "Evidence codes in annotation",
                              addSpace=True)
        # One check box per evidence type, kept by type so they can be looked
        # up later.
        self.evidenceCheckBoxDict = {}
        for etype in go.evidenceTypesOrdered:
            ecb = QCheckBox(
                etype, toolTip=go.evidenceTypes[etype],
                checked=self.useEvidenceType[etype])
            ecb.toggled.connect(self.__on_evidenceChanged)
            box.layout().addWidget(ecb)
            self.evidenceCheckBoxDict[etype] = ecb

        # Select tab
        self.selectTab = gui.createTabPage(self.tabs, "Select")
        box = gui.radioButtonsInBox(
            self.selectTab, self, "selectionDirectAnnotation",
            ["Directly or Indirectly", "Directly"],
            box="Annotated genes",
            callback=self.ExampleSelection)

        box = gui.widgetBox(self.selectTab, "Output", addSpace=True)
        gui.radioButtonsInBox(
            box, self, "selectionDisjoint",
            btnLabels=["All selected genes",
                       "Term-specific genes",
                       "Common term genes"],
            tooltips=["Outputs genes annotated to all selected GO terms",
                      "Outputs genes that appear in only one of selected GO terms",
                      "Outputs genes common to all selected GO terms"],
            callback=self.ExampleSelection)

        # ListView for DAG, and table for significant GOIDs
        self.DAGcolumns = ['GO term', 'Cluster', 'Reference', 'p-value',
                           'FDR', 'Genes', 'Enrichment']

        self.splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(self.splitter)

        # list view
        self.listView = GOTreeWidget(self.splitter)
        self.listView.setSelectionMode(QTreeView.ExtendedSelection)
        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(len(self.DAGcolumns))
        self.listView.setHeaderLabels(self.DAGcolumns)

        self.listView.header().setSectionsClickable(True)
        self.listView.header().setSortIndicatorShown(True)
        # Initially sorted by ascending p-value.
        self.listView.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        self.listView.setSortingEnabled(True)
        # Column 6 is 'Enrichment' in DAGcolumns; it gets a custom delegate.
        self.listView.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))
        self.listView.setRootIsDecorated(True)

        self.listView.itemSelectionChanged.connect(self.ViewSelectionChanged)

        # table of significant GO terms
        self.sigTerms = QTreeWidget(self.splitter)
        self.sigTerms.setColumnCount(len(self.DAGcolumns))
        self.sigTerms.setHeaderLabels(self.DAGcolumns)
        self.sigTerms.setSortingEnabled(True)
        self.sigTerms.setSelectionMode(QTreeView.ExtendedSelection)
        self.sigTerms.header().setSortIndicator(self.DAGcolumns.index('p-value'), Qt.AscendingOrder)
        # Same custom delegate for the 'Enrichment' column (index 6).
        self.sigTerms.setItemDelegateForColumn(
            6, EnrichmentColumnItemDelegate(self))

        self.sigTerms.itemSelectionChanged.connect(self.TableSelectionChanged)

        self.sigTableTermsSorted = []
        self.graph = {}
        self.originalGraph = None

        # A trailing stretch in each tab's layout.
        self.inputTab.layout().addStretch(1)
        self.filterTab.layout().addStretch(1)
        self.selectTab.layout().addStretch(1)

        class AnnotationSlot(SimpleNamespace):
            """Record describing one available annotation file."""
            taxid = ...  # type: str
            name = ...   # type: str
            filename = ...  # type:str

            @staticmethod
            def parse_tax_id(f_name):
                """Return the second dot-separated component of ``f_name``."""
                return f_name.split('.')[1]

        try:
            remote_files = serverfiles.ServerFiles().listfiles(DOMAIN)
        except (ConnectTimeout, RequestException, ConnectionError):
            # TODO: Warn user about failed connection to the remote server
            # Continue with the locally listed files only.
            remote_files = []

        # One slot per distinct file listed remotely or locally, skipping the
        # ontology file; the taxonomy id is parsed from the file name.
        self.available_annotations = [
            AnnotationSlot(
                taxid=AnnotationSlot.parse_tax_id(annotation_file),
                name=taxonomy.common_taxid_to_name(AnnotationSlot.parse_tax_id(annotation_file)),
                filename=FILENAME_ANNOTATION.format(AnnotationSlot.parse_tax_id(annotation_file))
            )
            for _, annotation_file in set(remote_files + serverfiles.listfiles(DOMAIN))
            if annotation_file != FILENAME_ONTOLOGY

        ]
        self._executor = ThreadExecutor()