Python EUtils.EFetch примеры использования

Язык программирования: Python

Пространство имен/Пакет: bioservices

Класс/Тип: EUtils

Метод/Функция: EFetch

Примеров на hotexamples.com: 5

Python EUtils.EFetch - 5 примеров найдено. Это лучшие примеры Python кода для bioservices.EUtils.EFetch, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

EUtils(11)

EFetch(5)

ESearch(3)

ELink(1)

EPost(1)

ESummary(1)

taxonomy(1)

taxonomy_summary(1)

Пример #1

Показать файл

def download_fasta_and_genbank(identifier, tag, genbank=True, fasta=True):
    """Download the GenBank and/or FASTA record of an identifier to disk.

    The GenBank record is requested from NCBI (``nuccore`` database, through
    ``bioservices.EUtils``) and written to ``<tag>.gbk``. The FASTA record is
    requested from ENA (through ``bioservices.ENA``) and written to
    ``<tag>.fa``.

    :param identifier: valid identifier to retrieve from NCBI (genbank) and
        ENA (fasta)
    :param tag: name of the filename for the genbank and fasta files.
    :param genbank: if True, fetch and save the GenBank record.
    :param fasta: if True, fetch and save the FASTA record.
    :raises ValueError: if a service answers with an integer status code
        instead of the record (400 is reported as "not found").
    """

    def _check_status(payload, source):
        # The services hand back an integer status code instead of the
        # record when the request failed; stop before any file is created.
        if isinstance(payload, int):
            if payload == 400:
                raise ValueError("{} not found on {}".format(
                    identifier, source))
            raise ValueError(
                "{} could not be retrieved from {} (status {})".format(
                    identifier, source, payload))

    def _as_text(payload):
        # Depending on the bioservices version (API change in v1.7.8) the
        # record comes back as bytes or as str; the files are text files.
        if isinstance(payload, bytes):
            return payload.decode()
        return payload

    if genbank:
        from bioservices import EUtils
        eu = EUtils()
        data = eu.EFetch(db="nuccore",
                         id=identifier,
                         rettype="gbwithparts",
                         retmode="text")
        _check_status(data, "NCBI")
        with open("%s.gbk" % tag, "w") as fout:
            fout.write(_as_text(data))

    if fasta:
        from bioservices import ENA
        ena = ENA()
        data = ena.get_data(identifier, 'fasta')
        _check_status(data, "ENA")
        with open("%s.fa" % tag, "w") as fout:
            fout.write(_as_text(data))

Пример #2

Показать файл

Файл: makeTree.py Проект: Mestalbet/PhyloClustering

def findName(accession):
    """Return the scientific name of the taxon linked to an accession.

    The accession is searched in the NCBI "gene" database first and, when
    that yields nothing, in the "protein" database. The first hit is linked
    to the Taxonomy database, the taxonomy record is fetched and the text of
    its ``Taxon/ScientificName`` element is returned.

    :param accession: search term passed to ``ESearch``.
    :return: the scientific name, or None when the accession is found in
        neither database (an error line is printed in that case).
    """
    s = EUtils()
    theID = None
    geneOrProtein = "gene"
    # Try the gene database first, then fall back to the protein database.
    for database in ("gene", "protein"):
        res = s.ESearch(database, accession)
        if res["idlist"]:
            theID = res["idlist"][0]
            geneOrProtein = database
            break

    # Couldn't find in either database: there is nothing to link, so stop
    # here instead of querying ELink with a missing id.
    if theID is None:
        print("ERROR: couldn't find link for %s" % accession)
        return None

    # Get link to the corresponding ID in the Taxonomy database
    link = s.ELink(db="taxonomy",
                   dbfrom=geneOrProtein,
                   id=theID,
                   retmode="json")
    taxID = json.loads(link)["linksets"][0]["linksetdbs"][0]["links"][0]

    # Download taxonomy record and pull the scientific name out of the XML
    tax = s.EFetch(db="taxonomy", id=taxID)
    tree = ElementTree.fromstring(tax)
    taxonTag = tree.find("Taxon")
    sciName = taxonTag.find("ScientificName")
    return sciName.text

Пример #3

Показать файл

Файл: metadata.py Проект: kmcluskey/FlyOmics_1

def get_entrez_summary(gene_id):
    """Fetch the "gene" database record(s) matching *gene_id* as text.

    *gene_id* is used as the ``ESearch`` term in the "gene" database; the
    resulting id list is passed to ``EFetch`` and the response is decoded
    as UTF-8.

    :param gene_id: search term for the "gene" database.
    :return: the decoded ``EFetch`` response.
    """
    client = EUtils()
    matching_ids = client.ESearch('gene', gene_id)['idlist']
    raw_record = client.EFetch('gene', matching_ids)
    return raw_record.decode("utf-8")

Пример #4

Показать файл

Файл: databases.py Проект: naveen584/sequana

class EUtilsTools(object):
    """Simple wrapper around EUtils to fetch basic information about an accession number


    ::

        >>> from sequana.databases import EUtilsTools
        >>> et = EUtilsTools()
        >>> et.accession_to_info("K01711.1")
        {'K01711.1': {'accession': '331784',
          'comment': 'Measles virus (strain Edmonston), complete genome',
          'gi': '331784',
          'identifier': 'gi|331784|gb|K01711.1|MEANPCG[331784]',
          'taxid': '11234'}}


    """
    def __init__(self):
        from bioservices import EUtils
        self.eutils = EUtils()

    def accession_to_info(self, ids):
        """Return the document summary of one or several accessions.

        :param ids: a string holding one accession or several
            comma-separated accessions, or a list/tuple of accession
            strings. The value is handed to ``EFetch`` unchanged.
        :return: dictionary mapping each requested accession (in request
            order) to an ``AttrDict`` with the keys ``taxid``,
            ``accession``, ``identifier``, ``gi`` and ``comment``.
        """
        res = self.eutils.EFetch(db="nuccore",
                                 id=ids,
                                 rettype="docsum",
                                 retmode="dict")

        res = res['eSummaryResult']['DocSum']

        # if one id provided, it will be a dict, otherwise a list of dicts
        if not isinstance(res, list):
            res = [res]

        # Names under which the records are stored, in request order.
        if isinstance(ids, str):
            accessions = [x.strip() for x in ids.split(',')]
        else:
            accessions = [x.strip() for x in ids]

        def item_text(items, name):
            # Text of the first <Item> whose Name attribute equals *name*.
            return [x for x in items if x['@Name'] == name][0]['#text']

        # now we can loop over all identifiers
        records = {}
        for i, entry in enumerate(res):
            items = entry['Item']
            identifier = item_text(items, "Extra")
            if "||" in identifier:
                # strip content after ||
                identifier = identifier.split("||")[0]

            record = {
                "taxid": item_text(items, "TaxId"),
                # the 'Id' field of the summary is stored as the accession
                'accession': entry['Id'],
                "identifier": identifier,
                'gi': item_text(items, "Gi"),
                'comment': item_text(items, "Title")
            }

            records[accessions[i]] = AttrDict(**record)
        return records

Пример #5

Показать файл

class KrakenBuilderBase():
    """Helpers to download the sequences used to build a database.

    Files are retrieved from the NCBI FTP server (RefSeq release files and
    assembly reports) or, accession by accession, through
    ``bioservices.EUtils``.
    """

    def __init__(self, dbname):
        """Store the database name and create the download clients.

        :param dbname: name of the database; used as the parent directory
            of ``library/`` when accession FASTA files are written.
        """
        from bioservices import EUtils
        self.dbname = dbname
        self.eutils = EUtils()
        self.enadb = ENADownload()

        # Names accepted as *category*; they are used as directory names
        # on the NCBI FTP server.
        self.category = [
            'archaea', 'bacteria', 'fungi', 'invertebrate', 'mitochondrion',
            'other', 'plant', 'plasmid', 'plastid', 'protozoa',
            'vertebrate_mammalian', 'vertebrate_other', 'viral'
        ]

    def _retrieve_genomic_fna(self, password, category, report):
        """Download every ``genomic.fna`` file of a RefSeq release category.

        Files are written in the current working directory under their
        remote name; *report* is called with each file name once the file
        has been saved.
        """
        import ftplib

        # The context managers close the FTP session and every output
        # file, even when a transfer fails half-way.
        with ftplib.FTP("ftp.ncbi.nlm.nih.gov") as ftp:
            ftp.login("anonymous", password)
            ftp.cwd("refseq/release/{}".format(category))
            for filename in ftp.nlst():
                if "genomic.fna" in filename:
                    with open(filename, "wb") as fout:
                        ftp.retrbinary('RETR ' + filename, fout.write)
                    report(filename)

    def download_ncbi_refseq(self, category):
        """Download all files of type *fna* from ncbi FTP.

        ::

            kb = KrakenBuilder()
            kb.download_ncbi_refseq("viral")

        :param category: one of the names listed in ``self.category``.
        """
        assert category in self.category, "Please use one of {}".format(
            self.category)

        self._retrieve_genomic_fna("*****@*****.**", category, print)

    def download_genomes_from_ncbi(self, email, category):
        """This downloads all genomes on ncbi for a given category looking at
        their ftp. This could be highly redundant.

        :param email: used as the password of the anonymous FTP login.
        :param category: one of the names listed in ``self.category``.
        """
        assert category in self.category

        self._retrieve_genomic_fna(email, category, logger.info)

    def _download_assembly_report(self, category):
        """Download ``assembly_summary.txt`` of a RefSeq genomes category.

        The file is saved in the current working directory as
        ``assembly_summary_<category>.txt``.
        """
        import ftplib

        assert category in self.category

        filename = "assembly_summary.txt"
        outname = filename.replace(".txt", "_{}.txt".format(category))
        with ftplib.FTP("ftp.ncbi.nlm.nih.gov") as ftp:
            ftp.login("anonymous", "anonymous")
            ftp.cwd("genomes/refseq/{}".format(category))
            with open(outname, "wb") as fout:
                ftp.retrbinary('RETR ' + filename, fout.write)
        logger.info(filename)

    def download_accession_from_ncbi(self, accession):
        """Fetch the FASTA file of one or several accessions from NCBI.

        Each record is written to ``<dbname>/library/<accession>.fa``.
        Accessions that cannot be fetched are reported and skipped.

        :param accession: a list (or tuple) of accessions, a single
            accession string, or the name of an existing file holding
            whitespace-separated accessions.
        :raises TypeError: if *accession* is none of the above.
        """
        import os

        if isinstance(accession, (list, tuple)):
            accessions = list(accession)
        elif isinstance(accession, str):
            if os.path.exists(accession):
                with open(accession, "r") as fin:
                    accessions = fin.read().split()
            else:
                accessions = [accession]
        else:
            raise TypeError(
                "accession must be a list, a string or a filename")

        from easydev import Progress
        N = len(accessions)
        pb = Progress(N)
        logger.info("Fetching {} accession fasta files from NCBI".format(N))
        for i, name in enumerate(accessions):
            data = self.eutils.EFetch("nucleotide",
                                      rettype="fasta",
                                      id=name,
                                      retmode="text")
            # An integer answer is treated as a failed request.
            if isinstance(data, int):
                logger.info(
                    "Could not fetch this accession: {}. continue".format(
                        name))
                print("Could not fetch this accession: {}. continue".format(
                    name))
            else:
                # The file is opened in binary mode: encode a text answer.
                if isinstance(data, str):
                    data = data.encode()
                outname = "{}/library/{}.fa".format(self.dbname, name)
                with open(outname, "wb") as fout:
                    fout.write(data)
            pb.animate(i + 1)