Python get_lineage примеры использования

Язык программирования: Python

Пространство имен/Пакет: ncbi

Метод/Функция: get_lineage

Примеров на hotexamples.com: 4

Python get_lineage - 4 примера найдено. Это лучшие примеры Python кода для ncbi.get_lineage, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: get_homologsDB.py Проект: vincentdavis/Phylogenetic_pipeline

		  ) i2 on i.taxid = i2.taxid
		  WHERE qseqid in (%s, %s)
		  GROUP BY qseqid, genus
		  ORDER BY i.taxid, pident, coverage""")
#GROUP BY qseqid, i.taxid
# Open a cursor and run the query; novelseq1/novelseq2 are each bound twice
# (four placeholders in total -- only two are visible in this excerpt).
results = dbconnection.cursor()
results.execute(query, (novelseq1, novelseq2, novelseq1, novelseq2))
# Rebind `results` from the cursor to a list of all fetched rows.
results = list(results)

# get homolog data
# Each entry appended to `homologs` is the original row extended with
# (seqdef, lineage, sequence) fetched through the `ncbi` helper module.
homologs = ()
for homolog in results:
    # Column 1 is used as the taxonomy id and column 4 as the subject GI;
    # the column order itself comes from the SELECT clause, which is not
    # visible here -- TODO confirm against the full query.
    taxid = homolog[1]
    sgi = homolog[4]
    print(taxid)
    lineage = ncbi.get_lineage(taxid)
    sequence = ncbi.get_gene_seq(sgi)
    # Only the first record returned by get_gene_data is consulted.
    seqdef = ncbi.get_gene_data(sgi)[0]['GBSeq_definition']
    # Tuple concatenation: assumes each row is a tuple -- verify for the
    # DB driver in use.
    homologs = homologs + (homolog + (seqdef, lineage, sequence),)


# Build a fasta file from homolog seq
homologsRec1 = []
homologsRec2 = []
# Add novel sequences
# Parse the FASTA file into a dict of records keyed by record id.
# Plain "r" is used instead of "rU": the "U" flag was deprecated in
# Python 3 and removed in 3.11 (open() raises ValueError), and text mode
# already applies universal-newline handling. The `with` block guarantees
# the handle is closed even if parsing fails; SeqIO.to_dict consumes the
# parser completely before the file is closed.
with open("notes/transcriptome/BothNyAd.fa", "r") as handle:
    record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
sid = 'Diaphorina citri'
name = 'Diaphorina citri'

Пример #2

Показать файл

def _normalize_taxon_name(name):
    """Return *name* with '+' and '_' replaced by spaces and 'sp. ' removed."""
    return name.replace('+', ' ').replace('_', ' ').replace('sp. ', '')


def _find_exact_hit(sp_norm, hits, sm_score):
    """Return the first hit having a name equal to *sp_norm*, else None.

    For every hit examined without an exact match, the best similarity
    ratio between *sp_norm* and that hit's normalized names is appended to
    *sm_score*, so that scores stay index-aligned with the hits scanned.
    A hit with no names scores 0.0 instead of raising on ``max([])``.
    """
    for hit in hits:
        hit_norm = [_normalize_taxon_name(name) for name in hit['name']]
        if sp_norm in hit_norm:
            return hit
        ratios = [SM(None, sp_norm, name).ratio() for name in hit_norm]
        sm_score.append(max(ratios) if ratios else 0.0)
    return None


def search(i, q, l, f_l):
    """Search the strain name on the database and establish a species-domain
    look-up dictionary, first search on ENA database, if no exact match is
    found, search on NCBI database, if still no exact match, the species and
    the search result with most similar name will be added into fail_list.

    Args:
        i: Worker/thread number, used only in progress messages.
        q: Queue of species names; one ``task_done()`` is issued per item.
        l: Mapping filled with ``species -> matching hit`` on exact match.
        f_l: Mapping filled with ``species -> most similar hit`` (or ``[]``
            when neither database returned anything) when no exact match
            exists.
    """
    # NOTE(review): qsize() followed by a blocking get() is racy when several
    # workers share the queue -- a worker can block forever if another one
    # takes the last item in between. Left unchanged to preserve behavior.
    while q.qsize() > 0:
        sp = q.get()
        print(
            "[Thread:%d Queue:%d]Searching taxon information for %s on ENA database"
            % (i, q.qsize(), sp))
        ena_hits = taxon(sp)
        # NOTE(review): an earlier comment here mentioned sleeping 0.5 s to
        # avoid being rejected by the database, but no sleep is performed in
        # this function.
        # Normalize once, up front: previously this was only assigned inside
        # the ENA loop, so an empty ENA result caused a NameError in the
        # NCBI pass below.
        sp_norm = _normalize_taxon_name(sp)
        sm_score = []

        match = _find_exact_hit(sp_norm, ena_hits, sm_score)
        if match is not None:
            l[sp] = match
            q.task_done()
            print(
                "[Thread:%d Queue:%d]Taxon information hasbee found for %s on ENA database"
                % (i, q.qsize(), sp))
            continue

        print(
            "[Thread:%d Queue:%d]Exact match is not found in ENA database, searching %s on NCBI database"
            % (i, q.qsize(), sp))
        ncbi_hits = get_lineage(sp)
        match = _find_exact_hit(sp_norm, ncbi_hits, sm_score)
        if match is not None:
            l[sp] = match
            q.task_done()
            print(
                "[Thread:%d Queue:%d]Taxon information hasbee found for %s on NCBI database"
                % (i, q.qsize(), sp))
            continue

        print(
            "[Thread:%d Queue:%d]%s can't be found in both database, added into fail list."
            % (i, q.qsize(), sp))
        # sm_score holds one entry per ENA hit followed by one per NCBI hit,
        # so its indices line up with this concatenation.
        hits = ena_hits + ncbi_hits
        if not hits:
            f_l[sp] = []
        else:
            print(sm_score)
            print(len(hits))
            max_idx = sm_score.index(max(sm_score))
            f_l[sp] = hits[max_idx]
        q.task_done()

Пример #3

Показать файл

Файл: Get_homologTAXA2.py Проект: vincentdavis/Phylogenetic_pipeline

     # Run BLAST for this taxon and keep the returned record on taxainfo.
     taxainfo['blast_record'] = get_BLAST(taxainfo['taxid'], qseqid)
     # The outer try treats any failure below (including one raised inside
     # an inner except handler) as "no match"; inner tries make each NCBI
     # lookup best-effort so one failure does not abort the others.
     try:
         # First 50 characters of the subject sequence of the first HSP of
         # the first alignment.
         taxainfo['sbjctseq'] = taxainfo['blast_record'].alignments[0].hsps[0].sbjct[:50]
         print(taxainfo['sbjctseq'])
         try:
             # The GI is taken from the second '|'-separated field of the
             # alignment title and must parse as an integer.
             taxainfo['GI'] = int(taxainfo['blast_record'].alignments[0].title.split('|')[1])
             print("GI is: " + str(taxainfo['GI']))
         except:
             # NOTE(review): this handler re-evaluates the same title split;
             # if that expression was what failed, it raises again and is
             # reported by the outer handler as "No match found".
             print("GI not valid for " + taxa + " : " + taxainfo['blast_record'].alignments[0].title.split('|')[1])
         try:
             # If 'GI' was not set above, the lookup fails here and is
             # reported through the except branch.
             taxainfo['seqdef'] = ncbi.get_gene_data(taxainfo['GI'])[0]['GBSeq_definition']
             print('GBSeq_definition is ' + taxainfo['seqdef'])
         except:
             print("Unable to get GBSeq_definition")
         try:
             taxainfo['lineage'] = ncbi.get_lineage(taxainfo['taxid'])
         except:
             print("Unable to get Lineage")
         try:
             taxainfo['sequence'] = ncbi.get_gene_seq(taxainfo['GI'])
         except:
             print("unable to get sequence")
     except:
         print("No match found for " + taxa)
     # The record is appended even when lookups failed, so it may be missing
     # some of the keys set above.
     taxarecords.append(taxainfo)
 # Persist the collected taxon records. pickle writes bytes, so the file
 # must be opened in binary mode ('w' fails on Python 3 and is unsafe on
 # Windows under Python 2); `with` closes the handle even if dump() raises.
 with open('temp/' + record + '.pickle', 'wb') as handle:
     pickle.dump(taxarecords, handle)
 homologrec = []
 for trecord in taxarecords:
     try:

Пример #4

Показать файл

Файл: get_homologsTAXA.py Проект: vincentdavis/Phylogenetic_pipeline

# Overwrite each novel record's description with its own dictionary key.
record_dict[novelseq1].description = novelseq1
record_dict[novelseq2].description = novelseq2

# Starts empty; nothing in this excerpt appends to it.
homologs = ()
# Only the first four entries of alltaxa are processed.
for ataxa in list(alltaxa)[:4]:
    print("lookup " + ataxa)
    taxid = ncbi.get_taxid(ataxa)
    print(ataxa + " tax id is " + str(taxid))
    # NOTE(review): query "1" is built from novelseq2 (and query "2" further
    # down from novelseq1) -- confirm this cross-numbering is intentional.
    qseq1 = str(record_dict[novelseq2].seq)
    blast_record = get_BLAST(taxid, qseq1)
    try:
        # First 50 characters of the subject sequence of the top HSP.
        sbjctseq1 = blast_record.alignments[0].hsps[0].sbjct[:50]
        # Second '|'-separated field of the alignment title, kept as a string.
        sgi1 = blast_record.alignments[0].title.split('|')[1]
        print("1", sbjctseq1)
        seqdef1 = ncbi.get_gene_data(sgi1)[0]['GBSeq_definition']
        lineage1 = ncbi.get_lineage(taxid)
        sequence1 = ncbi.get_gene_seq(sgi1)
    except:
        # Any failure above is reported as "No match". Only sgi1 is reset;
        # sbjctseq1/seqdef1/lineage1/sequence1 keep whatever value they had
        # before (possibly from a previous iteration, or unset).
        print("No match")
        sgi1 = 0


    qseq2 = str(record_dict[novelseq1].seq)
    blast_record = get_BLAST(taxid, qseq2)
    try:
        sbjctseq2 = blast_record.alignments[0].hsps[0].sbjct[:50]
        sgi2 = blast_record.alignments[0].title.split('|')[1]
        print("2", sbjctseq2)
        seqdef2 = ncbi.get_gene_data(sgi2)[0]['GBSeq_definition']
        lineage2 = ncbi.get_lineage(taxid)
        sequence2 = ncbi.get_gene_seq(sgi2)