# Example #1
0
#! /usr/bin/env python

import sys, os
import time
from Bio import Fasta

# Default index file handed to Fasta.Dictionary, and the default output
# handle (standard output).
DEFAULT_DICT_FILE = '/project1/structure/mliang/pdb/derived_data/pdb_seqres.idx'
DEFAULT_OUTFH = sys.stdout

# Working settings start out as the defaults.
dict_file, outfh = DEFAULT_DICT_FILE, DEFAULT_OUTFH

# Phase 1: open the indexed FASTA dictionary and report how long it took.
start_time = time.time()
fdict = Fasta.Dictionary(dict_file)
elapse_time = time.time() - start_time
print >> sys.stderr, "Time to load dictionary:", elapse_time

# Phase 2: group every dictionary key under its first four characters
# (presumably the PDB ID, with the remainder naming the chain -- confirm
# against the key format used by the index).
start_time = time.time()
chainmap = {}
for key in fdict.keys():
    prefix = key[:4]
    if prefix not in chainmap:
        chainmap[prefix] = []
    chainmap[prefix].append(key)
elapse_time = time.time() - start_time
print >> sys.stderr, "Time to build chain map:", elapse_time

# Reset the timer before processing the inputs.
start_time = time.time()
# Inputs come from the command-line arguments; when none are given, fall
# back to iterating over the lines of standard input.
args = sys.argv[1:]
if not args:
    args = sys.stdin

for field in args:
    # Trim surrounding whitespace and split the entry on whitespace.
    # NOTE(review): the loop body appears to be cut off after this line in
    # this excerpt -- what is done with `fields` is not visible here.
    fields = field.strip().split()
# Example #2
0
# Note that the alphabet is explicitly defined for the sequences.

import os
from Bio import Fasta
from Bio.Alphabet import IUPAC


def get_accession_num(fasta_record):
    """Return the accession of a FASTA record with its version removed.

    The first whitespace-delimited token of ``fasta_record.title`` is split
    on ``|`` and its fourth field is taken as the (possibly versioned)
    accession.  For example, a title starting with
    ``gi|2765658|emb|Z78533.1|CIZ78533`` yields ``Z78533``.

    The version is removed by cutting at the last ``.`` instead of dropping
    a fixed two characters, so multi-digit versions (``.12``) are stripped
    completely and an accession without any version is returned intact.

    Raises:
        IndexError: if the title is empty or its first token has fewer than
            four ``|``-separated fields.
    """
    identifier = fasta_record.title.split()[0]
    versioned_accession = identifier.split('|')[3]
    # rsplit with maxsplit=1 removes only the trailing ".<version>" part and
    # is a no-op when there is no dot at all.
    return versioned_accession.rsplit('.', 1)[0]


if not os.path.isdir("my_orchid_dict.idx"):
    #Build a new index
    Fasta.index_file("ls_orchid.fasta", "my_orchid_dict.idx",
                     get_accession_num)
else:
    print "Reusing existing index"

dna_parser = Fasta.SequenceParser(IUPAC.ambiguous_dna)

orchid_dict = Fasta.Dictionary("my_orchid_dict.idx", dna_parser)

for id_num in orchid_dict.keys():
    print 'id number:', id_num
    print 'description:', orchid_dict[id_num].description
    print 'sequence:', orchid_dict[id_num].seq