def read_single_with_titles(filename, alphabet):
    """Parse *filename* and assert it contains exactly one FASTA record.

    :param filename: path to a FASTA file expected to hold a single entry
    :param alphabet: Biopython alphabet passed through to FastaIterator
    :return: the single SeqRecord, parsed with the title_to_ids title function
    """
    global title_to_ids
    # Context manager so the handle is closed even on parse errors
    # (the original leaked the open file object).
    with open(filename) as handle:
        iterator = FastaIterator(handle, alphabet, title_to_ids)
        # iterator.next() is Python 2 only; the next() builtin works everywhere.
        record = next(iterator)
        try:
            second = next(iterator)
        except StopIteration:
            second = None
    assert record is not None and second is None
    return record
def filter_influenza_fa(in_fasta, out_fasta, pattern, accession_set):
    """Filter influenza FASTA records by accession ID.

    Records whose description lacks a '(' are matched against *pattern*;
    capture group 1 is taken as the accession.  Consecutive duplicates are
    skipped, and records whose accession is in *accession_set* are appended
    to *out_fasta*.

    :param in_fasta: input FASTA path
    :param out_fasta: output FASTA path (opened in append mode)
    :param pattern: regex with at least three capture groups; group 1 is
        the accession ID
    :param accession_set: set of accession IDs that we query the fasta
        header against
    :return: number of records written (the accession list is collected
        internally and could be returned also)
    """
    cache_previous = ()
    count, kept = 0, []
    with open(in_fasta) as handle, open(out_fasta, 'a+') as out:
        for record in FastaIterator(handle):  # [^1]
            # str supports 'in' directly; no need to listify the description.
            if '(' in record.description:
                continue
            match = re.search(pattern, record.description)
            if match is None:
                # The original crashed here with AttributeError on .group();
                # skip records the pattern cannot parse instead.
                continue
            cache_current = match.group(1, 2, 3)
            if cache_current[0] in cache_previous:  # [^2]
                continue
            acc = cache_current[0]
            cache_previous = cache_current
            if acc in accession_set:
                count += 1
                kept.append(acc)
                out.write('>' + acc + '\n')
                out.write(str(record.seq) + '\n')
    return count  # kept (the accession list) could be returned also
def writeClassifiedFastas(classType, Dirr, resultsDir, df):
    """Split FASTA files into per-class FASTA output files.

    For every FASTA file found under *Dirr*, each record whose id appears
    in *df*'s index is appended to ``resultsDir\\<classname>.fasta``, where
    the class name is looked up from *df* and sanitized for use as a
    Windows filename.

    NOTE(review): several spots look suspect -- see inline notes.
    """
    fasta_files_dict = Get_Dirr_All_Fasta(classType, Dirr)
    classDict = {}   # classname -> open output file handle
    writerDict = {}  # classname -> FastaWriter wrapping that handle
    for key, value in fasta_files_dict.items():
        # NOTE(review): one-entry dict; the inner loop runs exactly once
        # per (key, value) pair -- a direct loop would be equivalent.
        files = {key: value}
        for filename, classname in files.items():
            with open(filename) as fasta:
                for record in FastaIterator(fasta):  # SeqIO.SimpleFastaParser(fasta):
                    # NOTE(review): record is a SeqRecord, so record[0] is a
                    # one-letter sub-record, not a title string -- this looks
                    # like leftover code from SimpleFastaParser's
                    # (title, seq) tuples; title.split() below would fail if
                    # actually executed as-is.  Confirm against callers.
                    title = record[0]
                    seq_id = title.split(None, 1)[0]  # NOTE(review): unused
                    if (record.id in df.index):
                        # NOTE(review): df[record.id] indexes *columns*, not
                        # rows; df.loc[record.id] may be intended -- confirm
                        # the shape of df.
                        classname = df[record.id]
                        if (classname not in writerDict):
                            # Keep only alphanumerics and spaces so the class
                            # name is safe as a filename.
                            classname = "".join([c for c in classname if c.isalpha() or c.isdigit() or c == ' ']).rstrip()
                            file = resultsDir + '\\' + classname + '.fasta'
                            classHandle = open(file, "w")
                            classDict[classname] = classHandle
                            myWriter = FastaWriter(classDict[classname])
                            myWriter.write_header()
                            writerDict[classname] = myWriter
                        writerDict[classname].write_record(record)
    # Finalize and close every per-class writer/handle.
    for classname, classHandle in classDict.items():
        writerDict[classname].write_footer()
        classDict[classname].close()
def cut_fasta_by_len(fa_file, len_cutoff, outdir, prefix, suffix):
    """Write records of *fa_file* with length >= *len_cutoff* to a new file.

    The output path is ``outdir/<prefix>.ge<len_cutoff><suffix>``; if it
    already exists and is non-empty it is returned unchanged (resume
    support).  Gzipped input (``.gz``) is handled transparently.

    :param fa_file: input FASTA path, optionally gzip-compressed
    :param len_cutoff: minimum record length to keep
    :param outdir: output directory, created if missing
    :param prefix: output file name prefix
    :param suffix: output file name suffix (extension)
    :return: path of the length-filtered FASTA file
    """
    # exist_ok=True defeats the race condition when another thread creates
    # the path (replaces the manual errno.EEXIST dance).
    os.makedirs(outdir, exist_ok=True)
    cut_fa_file = os.path.join(outdir, prefix + ".ge" + str(len_cutoff) + suffix)
    if os.path.exists(cut_fa_file) and (os.path.getsize(cut_fa_file) > 0):
        return cut_fa_file
    if fa_file.endswith(".gz"):
        in_h = gzip.open(fa_file, 'rt')
    else:
        in_h = open(fa_file, 'r')
    # FastaIterator + FastaWriter is used instead of the simpler
    # SeqIO.parse()/SeqIO.write() API (kept from the original).
    try:
        with open(cut_fa_file, 'w') as out_h:
            writer = FastaWriter(out_h)
            writer.write_header()
            for rec in FastaIterator(in_h):
                if len(rec) >= len_cutoff:
                    writer.write_record(rec)
            writer.write_footer()
    finally:
        # Close the input even if writing raises (the original leaked it).
        in_h.close()
    return cut_fa_file
def align(fh, transl=True):
    """Translate and align a pangenome cluster FASTA file with MUSCLE.

    :param fh: open handle to the cluster FASTA file
    :param transl: NOTE(review): currently unused -- translation always
        happens regardless of this flag; confirm intended semantics.
    :return: CLUSTAL-formatted alignment text read from MUSCLE's stdout
    """
    muscle_cmd = MuscleCommandline(
        r'C:\Users\matthewwhiteside\workspace\b_ecoli\muscle\muscle3.8.31_i86win32.exe',
        clwstrict=True)
    # Run MUSCLE over stdin/stdout so no temporary files are needed.
    proc = subprocess.Popen(
        str(muscle_cmd),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        shell=False)
    # Build the translated FASTA payload fed to MUSCLE's stdin.
    chunks = []
    for record in FastaIterator(fh):
        protein = str(record.translate(table="Bacterial").seq)
        chunks.append(">" + record.id + "\n" + protein + "\n")
    aligned, _err = proc.communicate(input="".join(chunks))
    return aligned
def parse(fasta_file):
    """Build a RefProtFastaFile populated with one entry per FASTA record."""
    result = RefProtFastaFile(fasta_file)
    with open(result.filename) as handle:
        for rec in FastaIterator(handle):
            entry = RefProtFastaEntry.parse_fasta_record(rec, result.taxon_id)
            result.add_entry(entry)
    return result
def fasta_reader(filename):
    """Read a FASTA file into a list of [id, sequence] string pairs.

    :param filename: path to the FASTA file
    :return: list of two-element lists ``[record_id, sequence]``
    """
    from Bio.SeqIO.FastaIO import FastaIterator
    # Comprehension instead of an append loop; also avoids shadowing the
    # builtin ``input`` the original used as its accumulator name.
    with open(filename) as handle:
        return [[str(record.id), str(record.seq)]
                for record in FastaIterator(handle)]
def generate_fake_genome(sample: str, reference: Path, vcf_path: Path,
                         ploidy_dict: Dict[str, int]
                         ) -> Generator[SeqRecord, None, None]:
    """Generate a fake genome given a VCF, a reference, and a ploidy dict.

    A fasta record is yielded for each chromosome allele.

    :param sample: The name in the sample of the VCF to use
    :param reference: The reference fasta file to use
    :param vcf_path: The path to the VCF
    :param ploidy_dict: A dictionary containing the ploidies for each contig.
    :return: A Generator that creates the chromosomes one by one.
    """
    mutations_dict = vcf_to_mutations(str(vcf_path), sample)
    with reference.open("rt") as reference_h:
        for contig in FastaIterator(reference_h):
            # Contigs absent from ploidy_dict are assumed diploid.
            for allele_no in range(ploidy_dict.get(contig.id, 2)):
                # Default to an empty list if no mutations were listed
                # for this contig/allele pair.
                allele_mutations = (mutations_dict.get(contig.id, {})
                                    .get(allele_no, []))
                mutated = sequence_with_mutations(
                    sequence=str(contig.seq),
                    mutations=allele_mutations)
                allele_id = contig.id + "_" + str(allele_no)
                yield SeqRecord(
                    Seq(mutated, contig.seq.alphabet),
                    id=allele_id, name=allele_id, description=allele_id)
def add_gc_content(df, df_sub, args):
    """Fill a 'gc_content' column of *df_sub* in place.

    For ``args.feature == 'gene'`` the GC content is computed over the
    concatenated exon sequences looked up from *args.fasta* (keyed as
    ``seqname:start-end`` with 0-based start); for ``'transcript'`` it is
    computed per transcript FASTA record.

    :param df: GTF-like DataFrame with gene_id/feature/seqname/start/end
        (and transcript_id for the transcript branch)
    :param df_sub: DataFrame indexed by gene or transcript id; modified
        in place and returned
    :param args: namespace with ``feature`` and an open ``fasta`` handle
    :return: *df_sub* with 'gc_content' filled where possible
    :raises ValueError: if args.feature is neither 'gene' nor 'transcript'
    """
    if args.feature == 'gene':
        fasta = {c[0]: c[1] for c in SimpleFastaParser(args.fasta)}
        gene2exon_indx = df.groupby(['gene_id', 'feature'])
        exons = defaultdict(str)
        gene_ids = set(df.gene_id)
        for gene in gene_ids:
            idx = gene2exon_indx.groups[(gene, 'exon')]
            # (dropped the unused `nn` counter from the original)
            for _, row in df.iloc[idx, :].iterrows():
                # GTF coordinates are 1-based; the fasta keys use 0-based starts.
                exon_key = '{}:{}-{}'.format(row['seqname'], row['start'] - 1, row['end'])
                # NOTE(review): fasta.get() returns None for a missing exon
                # key, which raises TypeError on the += below; confirm every
                # exon key is present in the FASTA.
                seq = fasta.get(exon_key)
                exons[gene] += seq
        for gene_id in df_sub.index:
            if gene_id in exons:
                df_sub.at[gene_id, 'gc_content'] = GC(exons[gene_id])
            else:
                print("missing gene_id in exons dict:")
                print(gene_id)
    elif args.feature == 'transcript':
        tx_ids = set(df['transcript_id'].values)
        for rec in FastaIterator(args.fasta):
            if rec.id in tx_ids:
                df_sub.loc[rec.id, 'gc_content'] = GC(rec.seq)
            else:
                print(rec.id)
    else:
        raise ValueError('check feature type!')
    return df_sub
def readFasta(fastaFile):
    """ Reads a FASTA file and parses contigs for GC content.

    Args:
        fastaFile: The path to the FASTA file.
    Returns:
        contigs A dictionary mapping contigIDs to sidr.common.Contig
        objects with GC content as a variable.
    """
    # Support .fa.gz files in a seamless (if slow) way.
    opener = gzip.open if ".gz" in fastaFile else open
    contigs = []
    with opener(fastaFile) as data:
        click.echo("Reading %s" % fastaFile)
        with click.progressbar(FastaIterator(data)) as records:
            for record in records:  # TODO: conditional formatting
                contig_id = record.id.split(' ')[0]
                contigs.append(
                    common.Contig(contig_id, variables={"GC": GC(record.seq)}))
    # Duplicate contig IDs would silently collide in the result dict,
    # so fail loudly instead.
    if len(contigs) != len({c.contigid for c in contigs}):
        raise ValueError("Input FASTA contains duplicate contigIDs, exiting")
    return {c.contigid: c for c in contigs}
def main():
    """Parse CLI arguments and print the mutated FASTA record to stdout."""
    args = argument_parser().parse_args()
    source = Position.from_string(args.source)
    target = Position.from_string(args.target)
    with open(args.fasta, "rt") as fasta_h:
        mutated = mutate(FastaIterator(fasta_h), source, target)
    # No trailing newline: the formatted record already ends with one.
    print(mutated.format("fasta"), end='')
def load_files():
    '''Load all files in to an arrary, unshuffled'''
    data = []
    # Each record is labelled with its file's position in FILES.
    for label, filename in enumerate(FILES):
        with open("data/" + filename) as handle:
            data.extend((record, label) for record in FastaIterator(handle))
    return data
def _fasta_reader(filename: str) -> Iterator:
    """ FASTA file reader as iterator.

    Yields one SeqRecord per FASTA entry.  The previous ``-> SeqRecord``
    annotation was wrong for a generator function; ``Iterator`` matches
    the sibling reader in this file.
    """
    with open(filename) as handle:
        yield from FastaIterator(handle)
def fastarename(input, relabel, output):
    """Rewrite a FASTA file, renaming every record to ``<relabel><counter>``.

    :param input: path of the FASTA file to read
    :param relabel: prefix for the new sequential record names (1-based)
    :param output: path of the renamed FASTA file to write
    """
    from Bio.SeqIO.FastaIO import FastaIterator
    # The original passed a bare open(input) that was never closed;
    # both handles are now managed by the with-statement.
    with open(input) as infile, open(output, 'w') as outfile:
        for counter, record in enumerate(FastaIterator(infile), start=1):
            newName = relabel + str(counter)
            outfile.write(">%s\n%s\n" % (newName, record.seq))
def _fasta_reader(filename: str) -> Iterator:
    """ Read FASTA file content including multifasta format """
    with open(filename) as handle:
        yield from FastaIterator(handle)
def get_base(fasta: str, chromosome: str, start: int, end: Optional[int]):
    """Return the sequence slice ``[start:end)`` of *chromosome* in *fasta*.

    When *end* is None a single base at *start* is returned.

    :raises ValueError: if the chromosome is absent from the FASTA file.
    """
    stop = start + 1 if end is None else end
    with open(fasta, "rt") as fasta_handle:
        for record in FastaIterator(fasta_handle):
            if record.id == chromosome:
                return record[start:stop].seq
    # Exhausting the iterator means the chromosome was not there.
    raise ValueError(f"{chromosome} not found in {fasta}")
def fasta_reader(filename):
    """ Read a multi or single fasta file.
    Inputs:
        filename - string that represents a name of the file or a path
        to the file.
    Outputs:
        A generator object containing a Seq and ID biopython objects.
    """
    # One code path for both plain and gzipped input removes the
    # duplicated parsing loop of the original.
    opener = gzip.open if filename.endswith('.gz') else open
    with opener(filename, 'rt') as handle:
        for record in FastaIterator(handle):
            yield str(record.id), str(record.seq)
def multi_check(self, filename):
    """Test parsing multi-record FASTA files."""
    # Name the input file in assertion failures: the previous f-string
    # had no placeholder and always said "(unknown)".
    msg = f"Test failure parsing file {filename}"
    re_titled = list(FastaIterator(filename, title2ids=title_to_ids))
    default = list(SeqIO.parse(filename, "fasta"))
    self.assertEqual(len(re_titled), len(default), msg=msg)
    for old, new in zip(default, re_titled):
        idn, name, descr = title_to_ids(old.description)
        self.assertEqual(new.id, idn, msg=msg)
        self.assertEqual(new.name, name, msg=msg)
        self.assertEqual(new.description, descr, msg=msg)
        self.assertEqual(new.seq, old.seq, msg=msg)
def multi_check(self, filename, alphabet):
    """Basic test for parsing multi-record FASTA files."""
    # Use context managers: the original passed bare open() results
    # that were never closed.
    with open(filename) as handle:
        re_titled = list(FastaIterator(handle, alphabet, title_to_ids))
    with open(filename) as handle:
        default = list(SeqIO.parse(handle, "fasta", alphabet))
    self.assertEqual(len(re_titled), len(default))
    for old, new in zip(default, re_titled):
        idn, name, descr = title_to_ids(old.description)
        self.assertEqual(new.id, idn)
        self.assertEqual(new.name, name)
        self.assertEqual(new.description, descr)
        self.assertEqual(str(new.seq), str(old.seq))
        self.assertEqual(new.seq.alphabet, old.seq.alphabet)
def read_single_with_titles(filename, alphabet):
    """Parse *filename* and assert it contains exactly one FASTA record.

    :param filename: path to a FASTA file expected to hold a single entry
    :param alphabet: Biopython alphabet passed through to FastaIterator
    :return: the single SeqRecord, parsed with the title_to_ids function
    """
    global title_to_ids
    # with-statement closes the handle even if parsing raises (the
    # original's explicit close() was skipped on exception).
    with open(filename) as handle:
        iterator = FastaIterator(handle, alphabet, title_to_ids)
        record = next(iterator)
        # Two-argument next() replaces the try/except StopIteration dance.
        second = next(iterator, None)
    assert record is not None and second is None
    return record
def read_single_with_titles(filename, alphabet):
    """Parser wrapper to confirm single entry FASTA file."""
    global title_to_ids
    with open(filename) as handle:
        parser = FastaIterator(handle, alphabet, title_to_ids)
        record = next(parser)
        # A second record would mean the file is not single-entry.
        second = next(parser, None)
    assert record is not None and second is None
    return record
def multi_check(self, filename, alphabet):
    """Test parsing multi-record FASTA files."""
    msg = "Test failure parsing file %s" % filename
    with_titles = list(FastaIterator(filename, alphabet, title_to_ids))
    plain = list(SeqIO.parse(filename, "fasta", alphabet))
    self.assertEqual(len(with_titles), len(plain), msg=msg)
    # Every record must agree field-by-field with the default parser,
    # once the title function has been applied to the description.
    for old, new in zip(plain, with_titles):
        want_id, want_name, want_descr = title_to_ids(old.description)
        self.assertEqual(new.id, want_id, msg=msg)
        self.assertEqual(new.name, want_name, msg=msg)
        self.assertEqual(new.description, want_descr, msg=msg)
        self.assertEqual(str(new.seq), str(old.seq), msg=msg)
        self.assertEqual(new.seq.alphabet, old.seq.alphabet, msg=msg)
def read_fasta(inputfile):
    """Method for loading sequences from a FASTA formatted file and storing
    them into a list of sequences and names.

    :param inputfile: .fasta file with sequences and headers to read
    :return: lists of sequences and names.
    """
    names, sequences = [], []
    with open(inputfile) as handle:
        # Biopython's SeqIO FASTA iterator does the actual parsing.
        for record in FastaIterator(handle):
            names.append(record.description)
            sequences.append(str(record.seq))
    return sequences, names
def parse_file(file_path):
    """Count nucleotide letters per record in a FASTA file.

    For every record id the result maps each IUPAC nucleotide code
    (upper and lower case counted separately) to its occurrence count,
    plus three totals: 'all' (every letter), 'all_small' (lowercase
    letters) and 'all_big' (uppercase letters).

    :param file_path: path to the FASTA file
    :return: dict of record id -> letter-count dict
    :raises KeyError: if a sequence contains a letter outside the IUPAC set
    """
    iupac = 'ACGTYMSRWKNDBHV'
    records_letters = {}
    with open(file_path) as in_handle:
        for record in FastaIterator(in_handle):
            # Build the counter table programmatically instead of spelling
            # out the 33 literal keys of the original.
            counts = dict.fromkeys(iupac, 0)
            counts.update(dict.fromkeys(iupac.lower(), 0))
            counts.update({'all': 0, 'all_small': 0, 'all_big': 0})
            records_letters[record.id] = counts
            for letter in record.seq:
                if letter.islower():
                    counts['all_small'] += 1
                else:
                    counts['all_big'] += 1
                counts[letter] += 1
                counts['all'] += 1
    return records_letters
def reheader_fasta(fa_in, fa_out, header_function, in_gz, gz):
    """Copy *fa_in* to *fa_out*, rewriting headers via *header_function*.

    :param fa_in: input FASTA path (gzip-compressed when in_gz is true)
    :param fa_out: output path (bgzf-compressed when gz is true)
    :param header_function: title2ids-style callable applied to each header
    :param in_gz: whether to read the input through gzip
    :param gz: whether to write bgzf-compressed output
    """
    in_h = gzip.open(fa_in, 'rt') if in_gz else open(fa_in, 'r')
    out_h = bgzf.BgzfWriter(fa_out, 'wb') if gz else open(fa_out, 'w')
    writer = FastaWriter(out_h)
    writer.write_header()
    for record in FastaIterator(in_h, title2ids=header_function):
        writer.write_record(record)
    writer.write_footer()
    out_h.close()
    in_h.close()
def create_rs(self, file):
    """Create a randomized companion FASTA for *file*.

    Every record of the input genome gets a randomized sequence (via
    ``self.generate_rs``) written to ``<name>_random<ext>`` inside a
    folder created next to the input file.

    :param file: path of the input genome FASTA
    :return: path of the generated random-genome FASTA
    """
    newpath = Cf().create_file_folder(file=file)
    filename, extension = os.path.splitext(os.path.basename(file))
    random_genome_file = os.path.join(
        newpath, os.path.normpath(os.path.join(filename + '_random' + extension)))
    # 'rU' mode was removed in Python 3.11; plain 'r' already does
    # universal newlines on Python 3.  The redundant RgFile.close()
    # inside the with-block is also dropped.
    with open(file, 'r') as GenomeFile:
        with open(random_genome_file, 'w') as RgFile:
            for record in FastaIterator(handle=GenomeFile):
                print('Creating random record for: ' + record.id)
                created_random_seq = self.generate_rs(str(record.seq))
                random_record = SeqRecord(
                    BioPythonSeq(created_random_seq),
                    id=record.id + '_random_',
                    name=record.name + '_random_',
                    description=record.description + '_random_')
                SeqIO.write(random_record, RgFile, 'fasta')
    return random_genome_file
def parse_file(self, file_path):
    """Parse each record of *file_path* in windows of ``self.window_size``.

    :param file_path: path to the FASTA file to analyse
    :return: dict mapping record id -> {window start index ->
        ``self.parse_sequence`` result}; the trailing (possibly shorter)
        window runs up to ``len(seq) - 1``.
    """
    data = {}
    print("Analysing: " + file_path)
    with open(file_path) as file:
        for record in FastaIterator(file):
            windows = {}
            sequence = record.seq
            last = len(sequence) - 1
            pos = 0
            # Full-size windows first...
            while pos + self.window_size < last:
                windows[pos] = self.parse_sequence(
                    sequence[pos:pos + self.window_size])
                pos += self.window_size
            # ...then the trailing partial window up to (len - 1).
            windows[pos] = self.parse_sequence(sequence[pos:last])
            data[record.id] = windows
    return data
def simple_check(self, filename):
    """Test parsing single record FASTA files."""
    # Name the input file in assertion failures: the previous f-string
    # had no placeholder and always said "(unknown)".
    msg = f"Test failure parsing file {filename}"
    title, seq = read_title_and_seq(filename)  # crude parser
    idn, name, descr = title_to_ids(title)
    # First check using Bio.SeqIO.FastaIO directly with title function.
    records = FastaIterator(filename, title2ids=title_to_ids)
    record = next(records)
    with self.assertRaises(StopIteration):
        next(records)
    self.assertEqual(record.id, idn, msg=msg)
    self.assertEqual(record.name, name, msg=msg)
    self.assertEqual(record.description, descr, msg=msg)
    self.assertEqual(record.seq, seq, msg=msg)
    # Now check using Bio.SeqIO (default settings)
    record = SeqIO.read(filename, "fasta")
    self.assertEqual(record.id, title.split()[0], msg=msg)
    self.assertEqual(record.name, title.split()[0], msg=msg)
    self.assertEqual(record.description, title, msg=msg)
    self.assertEqual(record.seq, seq, msg=msg)
def FindGene(PATRICID, Header):
    """Look up gene annotation and protein sequence for a PATRIC feature.

    Merges the row matching *Header* (a patric_id) from the genome's
    specialty-gene table and its feature table, then attaches the
    amino-acid sequence found under the same id in the genome's protein
    FASTA.

    :param PATRICID: PATRIC genome identifier (used to build file paths)
    :param Header: patric_id queried in both tables and the FASTA
    :return: dict of gene/product(/property/function) values, when found,
        plus 'translation' with the protein sequence
    """
    OUT = dict()
    # NOTE(review): data locations are hard-coded to a cluster filesystem;
    # consider making the base path a parameter.
    SPGENE = pd.read_csv('/pylon5/br5phhp/tv349/AMR/PATRIC/SPGENE/' + PATRICID + '.PATRIC.spgene.tab', sep='\t')
    LocalPos = SPGENE.index[SPGENE['patric_id'] == Header].tolist()
    # if the sequence exists here:
    OUTSPGENE = dict()
    if len(LocalPos) == 1:
        OUTSPGENE = (SPGENE.loc[LocalPos, ['gene', 'product', 'property', 'function']]).to_dict('records')[0]
    FEATURES = pd.read_csv('/pylon5/br5phhp/tv349/AMR/PATRIC/FEATURES/' + PATRICID + '.PATRIC.features.tab', sep='\t')
    LocalPos = FEATURES.index[FEATURES['patric_id'] == Header].tolist()
    OUTFEATURES = dict()
    if len(LocalPos) == 1:
        OUTFEATURES = (FEATURES.loc[LocalPos, ['gene', 'product']]).to_dict('records')[0]
    # Specialty-gene values win over feature-table values for shared keys
    # ('gene', 'product') because OUTSPGENE is unpacked last.
    OUT = {**OUTFEATURES, **OUTSPGENE}
    # Get sequence
    with open("/pylon5/br5phhp/tv349/AMR/PATRIC/PROTEIN/" + PATRICID + ".PATRIC.faa") as handle:
        for record in FastaIterator(handle):
            if record.id == Header:
                AAseq = str(record.seq)
    # NOTE(review): AAseq is unbound (NameError) if Header never matches a
    # FASTA record -- confirm every queried Header is guaranteed present.
    OUT['translation'] = AAseq
    return OUT
FNULL = open(os.devnull, 'w')
pid = os.getpid()
# Reverse-complement the reverse primer so both primers are searched in
# the forward orientation.
ForPrimer = args.fwdprimer
RevPrimer = revcomp_lib.RevComp(args.revprimer)
# print() function calls: the original Python 2 print statements are
# syntax errors under Python 3.
print('Loading ' + '{0:,}'.format(amptklib.countfasta(args.input)) + ' sequence records')
print('Searching for forward primer: %s, and reverse primer: %s' % (ForPrimer, RevPrimer))
print('Requiring reverse primer match with at least %i mismatches' % args.primer_mismatch)
# Loop through seqs, remove primer if found, and truncate to length.
truncated = 'bold2amptk_' + str(pid) + '.truncate.tmp'
with open(truncated, 'w') as output:
    for record in FastaIterator(open(args.input)):
        Seq = str(record.seq)
        StripSeq = ''
        ForCutPos = amptklib.findFwdPrimer(ForPrimer, Seq, args.primer_mismatch, amptklib.degenNucSimple)
        RevCutPos = amptklib.findRevPrimer(RevPrimer, Seq, args.primer_mismatch, amptklib.degenNucSimple)
        if ForCutPos and RevCutPos:
            # Both primers found: keep the region between them.
            StripSeq = Seq[ForCutPos:RevCutPos]
        elif not ForCutPos and RevCutPos:
            # Only the reverse primer found: trim the tail.
            StripSeq = Seq[:RevCutPos]
        if len(StripSeq) >= args.minlen:
            output.write('>%s\n%s\n' % (record.description, StripSeq))
def PairedFastaQualIterator(fasta_handle, qual_handle,
                            alphabet=single_letter_alphabet, title2ids=None):
    """Iterate over matched FASTA and QUAL files as SeqRecord objects.

    For example, consider this short QUAL file::

        >EAS54_6_R1_2_1_413_324
        26 26 18 26 26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26
        26 26 26 23 23
        >EAS54_6_R1_2_1_540_792
        26 26 26 26 26 26 26 26 26 26 26 22 26 26 26 26 26 12 26 26
        26 18 26 23 18
        >EAS54_6_R1_2_1_443_348
        26 26 26 26 26 26 26 26 26 26 26 24 26 22 26 26 13 22 26 18
        24 18 18 18 18

    And a matching FASTA file::

        >EAS54_6_R1_2_1_413_324
        CCCTTCTTGTCTTCAGCGTTTCTCC
        >EAS54_6_R1_2_1_540_792
        TTGGCAGGCCAAGGCCGATGGATCA
        >EAS54_6_R1_2_1_443_348
        GTTGCTTCTGGCGTGGGTGGGGGGG

    You can parse these separately using Bio.SeqIO with the "qual" and
    "fasta" formats, but then you'll get a group of SeqRecord objects with
    no sequence, and a matching group with the sequence but not the
    qualities.  Because it only deals with one input file handle, Bio.SeqIO
    can't be used to read the two files together - but this function can!
    For example,

    >>> rec_iter = PairedFastaQualIterator(open("Quality/example.fasta"),
    ...                                    open("Quality/example.qual"))
    >>> for record in rec_iter:
    ...     print("%s %s" % (record.id, record.seq))
    EAS54_6_R1_2_1_413_324 CCCTTCTTGTCTTCAGCGTTTCTCC
    EAS54_6_R1_2_1_540_792 TTGGCAGGCCAAGGCCGATGGATCA
    EAS54_6_R1_2_1_443_348 GTTGCTTCTGGCGTGGGTGGGGGGG

    As with the FASTQ or QUAL parsers, if you want to look at the
    qualities, they are in each record's per-letter-annotation dictionary
    as a simple list of integers:

    >>> print(record.letter_annotations["phred_quality"])
    [26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 26, 22, 26, 26, 13, 22, 26, 18, 24, 18, 18, 18, 18]

    If you have access to data as a FASTQ format file, using that directly
    would be simpler and more straight forward.

    Note that you can easily use this function to convert paired FASTA and
    QUAL files into FASTQ files:

    >>> from Bio import SeqIO
    >>> rec_iter = PairedFastaQualIterator(open("Quality/example.fasta"),
    ...                                    open("Quality/example.qual"))
    >>> out_handle = open("Quality/temp.fastq", "w")
    >>> SeqIO.write(rec_iter, out_handle, "fastq")
    3
    >>> out_handle.close()

    And don't forget to clean up the temp file if you don't need it
    anymore:

    >>> import os
    >>> os.remove("Quality/temp.fastq")
    """
    from Bio.SeqIO.FastaIO import FastaIterator
    fasta_iter = FastaIterator(fasta_handle, alphabet=alphabet,
                               title2ids=title2ids)
    qual_iter = QualPhredIterator(qual_handle, alphabet=alphabet,
                                  title2ids=title2ids)
    # Stepping both iterators manually (rather than zip) lets us detect
    # extra records found in only one of the two files.
    while True:
        # next(it, None) replaces the Python-2-only .next() method calls.
        f_rec = next(fasta_iter, None)
        q_rec = next(qual_iter, None)
        if f_rec is None and q_rec is None:
            # End of both files
            break
        if f_rec is None:
            raise ValueError("FASTA file has more entries than the QUAL file.")
        if q_rec is None:
            raise ValueError("QUAL file has more entries than the FASTA file.")
        if f_rec.id != q_rec.id:
            raise ValueError("FASTA and QUAL entries do not match (%s vs %s)."
                             % (f_rec.id, q_rec.id))
        if len(f_rec) != len(q_rec.letter_annotations["phred_quality"]):
            raise ValueError("Sequence length and number of quality scores disagree for %s"
                             % f_rec.id)
        # Merge the data....
        f_rec.letter_annotations["phred_quality"] = q_rec.letter_annotations["phred_quality"]
        yield f_rec