Exemplo n.º 1
0
def _genbank_convert_fasta(in_handle, out_handle, alphabet=None):
    """Convert GenBank input straight to FASTA output (PRIVATE).

    Records are read from ``in_handle`` with feature parsing switched off
    and handed to ``SeqIO.write`` using the ``"fasta"`` format. The
    ``alphabet`` argument is accepted but not used, since it is not needed
    for FASTA output.

    Returns whatever ``SeqIO.write`` returns.
    """
    from Bio.GenBank.Scanner import GenBankScanner

    # Feature tables are not needed for FASTA, so skip parsing them.
    scanner = GenBankScanner()
    record_stream = scanner.parse_records(in_handle, do_features=False)
    return SeqIO.write(record_stream, out_handle, "fasta")
Exemplo n.º 2
0
def GenBankCdsFeatureIterator(handle, alphabet=Alphabet.generic_protein):
    """Split a GenBank file into one SeqRecord per CDS feature.

    Each section running from a LOCUS line to its terminating // may hold
    many CDS features.  Every one of them is returned as its own record,
    carrying the stated amino acid translation sequence (if given).

    ``handle`` and ``alphabet`` are passed unchanged to the scanner's
    ``parse_cds_features`` method.
    """
    scanner = GenBankScanner(debug=0)
    # parse_cds_features is a generator function; its result is returned
    # directly to the caller.
    return scanner.parse_cds_features(handle, alphabet)
Exemplo n.º 3
0
def GenBankIterator(handle):
    """Split a GenBank file into SeqRecord objects.

    Each section running from a LOCUS line to its terminating // is turned
    into a single SeqRecord with associated annotation and features.

    For genomes or chromosomes there is typically only one record.
    """
    scanner = GenBankScanner(debug=0)
    # parse_records is a generator function; its result is returned
    # directly to the caller.
    return scanner.parse_records(handle)
Exemplo n.º 4
0
def reformat_genbank_first_line(first_line_inp, test_line=True):
    """Rebuild the first (LOCUS) line of a GenBank record with fixed spacing.

    The line is split on whitespace. Every token between ``LOCUS`` and the
    token immediately preceding ``bp`` is joined with underscores to form
    the name; the token before ``bp`` and everything after it is lower-cased
    and re-joined with single spaces. The name and that trailing data are
    then separated by enough spaces to pad the line to 80 characters (or by
    a single space when the content is already 80 characters or longer).

    Args:
        first_line_inp: The raw first line of the record.
        test_line: When true, the rebuilt line is fed to
            ``GenBankScanner._feed_first_line`` and any exception it raises
            is captured and returned as the error.

    Returns:
        A ``(line, error)`` tuple. ``line`` is the rebuilt line ending in a
        newline, or ``''`` when the input could not be processed. ``error``
        is ``None`` on success, a message string when ``LOCUS`` or ``bp`` is
        missing, or the exception object raised by the scanner check.
    """
    str_out = ''
    error_name = None

    line_parts = first_line_inp.split()
    # An empty or whitespace-only line yields no tokens; report it like any
    # other line lacking LOCUS instead of raising IndexError on [0].
    if not line_parts or 'LOCUS' not in line_parts[0]:
        error_name = 'Massive error: LOCUS not on first line'
        return str_out, error_name

    try:
        bp_index = line_parts.index('bp')
    except ValueError:
        error_name = 'Missing bp'
        return str_out, error_name

    # The token right before 'bp' belongs to the trailing data, so the name
    # stops one token earlier.
    name = '_'.join(line_parts[1:bp_index - 1])
    padding_data = ' '.join(part.lower() for part in line_parts[bp_index - 1:])

    # 12 = len('LOCUS') plus the 7 spaces that follow it.
    total_len = 12 + len(name) + len(padding_data)
    extra_space = ' ' if total_len >= 80 else ' ' * (80 - total_len)

    str_out = 'LOCUS' + ' ' * 7 + name + extra_space + padding_data + '\n'
    # Echo the tokens of the rebuilt line to stdout (kept from the original
    # implementation).
    print(str_out.split())

    if test_line:
        consumer = _FeatureConsumer(use_fuzziness=1,
                                    feature_cleaner=FeatureValueCleaner())
        try:
            GenBankScanner(debug=1)._feed_first_line(consumer, str_out)
        except Exception as err:
            # Deliberately broad: any failure of the check is reported to
            # the caller rather than propagated.
            error_name = err

    return str_out, error_name
Exemplo n.º 5
0
def GenBankIterator(handle):
    """Split a GenBank file into SeqRecord objects.

    Each section running from a LOCUS line to its terminating // is turned
    into a single SeqRecord with associated annotation and features.

    For genomes or chromosomes there is typically only one record.

    Bio.SeqIO calls this internally for the GenBank file format:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("GenBank/cor6_6.gb", "gb"):
    ...     print(record.id)
    ...
    X55053.1
    X62281.1
    M81224.1
    AJ237582.1
    L31939.1
    AF297471.1

    The same result is obtained by calling it directly:

    >>> with open("GenBank/cor6_6.gb") as handle:
    ...     for record in GenBankIterator(handle):
    ...         print(record.id)
    ...
    X55053.1
    X62281.1
    M81224.1
    AJ237582.1
    L31939.1
    AF297471.1

    """
    scanner = GenBankScanner(debug=0)
    # parse_records is a generator function; its result is returned
    # directly to the caller.
    return scanner.parse_records(handle)