Exemplo n.º 1
0
def to_alignment(sequences, alphabet=None, strict=True):
    """Returns a multiple sequence alignment (OBSOLETE).

     - sequences - An iterator that returns SeqRecord objects,
                   or simply a list of SeqRecord objects.  All
                   the record sequences must be the same length.
     - alphabet  - Optional alphabet.  Strongly recommended.  If omitted,
                   the records are first read into a list and a consensus
                   alphabet is computed from their sequences.
     - strict    - Optional, defaults to True.  Should error checking
                   be done?

    Raises ValueError if the alphabet is not an Alphabet/AlphabetEncoder
    instance or, in strict mode, if the sequence lengths differ or a
    sequence alphabet is incompatible with the alignment alphabet.
    Raises TypeError if any record has None for its sequence.

    Using this function is now discouraged.  Rather than doing this:

    >>> from Bio import SeqIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = SeqIO.to_alignment(SeqIO.parse(handle, "clustal"))
    >>> handle.close()

    You are now encouraged to use Bio.AlignIO instead, e.g.

    >>> from Bio import AlignIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = AlignIO.read(handle, "clustal")
    >>> handle.close()
    """
    #TODO - Move this functionality into the Alignment class instead?
    from Bio.Alphabet import _consensus_alphabet
    if alphabet is None:
        #Two passes are needed (consensus alphabet, then the records), so
        #a one-shot iterator has to be turned into a list first.
        sequences = list(sequences)
        alphabet = _consensus_alphabet([rec.seq.alphabet for rec in sequences
                                        if rec.seq is not None])

    if not isinstance(alphabet, (Alphabet, AlphabetEncoder)):
        raise ValueError("Invalid alphabet")

    alignment_length = None
    alignment = Alignment(alphabet)
    for record in sequences:
        #Check for a missing sequence before anything touches record.seq,
        #otherwise strict mode fails inside len(None) with an unhelpful
        #TypeError and this message (with the record id) is never seen.
        if record.seq is None:
            raise TypeError("SeqRecord (id=%s) has None for its sequence."
                            % record.id)

        if strict:
            if alignment_length is None:
                alignment_length = len(record.seq)
            elif alignment_length != len(record.seq):
                raise ValueError("Sequences must all be the same length")

            seq_alphabet = record.seq.alphabet
            #Kept as an assert so callers still see AssertionError; note
            #that this check is skipped when running under python -O.
            assert isinstance(seq_alphabet, (Alphabet, AlphabetEncoder)), \
                "Sequence does not have a valid alphabet"

            #TODO - Move this alphabet comparison code into the Alphabet
            #module/class?
            #TODO - Is a normal alphabet "ungapped" by default, or does it
            #just mean undecided?
            if isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Alphabet):
                #Comparing two non-gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (seq_alphabet, alphabet))
            elif isinstance(seq_alphabet, AlphabetEncoder) \
            and isinstance(alphabet, Alphabet):
                raise ValueError(
                    "Sequence has a gapped alphabet, alignment does not")
            elif isinstance(seq_alphabet, Alphabet) \
            and isinstance(alphabet, Gapped):
                #Sequence isn't gapped, alignment is: compare against the
                #alphabet wrapped by the alignment's Gapped encoder.
                if not isinstance(seq_alphabet, alphabet.alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (seq_alphabet, alphabet))
            else:
                #Comparing two gapped alphabets
                if not isinstance(seq_alphabet, alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (seq_alphabet, alphabet))
                if seq_alphabet.gap_char != alphabet.gap_char:
                    raise ValueError(
                        "Sequence gap characters != alignment gap char")
            #ToDo, additional checks on the specified alignment...
            #Should we look at the alphabet.contains() method?

        #This is abusing the "private" records list,
        #we should really have a method like add_sequence
        #but which takes SeqRecord objects.  See also Bug 1944
        alignment._records.append(record)
    return alignment
Exemplo n.º 2
0
def to_alignment(sequences, alphabet=None, strict=True):
    """Returns a multiple sequence alignment (OBSOLETE).

     - sequences - An iterator that returns SeqRecord objects,
                   or simply a list of SeqRecord objects.  All
                   the record sequences must be the same length.
     - alphabet  - Optional alphabet.  Strongly recommended.  When it is
                   not given, the records are collected into a list and
                   a consensus alphabet is derived from their sequences.
     - strict    - Optional, defaults to True.  Should error checking
                   be done?

    A ValueError is raised for an invalid alignment alphabet and, when
    strict is true, for sequences of differing length or a sequence
    alphabet that does not fit the alignment alphabet.  A TypeError is
    raised for any record whose sequence is None.

    Using this function is now discouraged.  Rather than doing this:

    >>> from Bio import SeqIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = SeqIO.to_alignment(SeqIO.parse(handle, "clustal"))
    >>> handle.close()

    You are now encouraged to use Bio.AlignIO instead, e.g.

    >>> from Bio import AlignIO
    >>> handle = open("Clustalw/protein.aln")
    >>> alignment = AlignIO.read(handle, "clustal")
    >>> handle.close()
    """
    #TODO - Move this functionality into the Alignment class instead?
    from Bio.Alphabet import _consensus_alphabet
    if alphabet is None:
        #The records are walked twice (once here, once in the main loop),
        #so materialise them in case an iterator was passed in.
        sequences = list(sequences)
        alphabet = _consensus_alphabet([rec.seq.alphabet for rec in sequences
                                        if rec.seq is not None])

    if not isinstance(alphabet, (Alphabet, AlphabetEncoder)):
        raise ValueError("Invalid alphabet")

    alignment_length = None
    alignment = Alignment(alphabet)
    for record in sequences:
        #Reject a missing sequence up front.  If this ran after the strict
        #checks, len(record.seq) would fail first with a generic TypeError
        #that does not name the offending record.
        if record.seq is None:
            raise TypeError("SeqRecord (id=%s) has None for its sequence." %
                            record.id)

        if strict:
            if alignment_length is None:
                alignment_length = len(record.seq)
            elif alignment_length != len(record.seq):
                raise ValueError("Sequences must all be the same length")

            rec_alphabet = record.seq.alphabet
            #Left as an assert to keep raising AssertionError as before;
            #be aware the check disappears under python -O.
            assert isinstance(rec_alphabet, (Alphabet, AlphabetEncoder)), \
                "Sequence does not have a valid alphabet"

            #TODO - Move this alphabet comparison code into the Alphabet
            #module/class?
            #TODO - Is a normal alphabet "ungapped" by default, or does it
            #just mean undecided?
            if isinstance(rec_alphabet, Alphabet) \
            and isinstance(alphabet, Alphabet):
                #Comparing two non-gapped alphabets
                if not isinstance(rec_alphabet, alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (rec_alphabet, alphabet))
            elif isinstance(rec_alphabet, AlphabetEncoder) \
            and isinstance(alphabet, Alphabet):
                raise ValueError(
                    "Sequence has a gapped alphabet, alignment does not")
            elif isinstance(rec_alphabet, Alphabet) \
            and isinstance(alphabet, Gapped):
                #Sequence isn't gapped, alignment is: the sequence alphabet
                #must match the alphabet inside the Gapped wrapper.
                if not isinstance(rec_alphabet,
                                  alphabet.alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (rec_alphabet, alphabet))
            else:
                #Comparing two gapped alphabets
                if not isinstance(rec_alphabet, alphabet.__class__):
                    raise ValueError("Incompatible sequence alphabet "
                                     "%s for %s alignment"
                                     % (rec_alphabet, alphabet))
                if rec_alphabet.gap_char != alphabet.gap_char:
                    raise ValueError(
                        "Sequence gap characters != alignment gap char")
            #ToDo, additional checks on the specified alignment...
            #Should we look at the alphabet.contains() method?

        #This is abusing the "private" records list,
        #we should really have a method like add_sequence
        #but which takes SeqRecord objects.  See also Bug 1944
        alignment._records.append(record)
    return alignment