Exemplo n.º 1
0
def clustal_from_alignment(aln, interleave_len=None):
    """Returns a string in Clustal format.

        - aln: can be an Alignment object or a dict.  Rows are written in
            aln.RowOrder when that attribute exists, otherwise in sorted
            key order.
        - interleave_len: sequence line width.  Only available if sequences are
            aligned.  When None, every sequence is written on a single line.
            Must be >= 1 when given.

    Returns '' for an empty/false aln.  Otherwise the result starts with a
    "CLUSTAL" header line and each block of rows is followed by an empty
    line.

    Raises ValueError if interleave_len is smaller than 1 or if the
    sequences are not all the same length.
    """
    if not aln:
        return ''

    # A zero or negative width would never advance through the alignment,
    # so reject it up front instead of looping forever.
    if interleave_len is not None and interleave_len < 1:
        raise ValueError(
            "interleave_len must be a positive integer, got %r."
            % (interleave_len,))

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # plain mappings have no RowOrder; fall back to sorted labels
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)

    if seqs.isRagged():
        raise ValueError(
            "Sequences in alignment are not all the same length." +
            "Cannot generate Clustal format.")

    aln_len = seqs.SeqLen

    # Pad every label to the longest label plus four spaces so that the
    # sequence columns line up.
    max_spaces = max(len(name) for name in seqs.Names) + 4

    # Pair each label with its sequence, in output order.
    rows = [(label, seqs.NamedSeqs[label]) for label in order]

    clustal_list = ["CLUSTAL\n"]

    if interleave_len is None:
        # single block holding the full-length sequences
        clustal_list.extend(
            "%s%s%s" % (label, ' ' * (max_spaces - len(label)), seq)
            for label, seq in rows)
        clustal_list.append("")
    else:
        # one block per window of interleave_len columns
        for start in range(0, aln_len, interleave_len):
            stop = start + interleave_len
            clustal_list.extend(
                "%s%s%s" % (label, ' ' * (max_spaces - len(label)),
                            seq[start:stop])
                for label, seq in rows)
            clustal_list.append("")

    return '\n'.join(clustal_list)
        
        
Exemplo n.º 2
0
def clustal_from_alignment(aln, interleave_len=None):
    """Returns a string in Clustal format.

        - aln: can be an Alignment object or a dict.  Rows are written in
            aln.RowOrder when that attribute exists, otherwise in sorted
            key order.
        - interleave_len: sequence line width.  Only available if sequences are
            aligned.  When None, every sequence is written on a single line.
            Must be >= 1 when given.

    Returns '' for an empty/false aln.  Otherwise the result starts with a
    "CLUSTAL" header line and each block of rows is followed by an empty
    line.

    Raises ValueError if interleave_len is smaller than 1 or if the
    sequences are not all the same length.
    """
    if not aln:
        return ''

    # Guard against a width that cannot make progress through the
    # alignment (0 or negative), which previously looped without end.
    if interleave_len is not None and interleave_len < 1:
        raise ValueError(
            "interleave_len must be a positive integer, got %r."
            % (interleave_len,))

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # dict-like input: no RowOrder attribute, so sort the labels
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)

    if seqs.isRagged():
        raise ValueError(
            "Sequences in alignment are not all the same length." +
            "Cannot generate Clustal format.")

    aln_len = seqs.SeqLen

    # Column at which sequence data starts: longest label plus four spaces.
    max_spaces = max(len(name) for name in seqs.Names) + 4

    # (label, sequence) pairs in output order
    rows = [(label, seqs.NamedSeqs[label]) for label in order]

    clustal_list = ["CLUSTAL\n"]

    if interleave_len is None:
        # everything in one block, sequences unsliced
        for label, seq in rows:
            padding = ' ' * (max_spaces - len(label))
            clustal_list.append("%s%s%s" % (label, padding, seq))
        clustal_list.append("")
    else:
        # emit successive windows of interleave_len columns
        for start in range(0, aln_len, interleave_len):
            stop = start + interleave_len
            for label, seq in rows:
                padding = ' ' * (max_spaces - len(label))
                clustal_list.append(
                    "%s%s%s" % (label, padding, seq[start:stop]))
            clustal_list.append("")

    return '\n'.join(clustal_list)
Exemplo n.º 3
0
def stockholm_from_alignment(aln, interleave_len=None, GC_annotation=None):
    """Returns a string in Stockholm format.

        - aln: can be an Alignment object or a dict.  Rows are written in
            aln.RowOrder when that attribute exists, otherwise in sorted
            key order.
        - interleave_len: sequence line width.  Only available if sequences are
            aligned.  When None, every sequence is written on a single line.
            Must be >= 1 when given.
        - GC_annotation: dict containing Per-column annotation {<tag>:<s>},
            added to Stockholm file in the following format: #=GC <tag> <s>
            - <s> is an aligned text line of annotation type <tag>.
            - #=GC lines are associated with a sequence alignment block;
            - <s> is aligned to the residues in the alignment block, and has the
            same length as the rest of the block. #=GC lines are
            placed at the end of each block, sorted by tag.

    Returns "" for an empty/false aln.  Otherwise the result starts with
    the "# STOCKHOLM 1.0" header and ends with the "//" terminator.

    Raises ValueError if interleave_len is smaller than 1, if the sequences
    are not all the same length, or if a GC annotation string does not have
    the same length as the alignment.
    """
    if not aln:
        return ""

    # A zero or negative width would never advance through the alignment,
    # so reject it up front instead of looping forever.
    if interleave_len is not None and interleave_len < 1:
        raise ValueError("interleave_len must be a positive integer, got %r." % (interleave_len,))

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # plain mappings have no RowOrder; fall back to sorted labels
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)

    if seqs.isRagged():
        raise ValueError("Sequences in alignment are not all the same length." + "Cannot generate Stockholm format.")

    aln_len = seqs.SeqLen
    # Labels that take part in the padding computation.
    labels = list(seqs.Names)

    # Pair each label with its sequence, in output order.
    rows = [(label, seqs.NamedSeqs[label]) for label in order]

    # (tag, text) pairs sorted by tag; stays empty when no annotation given.
    gc_rows = []
    if GC_annotation is not None:
        gc_rows = [(tag, GC_annotation[tag]) for tag in sorted(GC_annotation)]
        for tag, text in gc_rows:
            if len(text) != aln_len:
                raise ValueError(
                    """GC annotation %s is not same length as alignment. Cannot generate Stockholm format."""
                    % (tag,)
                )
        # The "#=GC " prefix is part of the printed label, so it has to
        # count towards the column width.
        labels.extend("#=GC " + tag for tag, _ in gc_rows)

    # Pad every label to the longest label plus four spaces.
    max_spaces = max(len(label) for label in labels) + 4

    stockholm_list = ["# STOCKHOLM 1.0\n"]

    if interleave_len is None:
        # single block holding the full-length sequences
        stockholm_list.extend("%s%s%s" % (label, " " * (max_spaces - len(label)), seq) for label, seq in rows)
        # the extra 5 accounts for the literal "#=GC " prefix
        stockholm_list.extend(
            "#=GC %s%s%s" % (tag, " " * (max_spaces - len(tag) - 5), text) for tag, text in gc_rows
        )
        stockholm_list.append("")
    else:
        # one block per window of interleave_len columns
        for start in range(0, aln_len, interleave_len):
            stop = start + interleave_len
            stockholm_list.extend(
                "%s%s%s" % (label, " " * (max_spaces - len(label)), seq[start:stop]) for label, seq in rows
            )
            stockholm_list.extend(
                "#=GC %s%s%s" % (tag, " " * (max_spaces - len(tag) - 5), text[start:stop])
                for tag, text in gc_rows
            )
            stockholm_list.append("")

    return "\n".join(stockholm_list) + "//"
Exemplo n.º 4
0
def stockholm_from_alignment(aln, interleave_len=None, GC_annotation=None):
    """Returns a string in Stockholm format.

        - aln: can be an Alignment object or a dict.  Rows are written in
            aln.RowOrder when that attribute exists, otherwise in sorted
            key order.
        - interleave_len: sequence line width.  Only available if sequences are
            aligned.  When None, every sequence is written on a single line.
            Must be >= 1 when given.
        - GC_annotation: dict containing Per-column annotation {<tag>:<s>},
            added to Stockholm file in the following format: #=GC <tag> <s>
            - <s> is an aligned text line of annotation type <tag>.
            - #=GC lines are associated with a sequence alignment block;
            - <s> is aligned to the residues in the alignment block, and has the
            same length as the rest of the block. #=GC lines are
            placed at the end of each block, sorted by tag.

    Returns '' for an empty/false aln.  Otherwise the result starts with
    the "# STOCKHOLM 1.0" header and ends with the "//" terminator.

    Raises ValueError if interleave_len is smaller than 1, if the sequences
    are not all the same length, or if a GC annotation string does not have
    the same length as the alignment.
    """
    if not aln:
        return ''

    # Guard against a width that cannot make progress through the
    # alignment (0 or negative), which previously looped without end.
    if interleave_len is not None and interleave_len < 1:
        raise ValueError(
            "interleave_len must be a positive integer, got %r."
            % (interleave_len,))

    # get seq output order
    try:
        order = aln.RowOrder
    except AttributeError:
        # dict-like input: no RowOrder attribute, so sort the labels
        order = sorted(aln.keys())

    seqs = SequenceCollection(aln)

    if seqs.isRagged():
        raise ValueError(
            "Sequences in alignment are not all the same length." +
            "Cannot generate Stockholm format.")

    aln_len = seqs.SeqLen
    #Get all labels
    labels = list(seqs.Names)

    #Get ordered (label, sequence) pairs
    rows = [(label, seqs.NamedSeqs[label]) for label in order]

    #Sorted (tag, text) pairs; empty when no annotation was supplied
    gc_rows = []
    if GC_annotation is not None:
        gc_rows = [(tag, GC_annotation[tag])
                   for tag in sorted(GC_annotation)]
        for tag, text in gc_rows:
            if len(text) != aln_len:
                raise ValueError(
                    """GC annotation %s is not same length as alignment. Cannot generate Stockholm format.""" % (
                        tag,))
        #Add GC_annotation to list of labels; the '#=GC ' prefix is
        #printed too, so it must count towards the column width.
        labels.extend('#=GC ' + tag for tag, _ in gc_rows)

    #Find the longest label in order to get padding.
    max_spaces = max(len(label) for label in labels) + 4

    stockholm_list = ["# STOCKHOLM 1.0\n"]

    if interleave_len is None:
        #everything in one block, sequences unsliced
        for label, seq in rows:
            padding = ' ' * (max_spaces - len(label))
            stockholm_list.append("%s%s%s" % (label, padding, seq))
        for tag, text in gc_rows:
            #the extra 5 accounts for the literal '#=GC ' prefix
            padding = ' ' * (max_spaces - len(tag) - 5)
            stockholm_list.append("#=GC %s%s%s" % (tag, padding, text))
        stockholm_list.append("")
    else:
        #emit successive windows of interleave_len columns
        for start in range(0, aln_len, interleave_len):
            stop = start + interleave_len
            for label, seq in rows:
                padding = ' ' * (max_spaces - len(label))
                stockholm_list.append(
                    "%s%s%s" % (label, padding, seq[start:stop]))
            for tag, text in gc_rows:
                padding = ' ' * (max_spaces - len(tag) - 5)
                stockholm_list.append(
                    "#=GC %s%s%s" % (tag, padding, text[start:stop]))
            stockholm_list.append("")

    return '\n'.join(stockholm_list) + '//'