Пример #1
0
def load_weighted_msa(work_msa):
    """
    Load a multiple sequence alignment (MSA), split off the reference
    sequence, and weight the remaining sequences.

    The alignment is read in clustal format from ``work_msa.output_aln`` and
    must contain a record whose id equals ``work_msa.shared.target_id`` (the
    reference).  The reference is left out of the returned alignment and is
    attached to it instead:

    - ``msa.ref``: the reference record.
    - ``msa.ref_ungapped``: the result of ``remove_gaps()`` on the reference
      sequence.

    Every sequence (including the reference) is rewritten so that "." is
    used to indicate terminal deletions while "-" is used to indicate
    internal deletions.  Finally the alignment is passed to
    ``weight_alignments()`` before being returned.

    Raises ValueError if no record in the alignment has the target id.
    (Previously this case failed with an AttributeError on ``None``.)
    """
    msa_with_ref = AlignIO.read(work_msa.output_aln, 'clustal')
    target_id = work_msa.shared.target_id

    def to_dots(match):
        # Replace a run of gap characters with an equally long run of dots.
        return '.' * (match.end() - match.start())

    ref = None
    msa = MultipleSeqAlignment([])

    for record in msa_with_ref:
        # Use "." to indicate terminal gaps, and "-" to indicate internal
        # gaps.  Only runs of "-" anchored at the very start or very end of
        # the sequence are rewritten; interior runs are left alone.
        record.seq = Seq(
            re.sub(r'^-+|-+$', to_dots, str(record.seq)),
            record.seq.alphabet,
        )

        # If several records share the target id, the last one wins and the
        # earlier ones are dropped from the alignment (same as before).
        if record.id == target_id:
            ref = record
        else:
            msa.append(record)

    if ref is None:
        # Fail with a message that names the problem instead of crashing
        # below on `None.seq`.
        raise ValueError(
            "reference sequence {!r} not found in alignment {!r}".format(
                target_id, work_msa.output_aln,
            )
        )

    msa.ref = ref
    msa.ref_ungapped = remove_gaps(ref.seq)

    weight_alignments(msa)

    return msa