Exemplo n.º 1
0
    def __init__(self, consensus_string: str, min_match_length: int, alignment: MSA):
        """Split a consensus string into match and non-match intervals.

        Args:
            consensus_string: consensus of ``alignment``; its characters are
                classified through ``is_non_match`` / ``is_type``.
            min_match_length: stored as ``self.mml``. A consensus string shorter
                than this is recorded as one single interval.
            alignment: the MSA passed on to ``_add_interval`` and to the two
                ``enforce_*`` calls that close the constructor.
        """
        self._match_intervals: Intervals = []
        self._non_match_intervals: Intervals = []
        self.mml = min_match_length

        if len(consensus_string) < self.mml:
            # The whole string becomes one interval. Here a match shorter than
            # min_match_length still counts as a match (usually it would count as
            # a non-match), unless some position is itself a non-match.
            contains_non_match = any(map(is_non_match, consensus_string))
            whole_type = (
                IntervalType.NonMatch if contains_non_match else IntervalType.Match
            )
            last_index = len(consensus_string) - 1
            self._append(Interval(whole_type, 0, last_index))

        else:
            current = self._new_interval(consensus_string[0], 0)

            for position, letter in enumerate(consensus_string[1:], start=1):
                if is_type(letter, current.type):
                    current.modify_by(0, 1)  # simple interval extension
                    continue
                # Type change: hand the finished interval to _add_interval. It may
                # return an interval to carry on with; if it returns None, a fresh
                # interval is started at this letter.
                replacement = self._add_interval(current, alignment)
                current = (
                    self._new_interval(letter, position)
                    if replacement is None
                    else replacement
                )
            # Flush the interval still open when the string ends.
            self._add_interval(current, alignment, end=True)

        # Closing passes over the collected intervals; what each one enforces is
        # defined by the helper itself.
        self.enforce_multisequence_nonmatch_intervals(
            self._match_intervals, self._non_match_intervals, alignment
        )
        self.enforce_alignment_interval_bijection(
            self._match_intervals,
            self._non_match_intervals,
            alignment.get_alignment_length(),
        )
Exemplo n.º 2
0
 def get_sub_alignment_by_list_id(
     self, id_list: List[str], alignment: MSA, interval=None
 ):
     """Return the sub-alignment made of the records whose id is in ``id_list``.

     Args:
         id_list: ids of the records to keep. Ids that match no record in
             ``alignment`` are simply ignored.
         alignment: the alignment to filter. Kept records stay in the order
             they have in ``alignment``, not in the order of ``id_list``.
         interval: optional pair-like object indexed as ``interval[0]`` and
             ``interval[1]``; when given, only the columns from ``interval[0]``
             to ``interval[1]`` (both inclusive) are returned.

     Returns:
         A new ``MSA`` with the selected records (and columns, if requested).
     """
     # Build the lookup set once: membership in a list is a linear scan, which
     # made the filter O(records * ids) for large id lists.
     wanted_ids = set(id_list)
     list_records = [record for record in alignment if record.id in wanted_ids]
     sub_alignment = MSA(list_records)
     if interval is not None:
         # +1 because the interval end is inclusive while slicing is exclusive.
         sub_alignment = sub_alignment[:, interval[0] : interval[1] + 1]
     return sub_alignment
Exemplo n.º 3
0
def make_alignment(seqs: List[str], ids: List[str] = None) -> MSA:
    """Build an MSA from sequence strings of equal length.

    Args:
        seqs: the aligned sequences; all must have the same length.
        ids: optional record ids, one per sequence. When omitted, records are
            named ``s0``, ``s1``, ... in input order.

    Returns:
        An ``MSA`` holding one ``SeqRecord`` per input sequence.

    Raises:
        AssertionError: if the sequences do not all share a single length
            (an empty ``seqs`` also fails this check).
        ValueError: if ``ids`` is given but its length differs from ``seqs``.
    """
    seq_lengths = set(map(len, seqs))
    # Kept as an assert so callers relying on AssertionError are unaffected.
    assert (
        len(seq_lengths) == 1
    ), "Sequences are not the same length, does not represent an alignment"
    if ids is None:
        seqrecords = [
            SeqRecord(Seq(seq), id=f"s{i}") for i, seq in enumerate(seqs)
        ]
    else:
        # zip() stops at the shorter input, which would silently drop sequences
        # (or ids) on a length mismatch; fail loudly instead.
        if len(ids) != len(seqs):
            raise ValueError(
                f"Got {len(ids)} ids for {len(seqs)} sequences; "
                "expected exactly one id per sequence"
            )
        seqrecords = [
            SeqRecord(Seq(seq), id=seq_id) for seq, seq_id in zip(seqs, ids)
        ]
    return MSA(seqrecords)
Exemplo n.º 4
0
 def get_consensus(cls, alignment: MSA):
     """Build the 'consensus string' of an MSA, one character per column.

     A column yields its base when every aligned sequence agrees on it, and
     NONMATCH otherwise. "N" is ignored when looking for agreement, so a column
     consisting only of "N" has no base left and is a non-match. A column
     holding an IUPAC ambiguous base, or consisting only of gaps ("-"), is
     also a non-match (ambiguous bases are expanded later in the prg).
     """
     consensus_chars = []
     for position in range(alignment.get_alignment_length()):
         # Distinct characters seen in this column, with "N" left out.
         bases = {record.seq[position] for record in alignment} - {"N"}
         is_consensus = (
             len(bases) == 1
             and bases != {"-"}
             and not ambiguous_bases.intersection(bases)
         )
         consensus_chars.append(bases.pop() if is_consensus else NONMATCH)
     return "".join(consensus_chars)