Пример #1
0
    def __call__(self, read1, read2):
        """Merge a read pair into a single read when the mates overlap.

        ``read1`` is aligned against the reverse complement of ``read2``. If
        the alignment reports at least the required number of matches,
        ``read1`` is updated in place to hold the merged sequence (and
        qualities, when available), is flagged with ``merged = True``, and
        ``read2`` is dropped from the returned pair.

        Args:
            read1: First read of the pair. Its ``sequence``, ``qualities``,
                ``insert_overlap``, ``corrected`` and ``name`` attributes are
                read; ``sequence``, ``qualities`` and ``merged`` may be
                written.
            read2: Second read of the pair. Its ``sequence``, ``qualities``,
                ``insert_overlap`` and ``corrected`` attributes are read.

        Returns:
            ``(read1, read2)`` if no merge took place, otherwise
            ``(read1, None)``.

        Raises:
            AtroposError: If the alignment coordinates match none of the
                overlap layouts handled below.
        """
        len1 = len(read1.sequence)
        len2 = len(read2.sequence)
        min_overlap = self.min_overlap
        if min_overlap <= 1:
            # Values <= 1 are treated as a fraction of the shorter read's
            # length, with a floor of 2.
            min_overlap = max(2, round(self.min_overlap * min(len1, len2)))

        # Reads shorter than the required overlap can never be merged.
        if len1 < min_overlap or len2 < min_overlap:
            return (read1, read2)

        insert_matched = read1.insert_overlap and read2.insert_overlap

        if insert_matched:
            # If we've already determined that there is an insert overlap
            # with a 3' overhang, we can constrain our alignment
            aflags = START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
        else:
            aflags = SEMIGLOBAL
        # align read1 to read2 reverse-complement to be compatible with
        # InsertAligner
        read2_rc = reverse_complement(read2.sequence)
        aligner = Aligner(read2_rc, self.error_rate, aflags)
        alignment = aligner.locate(read1.sequence)

        if alignment:
            # The aligner's reference is read2_rc and its query is read1, so
            # the first coordinate pair refers to read2_rc and the second to
            # read1.
            r2_start, r2_stop, r1_start, r1_stop, matches, errors = alignment
            if matches >= min_overlap:
                # Only correct errors if we haven't already done correction in
                # the InsertAligner
                if (self.mismatch_action and errors > 0 and not insert_matched
                        and read1.corrected == 0 and read2.corrected == 0):
                    self.correct_errors(read1, read2, alignment)

                if r2_start == 0 and r2_stop == len2:
                    # r2 is fully contained in r1
                    pass
                elif r1_start == 0 and r1_stop == len1:
                    # r1 is fully contained in r2
                    read1.sequence = read2_rc
                    # Guard the reversal: reversed(None) raises TypeError when
                    # the reads carry no qualities. In that case read1 simply
                    # takes over read2's (missing) qualities, keeping it
                    # consistent with the sequence it just inherited.
                    if read2.qualities:
                        read1.qualities = "".join(reversed(read2.qualities))
                    else:
                        read1.qualities = read2.qualities
                elif r1_start > 0:
                    # Append the part of read2_rc that lies beyond the
                    # aligned region.
                    read1.sequence += read2_rc[r2_stop:]
                    if read1.qualities and read2.qualities:
                        read1.qualities += "".join(reversed(
                            read2.qualities))[r2_stop:]
                elif r2_start > 0:
                    # Prepend all of read2_rc and keep only the part of read1
                    # that lies beyond its aligned region.
                    read1.sequence = read2_rc + read1.sequence[r1_stop:]
                    if read1.qualities and read2.qualities:
                        read1.qualities = ("".join(reversed(read2.qualities)) +
                                           read1.qualities[r1_stop:])
                else:
                    raise AtroposError(
                        "Invalid alignment while trying to merge read "
                        "{}: {}".format(read1.name,
                                        ",".join(str(i) for i in alignment)))

                read1.merged = True
                read2 = None

        return (read1, read2)
Пример #2
0
 def __call__(self, read1, read2):
     """Merge a read pair into a single read when the mates overlap.

     ``read1`` is aligned against the reverse complement of ``read2``. If
     the alignment reports at least the required number of matches,
     ``read1`` is updated in place to hold the merged sequence (and
     qualities, when available), is flagged with ``merged = True``, and
     ``read2`` is dropped from the returned pair.

     Args:
         read1: First read of the pair. Its ``sequence``, ``qualities``,
             ``insert_overlap`` and ``name`` attributes are read;
             ``sequence``, ``qualities`` and ``merged`` may be written.
         read2: Second read of the pair. Its ``sequence``, ``qualities``
             and ``insert_overlap`` attributes are read.

     Returns:
         ``(read1, read2)`` if no merge took place, otherwise
         ``(read1, None)``.

     Raises:
         AtroposError: If the alignment coordinates match none of the
             overlap layouts handled below.
     """
     len1 = len(read1.sequence)
     len2 = len(read2.sequence)
     min_overlap = self.min_overlap
     if min_overlap <= 1:
         # Values <= 1 are treated as a fraction of the shorter read's
         # length, with a floor of 2.
         min_overlap = max(2, round(self.min_overlap * min(len1, len2)))

     # Reads shorter than the required overlap can never be merged.
     if len1 < min_overlap or len2 < min_overlap:
         return (read1, read2)

     insert_matched = read1.insert_overlap and read2.insert_overlap

     if insert_matched:
         # If we've already determined that there is an insert overlap
         # with a 3' overhang, we can constrain our alignment
         aflags = START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
     else:
         aflags = SEMIGLOBAL
     # align read1 to read2 reverse-complement to be compatible with
     # InsertAligner
     read2_rc = reverse_complement(read2.sequence)
     aligner = Aligner(read2_rc, self.error_rate, aflags)
     alignment = aligner.locate(read1.sequence)

     if alignment:
         # The aligner's reference is read2_rc and its query is read1, so
         # the first coordinate pair refers to read2_rc and the second to
         # read1.
         r2_start, r2_stop, r1_start, r1_stop, matches, errors = alignment
         if matches >= min_overlap:
             # Only correct errors if we haven't already done correction in
             # the InsertAligner
             if self.mismatch_action and errors > 0 and not insert_matched:
                 self.correct_errors(read1, read2, alignment)

             if r2_start == 0 and r2_stop == len2:
                 # r2 is fully contained in r1
                 pass
             elif r1_start == 0 and r1_stop == len1:
                 # r1 is fully contained in r2
                 read1.sequence = read2_rc
                 # Guard the reversal: reversed(None) raises TypeError when
                 # the reads carry no qualities. In that case read1 simply
                 # takes over read2's (missing) qualities, keeping it
                 # consistent with the sequence it just inherited.
                 if read2.qualities:
                     read1.qualities = "".join(reversed(read2.qualities))
                 else:
                     read1.qualities = read2.qualities
             elif r1_start > 0:
                 # Append the part of read2_rc that lies beyond the
                 # aligned region.
                 read1.sequence += read2_rc[r2_stop:]
                 if read1.qualities and read2.qualities:
                     read1.qualities += "".join(
                         reversed(read2.qualities))[r2_stop:]
             elif r2_start > 0:
                 # Prepend all of read2_rc and keep only the part of read1
                 # that lies beyond its aligned region.
                 read1.sequence = read2_rc + read1.sequence[r1_stop:]
                 if read1.qualities and read2.qualities:
                     read1.qualities = (
                         "".join(reversed(read2.qualities)) +
                         read1.qualities[r1_stop:])
             else:
                 raise AtroposError(
                     "Invalid alignment while trying to merge read "
                     "{}: {}".format(
                         read1.name, ",".join(str(i) for i in alignment)))

             read1.merged = True
             read2 = None

     return (read1, read2)
Пример #3
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Locate ``seq2`` within ``seq1`` and return the matched part of ``seq1``.

    Args:
        seq1: Sequence used as the aligner's reference.
        seq2: Sequence passed to ``Aligner.locate`` as the query.
        min_overlap_frac: Fraction of the shorter sequence's length that is
            used (rounded up) as the aligner's ``min_overlap``.

    Returns:
        ``seq1[match[0]:match[1]]`` for the match reported by the aligner, or
        ``None`` when no match is reported.
    """
    matcher = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    shorter_len = min(len(seq1), len(seq2))
    matcher.min_overlap = math.ceil(shorter_len * min_overlap_frac)
    # NOTE(review): presumably large enough to rule out indels -- confirm.
    matcher.indel_cost = 100000
    hit = matcher.locate(seq2)
    if not hit:
        return None
    start, stop = hit[0], hit[1]
    return seq1[start:stop]
Пример #4
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Return the part of ``seq1`` that the aligner matches against ``seq2``.

    Args:
        seq1: Reference sequence the aligner is constructed with.
        seq2: Query sequence handed to ``Aligner.locate``.
        min_overlap_frac: Multiplied by the length of the shorter sequence
            and rounded up to give the aligner's ``min_overlap``.

    Returns:
        The slice of ``seq1`` between the first two coordinates of the
        reported match, or ``None`` if ``locate`` reports nothing.
    """
    seq_aligner = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    required = math.ceil(min(len(seq1), len(seq2)) * min_overlap_frac)
    seq_aligner.min_overlap = required
    # NOTE(review): presumably large enough to rule out indels -- confirm.
    seq_aligner.indel_cost = 100000
    found = seq_aligner.locate(seq2)
    return seq1[found[0]:found[1]] if found else None
Пример #5
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Align two sequences and return the matched region of the first.

    Args:
        seq1: The sequence the aligner is built on (its reference); the
            returned value is a slice of this sequence.
        seq2: The sequence that is located within ``seq1``.
        min_overlap_frac: Minimum fraction of the shorter sequence's length
            required as overlap; the product is rounded up.

    Returns:
        The portion of ``seq1`` covered by the match, or ``None`` when the
        aligner reports no match.
    """
    ref_aligner = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    min_len = min(len(seq1), len(seq2))
    ref_aligner.min_overlap = math.ceil(min_len * min_overlap_frac)
    # NOTE(review): presumably large enough to rule out indels -- confirm.
    ref_aligner.indel_cost = 100000
    result = ref_aligner.locate(seq2)
    if not result:
        return None
    return seq1[result[0]:result[1]]
Пример #6
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Find where ``seq2`` aligns to ``seq1`` and return that part of ``seq1``.

    Args:
        seq1: Sequence used to construct the aligner.
        seq2: Sequence passed to the aligner's ``locate`` method.
        min_overlap_frac: Minimum fraction of overlapping bases required for
            a match, relative to the shorter of the two sequences.

    Returns:
        ``seq1`` sliced by the first two coordinates of the match, or
        ``None`` if there is no match.
    """
    engine = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    overlap_needed = math.ceil(min(len(seq1), len(seq2)) * min_overlap_frac)
    engine.min_overlap = overlap_needed
    # NOTE(review): presumably large enough to rule out indels -- confirm.
    engine.indel_cost = 100000
    located = engine.locate(seq2)
    if located:
        begin = located[0]
        end = located[1]
        return seq1[begin:end]
    return None
Пример #7
0
 def test_100_percent_error_rate(self):
     """Call ``locate`` on an aligner built with an error rate of 1.0.

     There are no assertions; the test passes if the call completes
     without raising.
     """
     ref_seq = 'GCTTAGACATATC'
     query = 'CAA'
     Aligner(ref_seq, 1.0, flags=BACK).locate(query)
Пример #8
0
 def test(self):
     """Call ``locate`` with a two-base query on a ``BACK``-flagged aligner.

     There are no assertions; the test passes if the call completes
     without raising.
     """
     ref_seq = 'CTCCAGCTTAGACATATC'
     query = 'CC'
     Aligner(ref_seq, 0.1, flags=BACK).locate(query)
Пример #9
0
 def test(self):
     """Run ``locate('CC')`` against an aligner constructed with ``BACK``.

     The result is discarded and nothing is asserted, so this only checks
     that the call does not raise.
     """
     target = 'CTCCAGCTTAGACATATC'
     back_aligner = Aligner(target, 0.1, flags=BACK)
     short_query = 'CC'
     back_aligner.locate(short_query)
Пример #10
0
 def test_100_percent_error_rate(self):
     """Run ``locate('CAA')`` on an aligner whose error rate is 1.0.

     The result is discarded and nothing is asserted, so this only checks
     that the call does not raise.
     """
     target = 'GCTTAGACATATC'
     permissive_aligner = Aligner(target, 1.0, flags=BACK)
     short_query = 'CAA'
     permissive_aligner.locate(short_query)