Пример #1
0
 def testOmicronPartialInsertionRead(self):
     """
     The offset must be computed correctly for a query whose first base
     falls inside a run of gap characters (an insertion) in the reference.
     """
     # The two strings are kept vertically aligned so the overlap between
     # the query and the reference gap run can be seen at a glance.
     reference, query = (
         'TAATTTAGTGCG---------TGATCTCCCTCAGGGTTTTTCGGCTTTAGAAC',
         '             AGCCAGAATGATCTCCCTCAGGGTTTTTCGGCTTT')
     offset = matchOffset(reference, query)
     self.assertEqual(12, offset)
Пример #2
0
 def testQueryPaddedLeftByFiveReferenceOneGapLeftWithGaps(self):
     """
     With a reference that begins with a gap and has further internal gaps,
     a query indented by five spaces must be given offset 2.
     """
     reference, query = (
         '-AA--G',
         '     A')
     offset = matchOffset(reference, query)
     self.assertEqual(2, offset)
Пример #3
0
 def testQueryPaddedLeftByTwoReferencePaddedLeftByOne(self):
     """
     When the reference is indented by one space and the query by two, the
     query must be given offset 1.
     """
     reference, query = (
         ' AA',
         '  A')
     offset = matchOffset(reference, query)
     self.assertEqual(1, offset)
Пример #4
0
 def testQueryPaddedLeftByFiveReferencePaddedLeftByOneWithGaps(self):
     """
     With a gapped reference that is indented by one space, a query indented
     by five spaces must be given offset 2.
     """
     reference, query = (
         ' AA--G',
         '     A')
     offset = matchOffset(reference, query)
     self.assertEqual(2, offset)
Пример #5
0
 def testQueryPaddedLeftByTwo(self):
     """
     A query indented by two spaces against an unpadded, ungapped reference
     must be given offset 2.
     """
     reference, query = (
         'AAA',
         '  A')
     offset = matchOffset(reference, query)
     self.assertEqual(2, offset)
Пример #6
0
 def testQueryPaddedLeftByOne(self):
     """
     A query indented by one space against an unpadded, ungapped reference
     must be given offset 1.
     """
     reference, query = (
         'AA',
         ' A')
     offset = matchOffset(reference, query)
     self.assertEqual(1, offset)
Пример #7
0
 def testEqualStrings(self):
     """
     A non-empty query that is identical to the (non-empty) reference must
     be given offset 0.
     """
     sequence = 'AA'
     offset = matchOffset(sequence, sequence)
     self.assertEqual(0, offset)
Пример #8
0
 def testEmpty(self):
     """
     Matching an empty query against an empty reference must give offset 0.
     """
     offset = matchOffset('', '')
     self.assertEqual(0, offset)
Пример #9
0
def makeBAM(template, bamReferences=None, fastaReferences=None):
    """
    Make a simple sorted BAM file (plus a FASTA file of references) from a
    template, in a temporary directory that is removed afterwards.
    Note that this code invokes samtools.

    This is a generator that yields exactly once.
    NOTE(review): it is presumably wrapped by C{contextlib.contextmanager}
    via a decorator that lies outside this block - confirm.

    @param template: An iterable of C{str} sequences. The first will be treated
        as the reference, and then subsequent pairs (if any) will be treated as
        read and quality strings. Reads and quality strings can be indented
        with spaces to show where the read aligns with the reference.
    @param bamReferences: If not C{None}, an indexable collection of reference
        reads to list in the SAM header. The first one is the reference the
        template reads are aligned to and its sequence must equal the template
        reference (with left padding and gaps removed). If C{None}, a single
        C{DNARead} with id C{REF_ID} is made from the template reference.
    @param fastaReferences: If not C{None}, the reads to save to the FASTA
        file. If C{None}, the BAM references are saved instead.
    @raise ValueError: If the template does not contain an odd number of
        strings.
    @return: A generator that yields a 2-tuple containing the C{Path} of the
        FASTA reference file and the C{Path} of the BAM file.
    """
    # Materialize the template so that any iterable (as documented), not just
    # a sequence, can have its length taken and be sliced below.
    template = tuple(template)

    if len(template) % 2 != 1:
        raise ValueError(
            'The template must have an odd number of strings, specifying the '
            'reference sequence, then zero or more read/quality pairs.')

    leftPaddedReference = template[0]
    templateSequence = leftPaddedReference.lstrip().replace('-', '')

    if bamReferences is None:
        matchedReference = DNARead(REF_ID, templateSequence)
        bamReferences = Reads([matchedReference])
    else:
        matchedReference = bamReferences[0]
        # Sanity check: The first BAM reference must have the same sequence
        # as the template.
        assert matchedReference.sequence == templateSequence
        bamReferences = Reads(bamReferences)

    fastaReferences = Reads(
        bamReferences if fastaReferences is None else fastaReferences)

    # Create the executor before the temporary directory so that nothing can
    # fail between making the directory and entering the try/finally that
    # removes it.
    e = Executor()
    dirname = mkdtemp(prefix='test-consensus-')

    try:
        fastaFile = Path(dirname) / 'references.fasta'
        samFile = Path(dirname) / 'file.sam'
        bamFile = Path(dirname) / 'file.bam'

        fastaReferences.save(fastaFile)

        with open(samFile, 'w') as fp:
            # SAM header: one @SQ line per reference.
            for reference in bamReferences:
                print(f'@SQ\tSN:{reference.id}\tLN:{len(reference)}', file=fp)

            # After the reference, the template alternates read, quality,
            # read, quality, ... so pair up the odd and even positions.
            readQualityPairs = zip(template[1::2], template[2::2])

            for count, (read, readQuality) in enumerate(readQualityPairs):
                leftPaddedQuery = read.rstrip()
                leftPaddedQuality = readQuality.rstrip()
                assert len(leftPaddedQuery) == len(leftPaddedQuality)
                query = leftPaddedQuery.lstrip()
                quality = leftPaddedQuality.lstrip()

                # Drop gap positions from the query, along with the quality
                # characters found at those same positions.
                ungapped = [(base, qual) for base, qual in zip(query, quality)
                            if base != '-']
                queryNoGaps = ''.join(base for base, _ in ungapped)
                qualityNoGaps = ''.join(qual for _, qual in ungapped)

                fields = (
                    f'read{count}',  # QNAME (query name)
                    0,  # FLAGS
                    matchedReference.id,  # RNAME (reference name)
                    # POS: add one to the offset from matchOffset because
                    # SAM positions are 1-based.
                    matchOffset(leftPaddedReference, leftPaddedQuery) + 1,
                    30,  # MAPQ (mapping quality)
                    makeCigar(leftPaddedReference, leftPaddedQuery),  # CIGAR
                    '*',  # MRNM (mate reference name)
                    0,  # MPOS (mate position)
                    0,  # ISIZE (insert size)
                    queryNoGaps,  # SEQ
                    qualityNoGaps,  # QUAL
                )
                print('\t'.join(map(str, fields)), file=fp)

        e.execute(f'samtools sort -O BAM --write-index -o {str(bamFile)!r} '
                  f'{str(samFile)!r}')
        yield (fastaFile, bamFile)
    finally:
        e.execute(f'rm -fr {dirname!r}')