示例#1
0
def test_nocall():
    """Check the no-call record produced for an explicit, complex CIGAR.

    The contig and the reference cutout are taken from different fixtures on
    purpose, and the mapping is built with a fixed score and CIGAR string
    rather than the values an alignment would produce.
    """
    # Intentionally mismatched
    contig_path = data_file('phony-deletion-01.contig.fa')
    cutout_path = data_file('phony-insertion-01.gdna.fa')

    contig_handle = kevlar.open(contig_path, 'r')
    query = list(kevlar.parse_augmented_fastx(contig_handle))[0]
    cutout_handle = kevlar.open(cutout_path, 'r')
    target = list(kevlar.reference.load_refr_cutouts(cutout_handle))[0]

    aln = VariantMapping(query, target, 1e6, '25D5M22I5M46D8M13D2M35I')

    # All of these fields are expected to be unset for this CIGAR.
    unset_fields = (
        'offset',
        'targetshort',
        'match',
        'leftflank',
        'indel',
        'indeltype',
        'rightflank',
    )
    for field in unset_fields:
        assert getattr(aln, field) is None

    expected_vcf = (
        'yourchr\t801\t.\t.\t.\t.\tInscrutableCigar\t'
        'CIGAR=25D5M22I5M46D8M13D2M35I;KSW2=1000000.0;CONTIG=AACTGGTGGGCTCAAGA'
        'CTAAAAAGACTTTTTTGGTGACAAGCAGGGCGGCCTGCCCTTCCTGTAGTGCAAGAAAAT')
    variants = list(aln.call_variants(21))
    assert len(variants) == 1
    assert variants[0].vcf == expected_vcf
示例#2
0
def test_call_near_end(query, target, dist, n, trimcount):
    """Check the call count and trimmed count for a given ``mindist``.

    ``query`` and ``target`` are fixture file names; ``dist`` is forwarded as
    ``mindist``; ``n`` is the expected number of calls and ``trimcount`` the
    expected value of the mapping's ``trimmed`` attribute. The arguments are
    presumably supplied by a parametrize decorator that is not visible here.
    """
    contig_stream = kevlar.parse_augmented_fastx(
        kevlar.open(data_file(query), 'r'))
    contig = next(contig_stream)
    cutout_stream = kevlar.reference.load_refr_cutouts(
        kevlar.open(data_file(target), 'r'))
    cutout = next(cutout_stream)

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(31, mindist=dist))

    assert len(calls) == n
    assert aln.trimmed == trimcount
示例#3
0
def test_call_num_interesting_kmers():
    """Check the ``IKMERS`` attribute of the single call from ``iktest``."""
    contig_handle = kevlar.open(data_file('iktest.contig.fa'), 'r')
    contig = next(kevlar.parse_augmented_fastx(contig_handle))
    cutout_handle = kevlar.open(data_file('iktest.gdna.fa'), 'r')
    cutout = next(kevlar.reference.load_refr_cutouts(cutout_handle))

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(29))

    assert len(calls) == 1
    # The attribute value is compared as a string, not as an integer.
    assert calls[0].attribute('IKMERS') == '1'
示例#4
0
def test_no_margin(query, target, refr, alt):
    """Check that exactly one passing call with the given alleles is made.

    ``query`` and ``target`` are fixture file names; ``refr`` and ``alt`` are
    the expected reference and alternate alleles of the call. The arguments
    are presumably supplied by a parametrize decorator not visible here.
    """
    contig_stream = kevlar.parse_augmented_fastx(
        kevlar.open(data_file(query), 'r'))
    contig = next(contig_stream)
    cutout_stream = kevlar.reference.load_refr_cutouts(
        kevlar.open(data_file(target), 'r'))
    cutout = next(cutout_stream)

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(31))

    assert len(calls) == 1
    call = calls[0]
    assert call.filterstr == 'PASS'
    assert call._refr == refr
    assert call._alt == alt
示例#5
0
def test_passenger_screen():
    """Check that the second of two calls is filtered as a passenger."""
    contig_handle = kevlar.open(data_file('wasp-pass.contig.augfasta'), 'r')
    contig = next(kevlar.parse_augmented_fastx(contig_handle))
    cutout_handle = kevlar.open(data_file('wasp.gdna.fa'), 'r')
    cutout = next(kevlar.reference.load_refr_cutouts(cutout_handle))

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(29))

    assert len(calls) == 2
    filters = [call.filterstr for call in calls]
    assert filters == ['PASS', 'PassengerVariant']
示例#6
0
def test_variant_window(prefix, cigar, refrwindow, altwindow):
    """Check the alternate and reference windows of a single call.

    ``prefix`` selects the ``<prefix>.contig.fa`` / ``<prefix>.gdna.fa``
    fixture pair and ``cigar`` is passed to the mapping explicitly;
    ``refrwindow`` and ``altwindow`` are the expected window values. The
    arguments are presumably supplied by a parametrize decorator not visible
    here.
    """
    contig_path = data_file(prefix + '.contig.fa')
    cutout_path = data_file(prefix + '.gdna.fa')

    contig_handle = kevlar.open(contig_path, 'r')
    query = list(kevlar.parse_augmented_fastx(contig_handle))[0]
    cutout_handle = kevlar.open(cutout_path, 'r')
    target = list(kevlar.reference.load_refr_cutouts(cutout_handle))[0]

    aln = VariantMapping(query, target, 1e6, cigar)
    variants = list(aln.call_variants(21))

    assert len(variants) == 1
    variant = variants[0]
    assert variant.window == altwindow
    assert variant.refrwindow == refrwindow
示例#7
0
def test_call_ssc_1bpdel():
    """Test 1bp deletion.

    The mapping is built with an explicit score and CIGAR string, and the
    single resulting call is compared by its string form.
    """
    contig_path = data_file('ssc218.contig.augfasta')
    cutout_path = data_file('ssc218.gdna.fa')

    contig_handle = kevlar.open(contig_path, 'r')
    query = list(kevlar.parse_augmented_fastx(contig_handle))[0]
    cutout_handle = kevlar.open(cutout_path, 'r')
    target = list(kevlar.reference.load_refr_cutouts(cutout_handle))[0]

    aln = VariantMapping(query, target, 1e6, '50D132M1D125M50D')
    variants = list(aln.call_variants(31))

    assert len(variants) == 1
    deletion = variants[0]
    assert str(deletion) == '6:23230160:1D'
示例#8
0
def test_drop_numerous_mismatches():
    """Check that the polysnp fixture gives one ``NumerousMismatches`` call.

    The call is expected to carry placeholder ('.') reference and alternate
    alleles.
    """
    contig_handle = kevlar.open(
        data_file('drop-polysnp-contig.augfasta'), 'r')
    contig = next(kevlar.parse_augmented_fastx(contig_handle))
    cutout_handle = kevlar.open(data_file('drop-polysnp-gdna.fa'), 'r')
    cutout = next(kevlar.reference.load_refr_cutouts(cutout_handle))

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(21))

    # Dump every call before asserting so a failure shows what was produced.
    for call in calls:
        print(call.vcf)

    assert len(calls) == 1
    only_call = calls[0]
    assert only_call.filterstr == 'NumerousMismatches'
    assert only_call._refr == '.'
    assert only_call._alt == '.'
示例#9
0
文件: call.py 项目: gsc0107/kevlar
def prelim_call(targetlist, querylist, match=1, mismatch=2, gapopen=5,
                gapextend=0, ksize=31, refrfile=None, debug=False, mindist=5,
                logstream=sys.stderr):
    """Implement the `kevlar call` procedure as a generator function.

    Each query is mapped against every target, the mappings are narrowed down
    with `alignments_to_report`, and the variant calls of each reported
    mapping are yielded.

    :param targetlist: iterable of reference cutouts; processed in order of
                       their `defline`
    :param querylist: iterable of contigs; processed longest first
    :param match: forwarded to `VariantMapping`
    :param mismatch: forwarded to `VariantMapping`
    :param gapopen: forwarded to `VariantMapping`
    :param gapextend: forwarded to `VariantMapping`
    :param ksize: forwarded to `call_variants`
    :param refrfile: when set, and the query has mates, multiple reported
                     mappings are ranked by mate distance
    :param debug: when true, print each reported mapping to `logstream`
    :param mindist: forwarded to `call_variants`
    :param logstream: stream for debug output; also passed to `call_variants`
    :yields: variant calls, annotated with `MATEDIST` when the mapping has a
             truthy `matedist`
    """
    queries = sorted(querylist, reverse=True, key=len)
    if not queries:
        # Nothing to align; leave the targets untouched.
        return

    # The target order does not depend on the query, so sort once. This also
    # keeps a one-shot iterable of targets usable for every query rather than
    # only the first.
    targets = sorted(targetlist, key=lambda cutout: cutout.defline)

    for query in queries:
        alignments = [
            VariantMapping(
                query, target, match=match, mismatch=mismatch, gapopen=gapopen,
                gapextend=gapextend
            )
            for target in targets
        ]
        aligns2report = alignments_to_report(alignments)

        # Mate positions are only used to rank competing mappings.
        if len(aligns2report) > 1 and refrfile and len(query.mates) > 0:
            mate_pos = list(align_mates(query, refrfile))
            if mate_pos:
                for aln in aligns2report:
                    aln.matedist = mate_distance(mate_pos, aln.interval)
                aligns2report.sort(key=lambda aln: aln.matedist)

        for alignment in aligns2report:
            if debug:
                print('DEBUG ', alignment.cutout.defline, ' vs ',
                      alignment.contig.name, '\n', str(alignment), sep='',
                      end='\n\n', file=logstream)
            for varcall in alignment.call_variants(ksize, mindist, logstream):
                # NOTE(review): a mate distance of 0 is falsy and therefore
                # not annotated -- confirm this is intended.
                if alignment.matedist:
                    varcall.annotate('MATEDIST', alignment.matedist)
                yield varcall
示例#10
0
def test_call_ssc_two_proximal_snvs():
    """Test two proximal SNVs.

    For now this acts as a negative control for calling isolated SNVs.
    Support for deciding which (if any) of a set of proximal SNVs is novel is
    planned, at which point this test will need to be updated.
    """
    contig_path = data_file('ssc107.contig.augfasta.gz')
    cutout_path = data_file('ssc107.gdna.fa.gz')

    contig_handle = kevlar.open(contig_path, 'r')
    query = list(kevlar.parse_augmented_fastx(contig_handle))[0]
    cutout_handle = kevlar.open(cutout_path, 'r')
    target = list(kevlar.reference.load_refr_cutouts(cutout_handle))[0]

    aln = VariantMapping(query, target, 1e6, '25D263M25D')
    variants = list(aln.call_variants(31))

    assert len(variants) == 2
示例#11
0
def test_call_truncated_windows(query, target, vw, rw):
    """Check the windows reported for a single call.

    ``query`` and ``target`` are fixture file names; ``vw`` and ``rw`` are the
    expected values of the call's ``window`` and ``refrwindow``. The arguments
    are presumably supplied by a parametrize decorator not visible here.
    """
    contig_stream = kevlar.parse_augmented_fastx(
        kevlar.open(data_file(query), 'r'))
    contig = next(contig_stream)
    cutout_stream = kevlar.reference.load_refr_cutouts(
        kevlar.open(data_file(target), 'r'))
    cutout = next(cutout_stream)

    aln = VariantMapping(contig, cutout)
    if aln.vartype == 'snv':
        # An SNV mapping is expected to carry no indel-related fields.
        for field in ('leftflank', 'indeltype', 'indel', 'rightflank'):
            assert getattr(aln, field) is None

    calls = list(aln.call_variants(31))
    assert len(calls) == 1

    call = calls[0]
    # Echo both windows so a mismatch is easy to inspect in the test log.
    print('VW:', call.window, file=sys.stderr)
    print('RW:', call.refrwindow, file=sys.stderr)
    assert call.window == vw
    assert call.refrwindow == rw
示例#12
0
def test_call_indel_snv():
    """Check that a deletion and an SNV are both called from one mapping.

    The same two-call count is also expected when ``mindist`` is disabled by
    passing ``None``.
    """
    contig_handle = kevlar.open(data_file('indel-snv.contig.augfasta'), 'r')
    contig = next(kevlar.parse_augmented_fastx(contig_handle))
    cutout_handle = kevlar.open(data_file('indel-snv.gdna.fa'), 'r')
    cutout = next(kevlar.reference.load_refr_cutouts(cutout_handle))

    aln = VariantMapping(contig, cutout)
    calls = list(aln.call_variants(31))
    assert len(calls) == 2
    deletion, snv = calls

    # Expected positions are written as "<value> - 1", as in the fixture notes.
    assert deletion._refr == 'CA'
    assert deletion._alt == 'C'
    assert deletion._pos == 501 - 1

    assert snv._refr == 'C'
    assert snv._alt == 'A'
    assert snv._pos == 474 - 1

    calls_without_mindist = list(aln.call_variants(31, mindist=None))
    assert len(calls_without_mindist) == 2
示例#13
0
def test_variant_mapping():
    """Check the sequence ID and interval exposed by a ``VariantMapping``.

    The contig and the cutout are 60 bp sequences built in memory that differ
    at a single base; the mapping is given an explicit score and a ``60M``
    CIGAR string.
    """
    contig_seq = 'CCTGAGCCCTCTCAAGTCGGGTCCTGGCCCGGTCTGCCCATGAGGCTGGGCCTGAGCCCC'
    cutout_seq = 'CCTGAGCCCTCTCAAGTCGGGTCCTGGCCCAGTCTGCCCATGAGGCTGGGCCTGAGCCCC'

    contig = screed.Record(name='contig1', sequence=contig_seq)
    cutout = kevlar.reference.ReferenceCutout(
        defline='chr1_10000-10060', sequence=cutout_seq)

    mapping = VariantMapping(contig, cutout, score=1e6, cigar='60M')

    # Both values are expected to match the fields of the cutout's defline.
    assert mapping.seqid == 'chr1'
    assert mapping.interval == ('chr1', 10000, 10060)
示例#14
0
def test_varmap_str():
    """Check the string form of a mapping against a stored alignment text.

    The expected text is read from ``wasp-align.txt`` and stripped of
    surrounding whitespace before the comparison.
    """
    contig = next(
        kevlar.parse_augmented_fastx(
            kevlar.open(data_file('wasp-pass.contig.augfasta'), 'r')))
    cutout = next(
        kevlar.reference.load_refr_cutouts(
            kevlar.open(data_file('wasp.gdna.fa'), 'r')))
    aln = VariantMapping(contig, cutout)

    # Read the expected text inside a `with` block so the handle is closed
    # instead of being left for the garbage collector.
    with kevlar.open(data_file('wasp-align.txt'), 'r') as alignfile:
        alignstr = alignfile.read().strip()

    # Echo both versions so a mismatch can be inspected in the test log.
    print(str(aln), file=sys.stderr)
    print(alignstr, file=sys.stderr)

    assert str(aln) == alignstr
示例#15
0
文件: call.py 项目: kevlar-dev/kevlar
def prelim_call(targetlist,
                querylist,
                partid=None,
                match=1,
                mismatch=2,
                gapopen=5,
                gapextend=0,
                ksize=31,
                refrfile=None,
                debug=False,
                mindist=5,
                homopolyfilt=True,
                maxtargetlen=10000):
    """Implement the `kevlar call` procedure as a generator function.

    Each query is mapped against every target, the mappings are narrowed down
    with `alignments_to_report`, and the variant calls of each reported
    mapping are yielded.

    :param targetlist: iterable of reference cutouts; processed in order of
                       their `defline`
    :param querylist: iterable of contigs; processed longest first
    :param partid: when not None, each call is annotated with it as `PART`
    :param match: forwarded to `VariantMapping`
    :param mismatch: forwarded to `VariantMapping`
    :param gapopen: forwarded to `VariantMapping`
    :param gapextend: forwarded to `VariantMapping`
    :param ksize: forwarded to `call_variants`
    :param refrfile: accepted but not used in this function
    :param debug: when true, log each reported mapping with `kevlar.plog`
    :param mindist: forwarded to `call_variants`
    :param homopolyfilt: forwarded to `VariantMapping`
    :param maxtargetlen: targets longer than this are mapped with
                         `nocall=True`; a falsy value disables the check
    :yields: variant calls
    """
    queries = sorted(querylist, reverse=True, key=len)
    if not queries:
        # Nothing to align; leave the targets untouched.
        return

    # The target order does not depend on the query, so sort once. This also
    # keeps a one-shot iterable of targets usable for every query rather than
    # only the first.
    targets = sorted(targetlist, key=lambda cutout: cutout.defline)

    for query in queries:
        alignments = list()
        for target in targets:
            nocall = bool(maxtargetlen and len(target) > maxtargetlen)
            mapping = VariantMapping(
                query,
                target,
                match=match,
                mismatch=mismatch,
                gapopen=gapopen,
                gapextend=gapextend,
                homopolyfilt=homopolyfilt,
                nocall=nocall,
            )
            alignments.append(mapping)

        for alignment in alignments_to_report(alignments):
            if debug:
                kevlar.plog(
                    'DEBUG ',
                    alignment.cutout.defline,
                    ' vs ',
                    alignment.contig.name,
                    '\n',
                    str(alignment),
                    sep='',
                    end='\n\n',
                )
            for varcall in alignment.call_variants(ksize, mindist):
                if partid is not None:
                    varcall.annotate('PART', partid)
                yield varcall