Exemplo n.º 1
0
def test_search_multi():
    """Search the ``chr9-multi`` data for ``rs60995877``.

    Expects seven output lines, each carrying that rsID in the third
    tab-separated column.
    """
    rsidlist = ['rs60995877']
    vcffile = data_file('chr9-multi.vcf.gz')
    idxfile = data_file('chr9-multi.rsidx')
    conn = sqlite3.connect(idxfile)
    try:
        # Materialise the results while the connection is still open.
        outlines = list(rsidx.search.search(rsidlist, conn, vcffile))
    finally:
        # Release the database handle even if the search raises.
        conn.close()
    assert len(outlines) == 7
    for line in outlines:
        assert line.split('\t')[2] == 'rs60995877'
Exemplo n.º 2
0
def test_search_overlapping_variants(doheader, numlines):
    """Search the ``overlap`` data for ``rs8051733``.

    The last output line must contain ``rs8051733`` and must not contain
    ``rs967556605``.

    Args:
        doheader: Forwarded to ``rsidx.search.search`` as its fourth
            positional argument.
        numlines: Expected number of output lines.

    NOTE(review): both arguments are presumably supplied by a pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    rsidlist = ['rs8051733']
    vcffile = data_file('overlap.vcf.gz')
    idxfile = data_file('overlap.sqlite3')
    conn = sqlite3.connect(idxfile)
    try:
        # Materialise the results while the connection is still open.
        outlines = list(rsidx.search.search(rsidlist, conn, vcffile, doheader))
    finally:
        # Release the database handle even if the search raises.
        conn.close()
    assert len(outlines) == numlines
    assert '\trs8051733\t' in outlines[-1]
    assert '\trs967556605\t' not in outlines[-1]
Exemplo n.º 3
0
def test_search_multiple_rsids_single_query():
    """Query each of two rsIDs separately against the ``multiple_id`` data.

    Either query must return exactly one line, and that line must start
    with the record whose ID column lists both rsIDs.
    """
    expected_prefix = '1\t1900106\trs72634902;rs145742571\tT\tC,TCTC'
    for query in (['rs72634902'], ['rs145742571']):
        vcf_path = data_file('multiple_id.vcf.gz')
        index_path = data_file('multiple_id.rsidx')
        db = sqlite3.connect(index_path)
        hits = list(rsidx.search.search(query, db, vcf_path))
        assert len(hits) == 1
        first_hit = hits[0]
        assert first_hit.startswith(expected_prefix)
        db.close()
Exemplo n.º 4
0
def test_search_missing_rsid(capsys):
    """Search the ``chr17-sample`` data for the ID ``123456789``.

    Expects no output lines and a "no rsID matches" warning on the
    captured standard error stream.
    """
    query = [123456789]
    vcf_path = data_file('chr17-sample.vcf.gz')
    index_path = data_file('chr17-sample.rsidx')
    db = sqlite3.connect(index_path)
    hits = list(rsidx.search.search(query, db, vcf_path))
    assert not hits
    db.close()
    captured = capsys.readouterr()
    expected_warning = '[rsidx::search] WARNING: no rsID matches'
    assert expected_warning in captured.err
Exemplo n.º 5
0
def test_search_stdout(capsys):
    """Run the ``search`` subcommand without ``--out`` for five rsIDs.

    Expects the captured standard output, once stripped, to consist of
    five lines.
    """
    queries = [
        'rs1472751972',
        'rs1287502205',
        'rs897983471',
        'rs1172219431',
        'rs189123651',
    ]
    vcf_path = data_file('chr17-sample.vcf.gz')
    index_path = data_file('chr17-sample.rsidx')
    argv = ['search', vcf_path, index_path] + queries
    parser = rsidx.cli.get_parser()
    rsidx.search.main(parser.parse_args(argv))
    captured = capsys.readouterr()
    printed = captured.out.strip()
    assert len(printed.split('\n')) == 5
Exemplo n.º 6
0
def test_search_bad_rsids():
    """Search the ``chr4-sample-corrupted-ids`` data for four rsIDs.

    Expects exactly one output line, starting with the record for
    ``rs1234497371``.
    """
    vcf_path = data_file('chr4-sample-corrupted-ids.vcf.gz')
    index_path = data_file('chr4-sample-corrupted-ids.rsidx')
    queries = [
        'rs538736078',  # replaced by . in VCF
        'rs547329663',  # replaced by bogus ID in VCF
        'rs1440788236',  # valid RSID not present in VCF
        'rs1234497371',  # valid RSID present in VCF
    ]
    db = sqlite3.connect(index_path)
    hits = list(rsidx.search.search(queries, db, vcf_path))
    assert len(hits) == 1
    expected_prefix = '4\t218446\trs1234497371\tC\tCA,CAA'
    assert hits[0].startswith(expected_prefix)
    db.close()
Exemplo n.º 7
0
def test_search(rsidlist):
    """Search the ``chr17-sample`` data for the given rsIDs.

    Expects five output lines whose first five tab-separated fields
    match five known records, in any order.

    Args:
        rsidlist: The rsIDs handed to ``rsidx.search.search``.

    NOTE(review): ``rsidlist`` is presumably supplied by a pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    expected_records = [
        ['17', '944196', 'rs182553373', 'G', 'A'],
        ['17', '611663', 'rs544992196', 'T', 'C'],
        ['17', '1946968', 'rs1245348147', 'T', 'C'],
        ['17', '567599', 'rs1335948438', 'C', 'T'],
        ['17', '374561', 'rs1440788236', 'G', 'T'],
    ]
    vcf_path = data_file('chr17-sample.vcf.gz')
    index_path = data_file('chr17-sample.rsidx')
    db = sqlite3.connect(index_path)
    hits = list(rsidx.search.search(rsidlist, db, vcf_path))
    assert len(hits) == 5
    # Only the five leading columns are compared.
    leading_fields = []
    for record in hits:
        leading_fields.append(record.split('\t')[:5])
    assert sorted(leading_fields) == sorted(expected_records)
    db.close()
Exemplo n.º 8
0
def test_search_cli(doheader, numlines, suffix):
    """Run the ``search`` subcommand with ``--out`` pointing at a temp file.

    Args:
        doheader: Value assigned to ``args.header`` before the command runs.
        numlines: Expected number of lines in the stripped output file.
        suffix: Suffix of the temporary output file.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    queries = [
        'rs1472751972',
        'rs1287502205',
        'rs897983471',
        'rs1172219431',
        'rs189123651',
    ]
    with NamedTemporaryFile(suffix=suffix) as outfile:
        vcf_path = data_file('chr17-sample.vcf.gz')
        index_path = data_file('chr17-sample.rsidx')
        argv = ['search', vcf_path, index_path, '--out', outfile.name]
        argv += queries
        args = rsidx.cli.get_parser().parse_args(argv)
        args.header = doheader
        rsidx.search.main(args)
        # Read the result back through rsidx.open, as the original test does.
        with rsidx.open(outfile.name, 'r') as fh:
            written = fh.read()
        assert len(written.strip().split('\n')) == numlines
Exemplo n.º 9
0
def test_index_force_reindex(capsys):
    """Run the ``index`` subcommand twice with ``--force`` on one index file.

    Expects the captured standard error to contain ``', overwriting'``.
    """
    with TempFileName(suffix='.rsidx') as index_path:
        vcf_path = data_file('chr9-multi.vcf.gz')
        argv = ['index', '--force', vcf_path, index_path]
        args = rsidx.cli.get_parser().parse_args(argv)
        # The second pass hits the index file created by the first.
        for _ in range(2):
            rsidx.index.main(args)
    captured = capsys.readouterr()
    assert ', overwriting' in captured.err
Exemplo n.º 10
0
def test_index_no_force_reindex(capsys):
    """Run the ``index`` subcommand twice without ``--force``.

    The first run is expected to complete; the second must raise
    ``SystemExit`` and leave a refusal message on standard error.
    """
    with TempFileName(suffix='.rsidx') as index_path:
        vcf_path = data_file('chr9-multi.vcf.gz')
        argv = ['index', vcf_path, index_path]
        args = rsidx.cli.get_parser().parse_args(argv)
        rsidx.index.main(args)
        # Same arguments again: the index file now already exists.
        with pytest.raises(SystemExit):
            rsidx.index.main(args)
    captured = capsys.readouterr()
    refusal = ', stubbornly refusing to proceed'
    assert refusal in captured.err
Exemplo n.º 11
0
def test_index_multi(capsys):
    """Index the ``chr9-multi`` VCF, then search the fresh index via the CLI.

    Expects ``rs60995877`` to appear, tab-delimited, seven times in the
    captured standard output.
    """
    vcffile = data_file('chr9-multi.vcf.gz')
    with TempFileName(suffix='.rsidx') as idxfile, rsidx.open(vcffile,
                                                              'r') as vcffh:
        dbconn = sqlite3.connect(idxfile)
        try:
            # ``with dbconn`` only commits or rolls back the transaction;
            # it does not close the connection.
            with dbconn:
                rsidx.index.index(dbconn, vcffh)
        finally:
            # Close explicitly so no handle stays open on the index file
            # while it is searched and then cleaned up.
            dbconn.close()
        arglist = ['search', vcffile, idxfile, 'rs60995877']
        args = rsidx.cli.get_parser().parse_args(arglist)
        rsidx.search.main(args)
    terminal = capsys.readouterr()
    assert terminal.out.count('\trs60995877\t') == 7
Exemplo n.º 12
0
def test_index_bogus_rsids():
    """Index the ``chr4-sample-corrupted-ids`` VCF and query four rsIDs.

    Expects only ``1234497371`` to be present in ``rsid_to_coord``.
    """
    with NamedTemporaryFile(suffix='.sqlite3') as db:
        dbconn = sqlite3.connect(db.name)
        try:
            # ``with dbconn`` only commits or rolls back the transaction;
            # it does not close the connection.
            with dbconn:
                vcffile = data_file('chr4-sample-corrupted-ids.vcf.gz')
                with rsidx.open(vcffile, 'r') as vcffh:
                    rsidx.index.index(dbconn, vcffh)
                c = dbconn.cursor()
                query = ('SELECT * FROM rsid_to_coord WHERE rsid IN '
                         '(538736078, 547329663, 1440788236, 1234497371)')
                results = list(c.execute(query))
        finally:
            # Close before the temporary database file is deleted.
            dbconn.close()
        assert results == [(1234497371, '4', 218446)]
Exemplo n.º 13
0
def test_index_cli(mainfunc):
    """Build an index for ``chr17-sample`` through ``mainfunc`` and spot-check it.

    Args:
        mainfunc: Callable invoked with the parsed ``index`` arguments.

    NOTE(review): ``mainfunc`` is presumably supplied by a pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    with TempFileName(suffix='.rsidx') as idxfile:
        arglist = ['index', data_file('chr17-sample.vcf.gz'), idxfile]
        args = rsidx.cli.get_parser().parse_args(arglist)
        mainfunc(args)
        conn = sqlite3.connect(idxfile)
        try:
            c = conn.cursor()
            query = ('SELECT * FROM rsid_to_coord WHERE rsid IN '
                     '(548749810, 956322221)')
            results = list(c.execute(query))
        finally:
            # Close before the temporary index file is cleaned up.
            conn.close()
        assert sorted(results) == sorted([(548749810, '17', 1098730),
                                          (956322221, '17', 1227227)])
Exemplo n.º 14
0
def test_index_multi_rsids():
    """Index the ``multiple_id`` VCF and query two rsIDs.

    Expects both rsIDs to be present in ``rsid_to_coord`` with the same
    chromosome and position.
    """
    with NamedTemporaryFile(suffix='.sqlite3') as db:
        dbconn = sqlite3.connect(db.name)
        try:
            # ``with dbconn`` only commits or rolls back the transaction;
            # it does not close the connection.
            with dbconn:
                vcffile = data_file('multiple_id.vcf.gz')
                with rsidx.open(vcffile, 'r') as vcffh:
                    rsidx.index.index(dbconn, vcffh)
                c = dbconn.cursor()
                query = ('SELECT * FROM rsid_to_coord WHERE rsid IN '
                         '(72634902, 145742571)')
                results = list(c.execute(query))
        finally:
            # Close before the temporary database file is deleted.
            dbconn.close()
        assert sorted(results) == sorted([(72634902, '1', 1900106),
                                          (145742571, '1', 1900106)])
Exemplo n.º 15
0
def test_index(cachesize, mmapsize):
    """Index the ``chr17-sample`` VCF with explicit tuning options.

    Spot-checks two rsIDs in the resulting ``rsid_to_coord`` table.

    Args:
        cachesize: Forwarded to ``rsidx.index.index`` as ``cache_size``.
        mmapsize: Forwarded to ``rsidx.index.index`` as ``mmap_size``.

    NOTE(review): both arguments are presumably supplied by a pytest
    parametrization that is not visible in this excerpt -- confirm.
    """
    with NamedTemporaryFile(suffix='.sqlite3') as db:
        dbconn = sqlite3.connect(db.name)
        try:
            # ``with dbconn`` only commits or rolls back the transaction;
            # it does not close the connection.
            with dbconn:
                with rsidx.open(data_file('chr17-sample.vcf.gz'),
                                'r') as vcffh:
                    rsidx.index.index(dbconn,
                                      vcffh,
                                      cache_size=cachesize,
                                      mmap_size=mmapsize,
                                      logint=10)
                c = dbconn.cursor()
                query = ('SELECT * FROM rsid_to_coord WHERE rsid IN '
                         '(1238461543, 1472751972)')
                results = list(c.execute(query))
        finally:
            # Close before the temporary database file is deleted.
            dbconn.close()
        assert sorted(results) == sorted([(1238461543, '17', 624973),
                                          (1472751972, '17', 132359)])