def test_caching_cachedir():
    """Check that arrays cached under a custom ``cachedir`` round-trip.

    For each loader (``variants``, ``calldata``, ``calldata_2d``) the cache
    file path is obtained from ``vcfnp.array._mk_cache_fn``, any existing
    file at that path is removed, the array is built with ``cache=True`` and
    the custom ``cachedir``, and the file is loaded back with ``np.load``
    and compared with the returned array.

    NOTE(review): a later function in this module has the same name, so
    this definition is shadowed once the module is imported -- confirm
    whether one of the two should be removed or renamed.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    cachedir = 'fixture/custom.vcfnp_cache/foo'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(
            vcf_fn, array_type=array_type, cachedir=cachedir
        )
        debug(cache_fn)
        # Remove any existing cache file so that np.load below can only
        # succeed if the loader call wrote the file.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, verbose=True, cachedir=cachedir)
        a2 = np.load(cache_fn)
        # assert_array_equal also checks shape and reports the mismatching
        # elements, unlike a bare ``np.all(a == a2)``.
        np.testing.assert_array_equal(a, a2)
def test_condition():
    """Check that ``condition`` limits the records returned by the loaders.

    The fixture is expected to yield 9 variants; using the ``FILTER/PASS``
    column of those variants as ``condition`` is expected to leave 5 records
    for both ``calldata`` and ``variants``.
    """
    vcf_fn = 'fixture/sample.vcf'
    all_variants = variants(vcf_fn)
    eq_(9, len(all_variants))

    passed = all_variants['FILTER']['PASS']
    for loader in (calldata, variants):
        selected = loader(vcf_fn, condition=passed)
        eq_(5, len(selected))
def test_caching_cachedir():
    """Verify cache files written to a custom ``cachedir`` match the arrays.

    Runs the same remove / build / reload / compare cycle for ``variants``,
    ``calldata`` and ``calldata_2d``, using the cache path reported by
    ``vcfnp.array._mk_cache_fn`` for the custom directory.
    """
    source = 'fixture/sample.vcf.gz'
    custom_dir = 'fixture/custom.vcfnp_cache/foo'

    def check_roundtrip(kind, build):
        # Path at which the cache file for this array type is looked up.
        path = vcfnp.array._mk_cache_fn(
            source, array_type=kind, cachedir=custom_dir
        )
        debug(path)
        if os.path.exists(path):
            os.remove(path)
        built = build(source, cache=True, verbose=True, cachedir=custom_dir)
        reloaded = np.load(path)
        assert_array_equal(built, reloaded)

    check_roundtrip('variants', variants)
    check_roundtrip('calldata', calldata)
    check_roundtrip('calldata_2d', calldata_2d)
def test_caching_compression():
    """Check that compressed cache files round-trip for every loader.

    For ``variants``, ``calldata`` and ``calldata_2d`` the cache path is
    obtained from ``vcfnp.array._mk_cache_fn(..., compress=True)``, any
    existing file at that path is removed, the array is built with
    ``cache=True, compress_cache=True``, and the ``'data'`` entry of the
    loaded cache file is compared with the returned array.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(
            vcf_fn, array_type=array_type, compress=True
        )
        debug(cache_fn)
        # Remove any existing cache file so that np.load below can only
        # succeed if the loader call wrote the file.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, compress_cache=True, verbose=True)
        # Presumably an .npz archive (it is indexed by key); open it in a
        # ``with`` block so the underlying file handle is closed instead of
        # being left to the garbage collector.
        with np.load(cache_fn) as cached:
            a2 = cached['data']
        assert_array_equal(a, a2)
def test_caching():
    """Check that arrays cached at the default location round-trip.

    For each loader (``variants``, ``calldata``, ``calldata_2d``) the cache
    file path is obtained from ``vcfnp.array._mk_cache_fn``, any existing
    file at that path is removed, the array is built with ``cache=True``,
    and the file is loaded back with ``np.load`` and compared with the
    returned array.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type)
        debug(cache_fn)
        # Remove any existing cache file so that np.load below can only
        # succeed if the loader call wrote the file.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, verbose=True)
        a2 = np.load(cache_fn)
        # assert_array_equal also checks shape and reports the mismatching
        # elements, unlike a bare ``np.all(a == a2)``.
        np.testing.assert_array_equal(a, a2)
def test_variable_calldata():
    """Check the ``AD`` and ``GT`` values of sample ``test2`` in test1.vcf.

    The first three records are expected to carry ``AD == (1, 0)`` and the
    ``GT`` strings ``b'.'``, ``b'0'`` and ``b'1'`` respectively.
    """
    sample = calldata('fixture/test1.vcf')['test2']

    for row in range(3):
        eq_((1, 0), tuple(sample['AD'][row]))

    for row, expected_gt in enumerate((b'.', b'0', b'1')):
        eq_(expected_gt, sample['GT'][row])
def test_calldata():
    """Spot-check call data loaded from the sample VCF fixture.

    Verifies the ``GT``, ``is_called``, ``is_phased``, ``genotype`` and
    ``HQ`` fields of sample ``NA00001`` in the first record, and that the
    ``genotype`` of sample ``NA00003`` is ``(-1, -1)`` in records 6 and 7.
    """
    records = calldata('fixture/sample.vcf')
    debug(repr(records))

    first_call = records[0]['NA00001']
    eq_(b'0|0', first_call['GT'])
    eq_(True, first_call['is_called'])
    eq_(True, first_call['is_phased'])
    eq_((0, 0), tuple(first_call['genotype']))

    for row in (6, 7):
        eq_((-1, -1), tuple(records[row]['NA00003']['genotype']))

    eq_((10, 10), tuple(first_call['HQ']))
def test_truncate():
    """Check the effect of ``truncate`` on region queries.

    See https://github.com/alimanfoo/vcfnp/issues/54

    For region ``chr1:10-100`` of the fixture, each loader is expected to
    return 2 records by default and 3 records with ``truncate=False``.
    """
    vcf_fn = 'fixture/test54.vcf.gz'
    region = 'chr1:10-100'
    loaders = (variants, calldata, calldata_2d)

    # truncate by default
    for loader in loaders:
        eq_(2, len(loader(vcf_fn, region=region)))

    # don't truncate
    for loader in loaders:
        eq_(3, len(loader(vcf_fn, region=region, truncate=False)))
def test_missing_calldata_cleared():
    """Check sample ``AC0093-C`` of test32.vcf for a called and a missing call.

    The first record is expected to hold real values; the second record is
    expected to hold a missing genotype with ``DP`` and ``GQ`` equal to 0
    (the default fill value, per the original comments).
    """
    sample = calldata('fixture/test32.vcf')['AC0093-C']

    # (record index, field name, expected value), checked in this order
    expectations = (
        # first variant, non-missing
        (0, 'GT', b'0/0'),
        (0, 'genotype', (0, 0)),
        (0, 'DP', 8),
        (0, 'GQ', 3),
        # second variant, missing
        (1, 'GT', b'./.'),
        (1, 'genotype', (-1, -1)),
        (1, 'DP', 0),  # should be default fill value
        (1, 'GQ', 0),  # should be default fill value
    )
    for row, field, expected in expectations:
        actual = sample[field][row]
        if isinstance(expected, tuple):
            # multi-valued fields are compared as plain tuples
            actual = tuple(actual)
        eq_(expected, actual)
def test_missing_calldata():
    """Check how missing genotype calls in test1.vcf are represented.

    Each checked call is expected to have ``genotype == (-1, -1)``. ``AD``
    is expected to be ``(1, 0)`` where data are present and ``(0, 0)``
    (the default fill, per the original comments) otherwise.
    """
    calls = calldata('fixture/test1.vcf')

    # (sample, record index, expected GT, expected AD), checked in order
    cases = (
        # first variant, second sample -- AD data are present
        ('test2', 0, b'.', (1, 0)),
        # third variant, third sample -- AD is the default fill
        ('test3', 2, b'.', (0, 0)),
        # third variant, fourth sample -- AD is the default fill
        ('test4', 2, b'./.', (0, 0)),
    )
    for sample, row, expected_gt, expected_ad in cases:
        eq_(expected_gt, calls[sample]['GT'][row])
        eq_((-1, -1), tuple(calls[sample]['genotype'][row]))
        eq_(expected_ad, tuple(calls[sample]['AD'][row]))
def test_calldata_region():
    """Check that region ``'20'`` of the sample fixture yields 6 records."""
    in_region = calldata('fixture/sample.vcf.gz', region='20')
    record_count = len(in_region)
    eq_(6, record_count)
def test_missing_format_definition():
    """Check loading a FORMAT field whose type is supplied by the caller.

    Per the original comment, FORMAT field ``DP`` is not declared in the
    VCF header of the fixture, so its type is passed via ``vcf_types``.
    """
    # FORMAT field DP not declared in VCF header
    declared_types = {'DP': 'Integer'}
    calls = calldata(
        'fixture/test14.vcf',
        fields=['DP'],
        vcf_types=declared_types,
    )
    third_record = calls[2]
    eq_(1, third_record['NA00001']['DP'])
def test_duplicate_field_definitions():
    """Smoke-test both loaders on the test10.vcf fixture.

    Nothing is asserted beyond the calls completing. Per the original
    comment, they should not raise but should print a useful message to
    stderr. Presumably the fixture contains duplicate field definitions --
    verify against the fixture.
    """
    vcf_fn = 'fixture/test10.vcf'
    # should not raise, but print useful message to stderr
    for loader in (variants, calldata):
        loader(vcf_fn)
def test_calldata_region_empty():
    """Check that the regions queried here yield empty call data arrays."""
    vcf_fn = 'fixture/sample.vcf.gz'
    for empty_region in ('18', '19:113-200'):
        result = calldata(vcf_fn, region=empty_region)
        eq_(0, len(result))