def test_condition():
    """Compare record counts with and without a ``condition`` argument.

    Expects 9 records from an unfiltered ``variants`` call on the sample
    fixture, and 5 records from both ``calldata`` and ``variants`` when the
    ``['FILTER']['PASS']`` field of the unfiltered result is passed as
    ``condition``.
    """
    vcf_fn = 'fixture/sample.vcf'

    unfiltered = variants(vcf_fn)
    eq_(9, len(unfiltered))

    filtered_calls = calldata(vcf_fn, condition=unfiltered['FILTER']['PASS'])
    eq_(5, len(filtered_calls))

    filtered_variants = variants(vcf_fn, condition=unfiltered['FILTER']['PASS'])
    eq_(5, len(filtered_variants))
def test_condition():
    """Check that a ``condition`` argument cuts the sample fixture from 9 to 5 records.

    The condition is the ``["FILTER"]["PASS"]`` field of the unfiltered
    ``variants`` result; it is applied first to ``calldata`` and then to
    ``variants``.
    """
    path = "fixture/sample.vcf"

    everything = variants(path)
    eq_(9, len(everything))

    # Same expectation for both loaders, checked in the original order.
    for loader in (calldata, variants):
        subset = loader(path, condition=everything["FILTER"]["PASS"])
        eq_(5, len(subset))
def test_variable_calldata():
    """Check ``AD`` and ``GT`` for sample ``test2`` in ``fixture/test1.vcf``.

    The first three records are expected to have ``AD`` equal to ``(1, 0)``
    and ``GT`` equal to ``"."``, ``"0"`` and ``"1"`` respectively.
    """
    sample_calls = calldata("fixture/test1.vcf")["test2"]

    for row in range(3):
        eq_((1, 0), tuple(sample_calls["AD"][row]))

    for row, expected_gt in enumerate((".", "0", "1")):
        eq_(expected_gt, sample_calls["GT"][row])
def test_variable_calldata():
    """Verify the ``AD`` and ``GT`` fields of sample ``test2`` over records 0-2.

    ``AD`` must convert to the tuple ``(1, 0)`` in every one of the three
    records, while ``GT`` must read ``'.'``, ``'0'`` and ``'1'`` in turn.
    """
    calls = calldata('fixture/test1.vcf')
    rows = (0, 1, 2)
    expected_ad = (1, 0)
    expected_gts = ('.', '0', '1')

    for idx in rows:
        eq_(expected_ad, tuple(calls['test2']['AD'][idx]))

    for idx, gt in zip(rows, expected_gts):
        eq_(gt, calls['test2']['GT'][idx])
def test_condition():
    """Check ``condition`` filtering across ``calldata``, ``info`` and ``variants``.

    An unfiltered ``variants`` call on the sample fixture is expected to give
    9 records. Each of the three loaders is then expected to give 5 records
    when the ``['FILTER']['PASS']`` field of that result is passed as
    ``condition``.
    """
    vcf_fn = 'fixture/sample.vcf'
    n_total, n_pass = 9, 5

    all_records = variants(vcf_fn)
    eq_(n_total, len(all_records))

    for loader in (calldata, info, variants):
        kept = loader(vcf_fn, condition=all_records['FILTER']['PASS'])
        eq_(n_pass, len(kept))
def test_calldata():
    """Spot-check per-sample call fields loaded from ``fixture/sample.vcf``.

    Checks the ``GT``, ``is_called``, ``is_phased``, ``genotype`` and ``HQ``
    fields of sample ``NA00001`` in record 0, and that sample ``NA00003`` has
    genotype ``(-1, -1)`` in records 6 and 7.
    """
    a = calldata('fixture/sample.vcf')
    # Function-call form of print: valid syntax on Python 3 and, with a single
    # argument, prints the same text on Python 2 (the statement form
    # ``print repr(a)`` is a SyntaxError on Python 3).
    print(repr(a))
    eq_('0|0', a[0]['NA00001']['GT'])
    eq_(True, a[0]['NA00001']['is_called'])
    eq_(True, a[0]['NA00001']['is_phased'])
    eq_((0, 0), tuple(a[0]['NA00001']['genotype']))
    eq_((-1, -1), tuple(a[6]['NA00003']['genotype']))
    eq_((-1, -1), tuple(a[7]['NA00003']['genotype']))
    eq_((10, 10), tuple(a[0]['NA00001']['HQ']))
def test_calldata():
    """Spot-check per-sample call fields loaded from ``fixture/sample.vcf``.

    For sample ``NA00001`` in record 0 the test checks ``GT``, ``is_called``,
    ``is_phased``, ``genotype`` and ``HQ``. For sample ``NA00003`` it checks
    that ``genotype`` is ``(-1, -1)`` in records 6 and 7.
    """
    calls = calldata("fixture/sample.vcf")
    # ``print repr(x)`` (statement form) does not parse on Python 3; the
    # call form below works on Python 3 and prints the same text on Python 2.
    print(repr(calls))

    first = calls[0]["NA00001"]
    eq_("0|0", first["GT"])
    eq_(True, first["is_called"])
    eq_(True, first["is_phased"])
    eq_((0, 0), tuple(first["genotype"]))

    eq_((-1, -1), tuple(calls[6]["NA00003"]["genotype"]))
    eq_((-1, -1), tuple(calls[7]["NA00003"]["genotype"]))

    eq_((10, 10), tuple(first["HQ"]))
def test_error_handling():
    """Check which exception type is raised for several unusable inputs.

    Each case pairs an input path with the exception expected from the given
    ``vcfnp`` loader; the cases run in the order listed.
    """
    cases = (
        # try to open a directory
        ('.', ValueError, vcfnp.variants),
        # try to open a file that doesn't exist
        ('doesnotexist', ValueError, vcfnp.variants),
        # file is nothing like a VCF (has no header etc.)
        ('fixture/test48a.vcf', RuntimeError, vcfnp.variants),
        # file has more sample columns than in header row
        ('fixture/test48b.vcf', RuntimeError, vcfnp.calldata),
    )

    for vcf_fn, expected_error, loader in cases:
        with assert_raises(expected_error):
            loader(vcf_fn)
def test_caching():
    """Check that ``cache=True`` leaves a loadable file equal to the returned array.

    For each of ``variants``, ``calldata`` and ``calldata_2d`` the test works
    out the cache file name via ``vcfnp._mk_cache_fn``, deletes any existing
    file of that name, calls the loader with ``cache=True``, then reads the
    cache file back with ``np.load`` and compares it element-wise with the
    loader's return value.
    """
    vcf_fn = "fixture/sample.vcf.gz"
    loaders = (
        ("variants", variants),
        ("calldata", calldata),
        ("calldata_2d", calldata_2d),
    )

    for array_type, load in loaders:
        cache_fn = vcfnp._mk_cache_fn(vcf_fn, array_type=array_type)

        # Remove any cache file left by an earlier run, so the file read back
        # below cannot be a stale one (presumably it is written by the
        # cache=True call; confirm against the loader implementation).
        if os.path.exists(cache_fn):
            os.remove(cache_fn)

        fresh = load(vcf_fn, cache=True, verbose=True)
        cached = np.load(cache_fn)
        assert np.all(fresh == cached)
def test_calldata_region_empty():
    """Expect zero records from ``calldata`` for two ``region`` values.

    The regions tried against the compressed sample fixture are ``"18"`` and
    ``"19:113-200"``.
    """
    for region in ("18", "19:113-200"):
        result = calldata("fixture/sample.vcf.gz", region=region)
        eq_(0, len(result))
def profile():
    """Load variants, then condition-filtered calldata, from a path given on the command line.

    ``sys.argv[1]`` is the input path and ``sys.argv[2]`` is converted with
    ``int`` and passed as ``count`` to ``vcfnp.variants``. The
    ``['FILTER']['PASS']`` field of the variants result is then passed as
    ``condition`` to ``vcfnp.calldata``. Nothing is returned.
    """
    vcf_fn = sys.argv[1]
    n_records = int(sys.argv[2])

    loaded_variants = vcfnp.variants(vcf_fn, count=n_records)
    passing_calls = vcfnp.calldata(
        vcf_fn,
        condition=loaded_variants['FILTER']['PASS'],
    )
def test_missing_calldata():
    """Check ``GT`` and ``genotype`` in record 2 for samples ``test3`` and ``test4``.

    ``GT`` is expected to be ``'.'`` for ``test3`` and ``'./.'`` for
    ``test4``; ``genotype`` is expected to be ``(-1, -1)`` for both.
    """
    calls = calldata('fixture/test1.vcf')
    missing_genotype = (-1, -1)
    expectations = (
        ('test3', '.'),
        ('test4', './.'),
    )

    for sample, gt_text in expectations:
        eq_(gt_text, calls[sample]['GT'][2])
        eq_(missing_genotype, tuple(calls[sample]['genotype'][2]))
def test_duplicate_field_definitions():
    """Load ``fixture/test10.vcf`` through both ``variants`` and ``calldata``.

    There are no assertions; the test passes as long as neither call raises.
    """
    vcf_fn = "fixture/test10.vcf"

    variant_records = variants(vcf_fn)
    # should not raise, but print useful message to stderr
    call_records = calldata(vcf_fn)
def test_missing_format_definition():
    """Load ``DP`` with an explicitly supplied type and check one value.

    ``calldata`` is asked for only the ``DP`` field, with ``vcf_types``
    mapping ``DP`` to ``"Integer"``; record 2 of sample ``NA00001`` is then
    expected to have ``DP`` equal to 1.
    """
    # FORMAT field DP not declared in VCF header
    dp_only = calldata(
        "fixture/test14.vcf",
        fields=["DP"],
        vcf_types={"DP": "Integer"},
    )
    eq_(1, dp_only[2]["NA00001"]["DP"])
def test_missing_calldata():
    """Check ``GT`` and ``genotype`` in record 2 for two samples of ``fixture/test1.vcf``.

    Sample ``test3`` is expected to have ``GT`` ``"."`` and sample ``test4``
    ``GT`` ``"./."``; both are expected to have ``genotype`` ``(-1, -1)``.
    """
    row = 2
    calls = calldata("fixture/test1.vcf")

    sample3 = calls["test3"]
    eq_(".", sample3["GT"][row])
    eq_((-1, -1), tuple(sample3["genotype"][row]))

    sample4 = calls["test4"]
    eq_("./.", sample4["GT"][row])
    eq_((-1, -1), tuple(sample4["genotype"][row]))
def test_missing_format_definition():
    """Check that ``DP`` loads when its type is passed in via ``vcf_types``.

    Only the ``DP`` field is requested from ``fixture/test14.vcf``, and the
    value for sample ``NA00001`` in record 2 is expected to equal 1.
    """
    # FORMAT field DP not declared in VCF header
    wanted_fields = ['DP']
    declared_types = {'DP': 'Integer'}

    calls = calldata(
        'fixture/test14.vcf',
        fields=wanted_fields,
        vcf_types=declared_types,
    )

    depth = calls[2]['NA00001']['DP']
    eq_(1, depth)
def test_calldata_region():
    """Expect 6 records when ``calldata`` is restricted to region ``'20'``."""
    in_region = calldata('fixture/sample.vcf.gz', region='20')
    eq_(6, len(in_region))
def test_calldata_region_empty():
    """Expect empty results from ``calldata`` for regions ``'18'`` and ``'19:113-200'``."""
    vcf_fn = 'fixture/sample.vcf.gz'

    by_name = calldata(vcf_fn, region='18')
    eq_(0, len(by_name))

    by_range = calldata(vcf_fn, region='19:113-200')
    eq_(0, len(by_range))
def test_calldata_region():
    """Check the record count returned for ``region="20"`` on the compressed sample fixture."""
    vcf_fn = "fixture/sample.vcf.gz"
    expected_count = 6
    eq_(expected_count, len(calldata(vcf_fn, region="20")))