def test_sample_vcfs_return_infinite_positions(self):
    """
    Iterating a contig's positions yields each expected position in order and,
    once those are consumed, VcfContig.VCF_EMPTY_POSITION on further next() calls.
    """
    sample = Vcf(testdata.GATK_VCF, 'test_name', 'test_aliner', 'test_snpcaller')
    contig = sample.get_contig('500WT1_test')
    positions = contig.positions

    expected = (
        Position(call='C', simple_call='C', coverage=19049, proportion='-'),
        Position(call='C', simple_call='C', coverage=19049, proportion='-'),
        Position(call='T', simple_call='T', coverage=18824, proportion='-'),
        Position(call='G', simple_call='G', coverage=18804, proportion='-'),
        Position(call='X', simple_call='N', coverage='?', proportion='?'),
        Position(call='X', simple_call='N', coverage='?', proportion='?'),
        Position(call='G', simple_call='G', coverage=18895, proportion='-'),
        Position(call='A', simple_call='A', coverage=19005, proportion='-'),
    )

    # It should yield all the contig positions.
    # `expected` is the first zip argument so zip stops without pulling an
    # extra item from `positions`.
    compared = 0
    for compared, (want, got) in enumerate(zip(expected, positions), start=1):
        self.assertEqual(want, got)
    self.assertEqual(len(expected), compared)

    # It should yield empty positions after the contig is exhausted.
    for _ in range(2):
        self.assertEqual(VcfContig.VCF_EMPTY_POSITION, next(positions))
def test_varscan_call_cannot_be_made(self):
    """
    VarScan may include a position with ALT values when a call cannot be made.
    It should still be called missing (X).

    TODO: Add See Also VarScan documentation
    """
    # The following is from a SRR011186 sample using bwamem and varscan.
    # The positions from the source data were 34072-34074.
    contig_name = 'gi|561108321|ref|NC_018143.2|'
    vcf_data = (
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR011186\n"
        "gi|561108321|ref|NC_018143.2| 1 . GC C . PASS ADP=114;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:114:114:6:108:94.74%:1.6043E-58:40:38:2:4:46:62\n"
        # This position should be called missing because the GT column is './.'
        "gi|561108321|ref|NC_018143.2| 2 . C G . PASS ADP=108;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR ./.:.:108\n"
        "gi|561108321|ref|NC_018143.2| 3 . A . . PASS ADP=112;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/0:209:112:112:111:0:0%:1E0:38:0:47:64:0:0\n"
    )
    expected = (
        Position(call='G', simple_call='G', coverage=114, proportion=0.05263157894736842),
        Position(call='X', simple_call='N', coverage=108.0, proportion='-'),
        Position(call='A', simple_call='A', coverage=112, proportion=0.9910714285714286),
    )

    with tempfile.NamedTemporaryFile('w+') as handle:
        # Seed the file with test data; the Vcf below opens it by name.
        handle.write(vcf_data)
        handle.seek(0)

        # Find the test contig.
        # NOTE(review): other Vcf(...) calls in this file pass
        # (filepath, name, aligner, snpcaller); here 'varscan' precedes
        # 'bwamem' - confirm the order is intended.
        sample = Vcf(handle.name, 'SRR011186', 'varscan', 'bwamem')
        contig = sample.get_contig(contig_name)
        positions = contig.positions
        self.assertIsInstance(contig, VcfContig)

        # Check position values.
        compared = 0
        for compared, (want, got) in enumerate(zip(expected, positions), start=1):
            self.assertEqual(want, got)

        # It yields all expected positions
        self.assertEqual(compared, len(expected))

        # All following positions should be empty
        self.assertEqual(VcfContig.VCF_EMPTY_POSITION, next(positions))
def test_varscan_call_cannot_be_made(self):
    """
    VarScan may include a position with ALT values when a call cannot be made.
    It should still be called missing (X).

    TODO: Add See Also VarScan documentation
    """
    # The following is from a SRR011186 sample using bwamem and varscan.
    # The positions from the source data were 34072-34074.
    vcf_data = (
        "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR011186\n"
        "gi|561108321|ref|NC_018143.2| 1 . GC C . PASS ADP=114;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:114:114:6:108:94.74%:1.6043E-58:40:38:2:4:46:62\n"
        # This position should be called missing because the GT column is './.'
        "gi|561108321|ref|NC_018143.2| 2 . C G . PASS ADP=108;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR ./.:.:108\n"
        "gi|561108321|ref|NC_018143.2| 3 . A . . PASS ADP=112;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/0:209:112:112:111:0:0%:1E0:38:0:47:64:0:0\n"
    )
    first_call = Position(call='G', simple_call='G', coverage=114, proportion=0.05263157894736842)
    missing_call = Position(call='X', simple_call='N', coverage=108.0, proportion='-')
    last_call = Position(call='A', simple_call='A', coverage=112, proportion=0.9910714285714286)
    expected = (first_call, missing_call, last_call)

    with tempfile.NamedTemporaryFile('w+') as scratch:
        # Seed the file with test data; the Vcf below opens it by name.
        scratch.write(vcf_data)
        scratch.seek(0)

        # Find the test contig.
        # NOTE(review): other Vcf(...) calls in this file pass
        # (filepath, name, aligner, snpcaller); here 'varscan' precedes
        # 'bwamem' - confirm the order is intended.
        vcf = Vcf(scratch.name, 'SRR011186', 'varscan', 'bwamem')
        contig = vcf.get_contig('gi|561108321|ref|NC_018143.2|')
        positions = contig.positions
        self.assertIsInstance(contig, VcfContig)

        # Check position values, counting how many pairs were compared.
        matched = 0
        for matched, (want, got) in enumerate(zip(expected, positions), start=1):
            self.assertEqual(want, got)

        # It yields all expected positions
        self.assertEqual(matched, len(expected))

        # All following positions should be empty
        self.assertEqual(VcfContig.VCF_EMPTY_POSITION, next(positions))
def setUp(self):
    """Build the Vcf fixture from the GATK sample file and store it on self.vcf."""
    fixture_args = (testdata.GATK_VCF, 'test_vcf', 'test_aligner', 'test_snpcaller')
    self.vcf = Vcf(*fixture_args)
class VcfTestCase(unittest.TestCase):
    """Tests for Vcf: repr, identifier, contig lookup, and VarScan missing calls."""

    def setUp(self):
        """Create the Vcf fixture on the test instance."""
        # setUp is an instance hook. Decorating it with @classmethod binds
        # `self` to the class, so the fixture would be stored as shared class
        # state instead of on the test instance.
        self.vcf = Vcf(testdata.GATK_VCF, 'test_vcf', 'test_aligner', 'test_snpcaller')

    def test_repr(self):
        """repr() shows the filepath, name, aligner and snpcaller."""
        expected = "Vcf(filepath='{0}', name='test_vcf', aligner='test_aligner', snpcaller='test_snpcaller')".format(
            testdata.GATK_VCF)
        self.assertEqual(expected, repr(self.vcf))

    def test_identifier(self):
        """The identifier has the form name::aligner,snpcaller."""
        expected = 'test_vcf::test_aligner,test_snpcaller'
        self.assertEqual(expected, self.vcf.identifier)

    def test_get_contig(self):
        """
        The following tests assume VcfContig is working.
        """
        # It should return the correct contig at any file position or in any order.
        contigs = (
            '500WT1_test',
        )
        for contig_name in contigs:
            # Look up the name under test; a hard-coded name here would make
            # additional entries in `contigs` fail or be checked vacuously.
            contig = self.vcf.get_contig(contig_name)
            # Ensure it is not an EmptyContig
            self.assertIsInstance(contig, VcfContig)
            self.assertEqual(contig_name, contig.name)

    def test_get_contig_empty(self):
        """An unknown contig name returns an EmptyContig placeholder."""
        # If the sample does not contain a contig, it should return an EmptyContig placeholder.
        contig = self.vcf.get_contig('DoesNotExist')
        self.assertIsInstance(contig, EmptyContig)

    def test_varscan_call_cannot_be_made(self):
        """
        VarScan may include a position with ALT values when a call cannot be made.
        It should still be called missing (X).

        TODO: Add See Also VarScan documentation
        """
        # The following is from a SRR011186 sample using bwamem and varscan.
        # The positions from the source data were 34072-34074.
        vcf_data = (
            "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR011186\n"
            "gi|561108321|ref|NC_018143.2| 1 . GC C . PASS ADP=114;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:114:114:6:108:94.74%:1.6043E-58:40:38:2:4:46:62\n"
            # This position should be called missing because the GT column is './.'
            "gi|561108321|ref|NC_018143.2| 2 . C G . PASS ADP=108;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR ./.:.:108\n"
            "gi|561108321|ref|NC_018143.2| 3 . A . . PASS ADP=112;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/0:209:112:112:111:0:0%:1E0:38:0:47:64:0:0\n"
        )
        expected = (
            Position(call='G', simple_call='G', coverage=114, proportion=0.05263157894736842),
            Position(call='X', simple_call='N', coverage=108.0, proportion='-'),
            Position(call='A', simple_call='A', coverage=112, proportion=0.9910714285714286),
        )

        with tempfile.NamedTemporaryFile('w+') as tmpfile:
            # Seed the file with test data
            tmpfile.write(vcf_data)
            tmpfile.seek(0)

            # Find the test contig.
            # NOTE(review): setUp passes (filepath, name, aligner, snpcaller);
            # here 'varscan' precedes 'bwamem' - confirm the order is intended.
            vcf = Vcf(tmpfile.name, 'SRR011186', 'varscan', 'bwamem')
            contig = vcf.get_contig('gi|561108321|ref|NC_018143.2|')
            positions = contig.positions
            self.assertIsInstance(contig, VcfContig)

            # Check position values.
            position = 0
            for position, (expect, observe) in enumerate(zip(expected, positions), start=1):
                self.assertEqual(expect, observe)

            # It yields all expected positions
            self.assertEqual(position, len(expected))

            # All following positions should be empty
            self.assertEqual(VcfContig.VCF_EMPTY_POSITION, next(positions))
class VcfTestCase(unittest.TestCase):
    """Tests for Vcf: repr, identifier, contig lookup, and VarScan missing calls."""

    def setUp(self):
        """Create the Vcf fixture on the test instance."""
        # No @classmethod here: with it, `self` would be the class and the
        # fixture would become shared class-level state rather than an
        # attribute of the running test instance.
        self.vcf = Vcf(testdata.GATK_VCF, 'test_vcf', 'test_aligner', 'test_snpcaller')

    def test_repr(self):
        """repr() shows the filepath, name, aligner and snpcaller."""
        expected = "Vcf(filepath='{0}', name='test_vcf', aligner='test_aligner', snpcaller='test_snpcaller')".format(
            testdata.GATK_VCF)
        self.assertEqual(expected, repr(self.vcf))

    def test_identifier(self):
        """The identifier has the form name::aligner,snpcaller."""
        expected = 'test_vcf::test_aligner,test_snpcaller'
        self.assertEqual(expected, self.vcf.identifier)

    def test_get_contig(self):
        """
        The following tests assume VcfContig is working.
        """
        # It should return the correct contig at any file position or in any order.
        contigs = ('500WT1_test', )
        for contig_name in contigs:
            # Request the contig being iterated, not a fixed literal, so the
            # name assertion below is meaningful for every entry.
            contig = self.vcf.get_contig(contig_name)
            # Ensure it is not an EmptyContig
            self.assertIsInstance(contig, VcfContig)
            self.assertEqual(contig_name, contig.name)

    def test_get_contig_empty(self):
        """An unknown contig name returns an EmptyContig placeholder."""
        # If the sample does not contain a contig, it should return an EmptyContig placeholder.
        contig = self.vcf.get_contig('DoesNotExist')
        self.assertIsInstance(contig, EmptyContig)

    def test_varscan_call_cannot_be_made(self):
        """
        VarScan may include a position with ALT values when a call cannot be made.
        It should still be called missing (X).

        TODO: Add See Also VarScan documentation
        """
        # The following is from a SRR011186 sample using bwamem and varscan.
        # The positions from the source data were 34072-34074.
        vcf_data = (
            "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SRR011186\n"
            "gi|561108321|ref|NC_018143.2| 1 . GC C . PASS ADP=114;WT=0;HET=0;HOM=1;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 1/1:255:114:114:6:108:94.74%:1.6043E-58:40:38:2:4:46:62\n"
            # This position should be called missing because the GT column is './.'
            "gi|561108321|ref|NC_018143.2| 2 . C G . PASS ADP=108;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR ./.:.:108\n"
            "gi|561108321|ref|NC_018143.2| 3 . A . . PASS ADP=112;WT=1;HET=0;HOM=0;NC=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/0:209:112:112:111:0:0%:1E0:38:0:47:64:0:0\n"
        )
        expected = (Position(call='G', simple_call='G', coverage=114, proportion=0.05263157894736842),
                    Position(call='X', simple_call='N', coverage=108.0, proportion='-'),
                    Position(call='A', simple_call='A', coverage=112, proportion=0.9910714285714286))

        with tempfile.NamedTemporaryFile('w+') as tmpfile:
            # Seed the file with test data
            tmpfile.write(vcf_data)
            tmpfile.seek(0)

            # Find the test contig.
            # NOTE(review): setUp passes (filepath, name, aligner, snpcaller);
            # here 'varscan' precedes 'bwamem' - confirm the order is intended.
            vcf = Vcf(tmpfile.name, 'SRR011186', 'varscan', 'bwamem')
            contig = vcf.get_contig('gi|561108321|ref|NC_018143.2|')
            positions = contig.positions
            self.assertIsInstance(contig, VcfContig)

            # Check position values.
            position = 0
            for position, (expect, observe) in enumerate(zip(expected, positions), start=1):
                self.assertEqual(expect, observe)

            # It yields all expected positions
            self.assertEqual(position, len(expected))

            # All following positions should be empty
            self.assertEqual(VcfContig.VCF_EMPTY_POSITION, next(positions))