def test_sample_data_copes_with_mixed_missing_values_in_PL(self):
    # A PL list may interleave numbers with missing markers ('.' or None).
    # Both kinds of marker are expected to be reported as None, while the
    # numeric entries come back as the values asserted below.
    sample = 'sample_name'
    mixed_pl_values = [-0.1, '.', -0.2, None, -0.3]
    expected_likelihoods = [0.01, None, 0.02, None, 0.03]

    data = SampleData(['PL'], [sample])
    data.add_sample_data(sample, 'PL', mixed_pl_values)

    self.assertEqual(
        data.get_genotype_likelihoods(sample),
        expected_likelihoods,
    )
def test_should_allow_multiple_samples_for_add_sample_data(self):
    # Each sample should read back the values that were stored for it
    # under the key both samples share.
    key = 'genotype_key1'
    first, second = 'sample_name1', 'sample_name2'

    data = SampleData([key], [first, second])
    data.add_sample_data(first, key, [1])
    data.add_sample_data(second, key, [3, 4])

    self.assertEqual(data.get_field(first, key), [1])
    self.assertEqual(data.get_field(second, key), [3, 4])
def test_should_merge_genotype_call_object_in_sample_data(self):
    # Both objects hold '0/1' for the same sample; after merging the
    # second object's genotypes into the first, the first is expected to
    # report '1/1' for that sample.
    sample = 'sample_name'

    target = SampleData(['GT'], [sample])
    target.add_sample_data(sample, 'GT', GenotypeCall('0/1'))
    incoming = SampleData(['GT'], [sample])
    incoming.add_sample_data(sample, 'GT', GenotypeCall('0/1'))

    target.merge_genotype_calls(incoming.genotypes())

    merged_call = target.get_field("sample_name", "GT")
    self.assertEqual(merged_call, GenotypeCall("1/1"))
def test_should_fail_if_sample_data_objects_have_different_sample(self):
    # The two objects describe different samples, so merging the
    # genotypes of one into the other is expected to raise.
    left = SampleData(['GT'], ['sample_name_1'])
    left.add_sample_data('sample_name_1', 'GT', GenotypeCall('0/0'))
    right = SampleData(['GT'], ['sample_name_2'])
    right.add_sample_data('sample_name_2', 'GT', GenotypeCall('0/0'))

    # Resolve the method and build its argument before entering the
    # assertion, so that only the merge call itself is being guarded.
    merge = left.merge_genotype_calls
    foreign_genotypes = right.genotypes()

    with self.assertRaises(Exception):
        merge(foreign_genotypes)
class TestGenotypeDataView(unittest.TestCase):
    """Checks the per-sample object returned by SampleData.get_genotype_data.

    The fixture declares the keys 'GT' and 'key' for two samples and then
    stores only 'key' for sample_name1 and only 'GT' for sample_name2, so
    every sample has one field that was set and one that never was.
    """

    def setUp(self):
        sample_names = ['sample_name1', 'sample_name2']
        self.sample_data = SampleData(['GT', 'key'], sample_names)
        self.sample_data.add_sample_data("sample_name1", "key", [1, 2])
        self.sample_data.add_sample_data(
            "sample_name2", "GT", GenotypeCall("0/1"))

    def test_contains_method_returns_expected_value_sample1(self):
        view = self.sample_data.get_genotype_data("sample_name1")

        # Neither an arbitrary string nor the sample's own name is a key.
        for not_a_key in ("cheesecake", "sample_name1"):
            self.assertNotIn(not_a_key, view)

        # Declared keys are members whether or not a value was stored.
        for declared_key in ("GT", "key"):
            self.assertIn(declared_key, view)

    def test_contains_method_returns_expected_value_sample2(self):
        view = self.sample_data.get_genotype_data("sample_name2")

        for declared_key in ("GT", "key"):
            self.assertIn(declared_key, view)

    def test_getitem_method_returns_expected_value(self):
        # sample_name1: 'GT' was never stored, 'key' was.
        first_view = self.sample_data.get_genotype_data("sample_name1")
        self.assertEqual(first_view["GT"], GenotypeCall("./."))
        self.assertEqual(first_view["key"], [1, 2])

        # sample_name2: 'GT' was stored, 'key' never was.
        second_view = self.sample_data.get_genotype_data("sample_name2")
        self.assertEqual(second_view["GT"], GenotypeCall("0/1"))
        self.assertEqual(second_view["key"], [])

    def test_keys_method_returns_expected_data(self):
        # Both samples expose the same keys, in declaration order.
        for sample_name in ("sample_name1", "sample_name2"):
            view = self.sample_data.get_genotype_data(sample_name)
            self.assertEqual(list(view.keys()), ["GT", "key"])

    def test_values_method_returns_expected_data(self):
        first_view = self.sample_data.get_genotype_data("sample_name1")
        first_values = list(first_view.values())
        self.assertEqual(first_values, [GenotypeCall("./."), [1, 2]])

        second_view = self.sample_data.get_genotype_data("sample_name2")
        second_values = list(second_view.values())
        self.assertEqual(second_values, [GenotypeCall("0/1"), []])
def test_should_add_sample_data(self):
    # Round trip: a value stored with add_sample_data should be read back
    # through get_field for the same sample and key.
    sample, key = 'sample_name', 'genotype_key1'

    data = SampleData([key], [sample])
    data.add_sample_data(sample, key, [1])

    stored = data.get_field(sample, key)
    self.assertEqual(stored, [1])
def test_gets_value_for_GQ_key(self):
    # get_genotype_quality should return what was stored under 'GQ'.
    sample = 'sample_name'

    data = SampleData(['GQ'], [sample])
    data.add_sample_data(sample, 'GQ', [2.3])

    quality = data.get_genotype_quality(sample)
    self.assertEqual(quality, [2.3])
def test_gets_exact_values_if_key_is_NV(self):
    # get_variant_support should return the 'NV' value unchanged.
    sample = 'sample_name'

    data = SampleData(['NV'], [sample])
    data.add_sample_data(sample, 'NV', [100])

    support = data.get_variant_support(sample)
    self.assertEqual(support, [100])
def test_gets_exact_values_if_key_is_NR(self):
    # get_read_depth should return the 'NR' value unchanged.
    sample = 'sample_name'

    data = SampleData(['NR'], [sample])
    data.add_sample_data(sample, 'NR', [100])

    depth = data.get_read_depth(sample)
    self.assertEqual(depth, [100])
def test_gets_dot_if_key_is_PL(self):
    # When the whole 'PL' field is the bare '.' string (not a list),
    # get_genotype_likelihoods is expected to return that same '.'.
    sample = 'sample_name'

    data = SampleData(['PL'], [sample])
    data.add_sample_data(sample, 'PL', '.')

    likelihoods = data.get_genotype_likelihoods(sample)
    self.assertEqual(likelihoods, '.')
def test_gets_list_of_none_if_key_is_GL(self):
    # A 'GL' field made up entirely of None entries should be returned
    # as a list of None entries of the same length.
    sample = 'sample_name'

    data = SampleData(['GL'], [sample])
    data.add_sample_data(sample, 'GL', [None] * 3)

    likelihoods = data.get_genotype_likelihoods(sample)
    self.assertEqual(likelihoods, [None, None, None])
def test_gets_exact_values_if_key_is_PL(self):
    # NOTE(review): despite the test name, the expected likelihoods are
    # not the stored PL numbers; the assertion below expects 1, 2, 3 to
    # come back as -0.1, -0.2, -0.3.
    sample = 'sample_name'
    stored_pl = [1, 2, 3]
    expected_likelihoods = [-0.1, -0.2, -0.3]

    data = SampleData(['PL'], [sample])
    data.add_sample_data(sample, 'PL', stored_pl)

    self.assertEqual(
        data.get_genotype_likelihoods(sample),
        expected_likelihoods,
    )
def test_should_write_missing_values_in_sample_data(self):
    # Three samples are declared but only sample1 and sample2 receive
    # data. The row written for the record is expected to end with
    # missing-value markers ("./.:.:.") in the column for sample3.

    # Only the first record of the example file is needed as a carrier
    # for the sample data built below.
    with VCFReaderContextManager(
            os.path.join(self.data_dir, "vcf_example.vcf")) as vcf_handler:
        first_record = next(vcf_handler.read_records())

    sample_data = SampleData(
        ['GT', 'PL', 'GQ'], ['sample1', 'sample2', 'sample3'])

    sample_data.add_sample_data("sample1", "GT", GenotypeCall("1|0"))
    sample_data.add_sample_data("sample1", "PL", [3000, 0, 3000])
    sample_data.add_sample_data("sample1", "GQ", [1000])

    sample_data.add_sample_data("sample2", "GT", GenotypeCall("1|1"))
    sample_data.add_sample_data("sample2", "PL", [2000, 0, 1000])
    sample_data.add_sample_data("sample2", "GQ", [3])

    # Replace whatever sample information the record was read with.
    first_record.sample_info = sample_data

    vcf_string = vcf_row_from_record(first_record)

    # NOTE(review): VCF rows are normally tab-delimited; confirm the
    # separators inside this literal are what the writer really emits.
    expected_vcf_string = "20 10 . CT C 3000 PASS PP=3000;DP=250;DPR=140;DPF=110;VC=100;VCR=49;VCF=51;ABPV=0.2;SBPV=0.3;MQ=70.0;BR=31.0;QD=None GT:PL:GQ 1|0:3000,0,3000:1000 1|1:2000,0,1000:3 ./.:.:."  # noqa
    self.assertEqual(expected_vcf_string, vcf_string)
def test_read_sample_data(self):
    # Fill one sample with a value for several keys of the example
    # file's sample schema, then read the data back through each of the
    # accessors exercised below.
    schema = self.__get_example_schema("vcf_example.vcf")
    sample_schema = [key for key, _ in schema.iter_sample_data()]
    sample_data = SampleData(sample_schema, ['sample1'])

    values_to_store = [
        ("GT", GenotypeCall("1|0")),
        ("PL", [3000, 0, 3000]),
        ("GQ", [1000]),
        ("PQ", [2000]),
        ("PS", [60000]),
        ("AD", [140, 110]),
        ("DP", [250]),
        ("VAF", [0.4]),
    ]
    for key, value in values_to_store:
        sample_data.add_sample_data("sample1", key, value)

    # Access through the SampleData object itself.
    self.assertTrue(sample_data.has_sample("sample1"))
    self.assertEqual(
        sample_data.genotypes(), {"sample1": GenotypeCall("1|0")})
    self.assertEqual(
        sample_data.get_field("sample1", 'GT'), GenotypeCall("1|0"))
    self.assertEqual(
        sample_data.get_field("sample1", 'PL'), [3000, 0, 3000])

    # Access through the per-sample object.
    genotype_data = sample_data.get_genotype_data("sample1")
    self.assertEqual(genotype_data.genotype(), GenotypeCall("1|0"))
    self.assertEqual(genotype_data['GT'], GenotypeCall("1|0"))
    self.assertEqual(genotype_data['PL'], [3000, 0, 3000])