def test_cli(self):
    """Run the ``add-oxog-filters`` CLI end to end.

    Checks the output header filters, that only the chr1:10 A>T record is
    tagged ``oxog`` while every other record is ``PASS``, and the expected
    log lines on stderr.
    """
    oxo_vcf = get_test_data_path(
        "test_input_for_add_oxog_filters_from_maf.vcf.gz"
    )
    vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            main(args=["add-oxog-filters", vcf_file, oxo_vcf, fn])

        vcf = pysam.VariantFile(fn)
        try:
            self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
            for record in vcf:
                is_oxog_site = (
                    record.contig == "chr1"
                    and record.pos == 10
                    and record.alleles == ("A", "T")
                )
                expected = ["oxog"] if is_oxog_site else ["PASS"]
                self.assertEqual(record.filter.keys(), expected)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn(
            "[gdc_filtration_tools.add_oxog_filters] - Creating tabix index",
            serr,
        )
        self.assertIn(
            "[gdc_filtration_tools.add_oxog_filters] - "
            "Processed 4 records - Tagged 1; Wrote 4",
            serr,
        )
        self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_cli(self):
    """Run the ``dtoxog-maf-to-vcf`` CLI end to end.

    The single record written must be chr1:10 A>T carrying the ``oxog``
    filter, and stderr must contain the expected log lines.
    """
    ifa = get_test_data_path("test_oxog_ref.fa")
    imaf = get_test_data_path("test_oxog_annotated.maf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            main(["dtoxog-maf-to-vcf", imaf, ifa, fn])

        n_records = 0
        vout = pysam.VariantFile(fn)
        try:
            for record in vout:
                n_records += 1
                self.assertEqual(record.chrom, "chr1")
                self.assertEqual(record.pos, 10)
                self.assertEqual(record.alleles, ("A", "T"))
                self.assertEqual(record.filter.keys(), ["oxog"])
        finally:
            # Close the reader even when an assertion above fails.
            vout.close()
        # The log asserted below reports "Wrote 1"; without this check an
        # empty output file would pass the loop above vacuously.
        self.assertEqual(n_records, 1)

        serr = stderr.getvalue()
        self.assertIn(
            "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Creating tabix index...",
            serr,
        )
        self.assertIn(
            "[gdc_filtration_tools.dtoxog_maf_to_vcf] - "
            "Processed 2 records - Wrote 1",
            serr,
        )
        self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
    finally:
        cleanup_files([fn, fn + ".tbi"])
def test_cli(self):
    """Run the ``create-dtoxog-maf`` CLI end to end.

    Verifies the version line, the column header, each of the three data
    rows against ``TestCreatedToxoGMaf.exp_maf``, and the log output.
    """
    imets = get_test_data_path("test_oxog_metrics.txt")
    vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
    fa_file = get_test_data_path("test_oxog_ref.fa")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            main(
                args=[
                    "create-dtoxog-maf",
                    vcf_file,
                    fn,
                    fa_file,
                    imets,
                    "32.0",
                ]
            )

        with open(fn, "rt") as fh:
            self.assertEqual(fh.readline(), "#version 2.4.1\n")
            header = fh.readline().rstrip("\r\n").split("\t")
            self.assertEqual(header, MAF_COLUMNS)
            count = 0
            for line in fh:
                dat = dict(zip(header, line.rstrip("\r\n").split("\t")))
                self.assertEqual(dat, TestCreatedToxoGMaf.exp_maf[count])
                count += 1
        self.assertEqual(count, 3)

        serr = stderr.getvalue()
        self.assertIn("Converts a SNP VCF to dToxoG MAF format.", serr)
        self.assertIn("Processed 3 records", serr)

        # First and last non-empty log lines identify the emitting loggers.
        log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.create_dtoxog_maf", log_lines[0])
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
    finally:
        cleanup_files(fn)
def test_get_header(self):
    """Check the header produced by ``get_header`` for the pindel test VCF.

    The ``TYPEOFSV`` and ``forcedHet`` INFO records must each appear exactly
    once, no GENERIC ``center`` record may be present, and the samples must
    be ``NORMAL`` then ``TUMOR``.
    """
    source_path = get_test_data_path("pindel_test.vcf")
    reader = pysam.VariantFile(source_path)
    saw_typeofsv = False
    saw_forced_het = False
    saw_center = False
    try:
        new_header = get_header(reader.header)
        for entry in new_header.records:
            if entry.type == "INFO":
                if entry.get("ID", "") == "TYPEOFSV":
                    # A second hit would mean the INFO line is duplicated.
                    self.assertFalse(saw_typeofsv)
                    saw_typeofsv = True
                elif entry.get("ID", "") == "forcedHet":
                    self.assertFalse(saw_forced_het)
                    saw_forced_het = True
            elif entry.type == "GENERIC" and entry.key == "center":
                saw_center = True
        self.assertEqual(list(new_header.samples), ["NORMAL", "TUMOR"])
    finally:
        reader.close()

    self.assertTrue(saw_typeofsv)
    self.assertTrue(saw_forced_het)
    self.assertFalse(saw_center)
def test_filter_contigs(self):
    """Run ``filter_contigs`` for plain and gzipped output paths.

    For both the ``.vcf`` and ``.vcf.gz`` outputs, exactly two records must
    remain and each must be on ``chr1`` or ``chr2``.
    """
    ivcf = get_test_data_path("filter_contigs.vcf")
    exp_chroms = ["chr1", "chr2"]

    for suffix in (".vcf", ".vcf.gz"):
        # Reserve an output path; the handle is closed on leaving the block
        # so no file descriptor stays open for the rest of the test.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            fn = tmp.name
        try:
            # Output is captured only to keep the test run quiet.
            with captured_output():
                filter_contigs(ivcf, fn)

            found = 0
            rdr = pysam.VariantFile(fn)
            try:
                for record in rdr:
                    self.assertIn(record.chrom, exp_chroms)
                    found += 1
            finally:
                rdr.close()
            self.assertEqual(found, 2)
        finally:
            # Remove the output even when an assertion above fails.
            cleanup_files(fn)
def test_build_new_record(self):
    """Build a VCF record from a minimal MAF row with ``build_new_record``.

    The resulting record must be chr1:10 A>T with the ``oxog`` filter set.
    """
    ifa = get_test_data_path("test_oxog_ref.fa")
    header = generate_header(ifa, "oxog")
    maf = {
        "Chromosome": "chr1",
        "Start_position": "10",
        "Reference_Allele": "A",
        "Tumor_Seq_Allele1": "T",
    }
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf", delete=False) as tmp:
        fn = tmp.name
    vcf = None
    try:
        vcf = pysam.VariantFile(fn, mode="w", header=header)
        record = build_new_record(maf, vcf, "oxog")
        self.assertEqual(record.pos, 10)
        self.assertEqual(record.chrom, "chr1")
        self.assertEqual(record.alleles, ("A", "T"))
        self.assertEqual(record.filter.keys(), ["oxog"])
    finally:
        # vcf stays None when opening the writer itself failed.
        if vcf is not None:
            vcf.close()
        cleanup_files(fn)
def test_cli(self):
    """Run the ``filter-somatic-score`` CLI end to end.

    Three records must be written, none at position 1, exactly one tagged
    ``ssc40`` (with a TUMOR ``SSC`` of 25), and stderr must contain the
    expected log lines.
    """
    ivcf = get_test_data_path("test_somatic_score.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        total = 0
        tagged = 0
        with captured_output() as (_, stderr):
            main(args=["filter-somatic-score", ivcf, fn])

        vcf = pysam.VariantFile(fn)
        try:
            for record in vcf:
                total += 1
                self.assertNotEqual(record.pos, 1)
                if "ssc40" in record.filter:
                    tagged += 1
                    self.assertEqual(record.samples["TUMOR"]["SSC"], 25)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()
        self.assertEqual(total, 3)
        self.assertEqual(tagged, 1)

        serr = stderr.getvalue()
        self.assertIn(
            "Filters SomaticSniper VCF files based on Somatic Score.", serr
        )
        self.assertIn("Filter tag: ssc40", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)

        # First and last non-empty log lines identify the emitting loggers.
        log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.filter_somatic_score", log_lines[0])
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_get_context(self):
    """Compare ``get_context`` output for each VCF record with fixed strings."""
    vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
    fa_file = get_test_data_path("test_oxog_ref.fa")
    fasta = pysam.FastaFile(fa_file)
    vcf = pysam.VariantFile(vcf_file)
    exp = ["CTTGGGGGGGG", "GGGGGGGGGGCGGGGGGGGGG", "GGGGGGGTTTACCGGGGGGGG"]
    n = 0
    try:
        for rec in vcf:
            res = get_context(rec, fasta)
            self.assertEqual(res, exp[n])
            n += 1
        # Guard against a vacuous pass: every expected context must have
        # been compared against a record.
        self.assertEqual(n, len(exp))
    finally:
        fasta.close()
        vcf.close()
def test_cli(self):
    """Run the ``create-oxog-intervals`` CLI end to end.

    The output file must contain exactly the lines ``chr1:1`` and
    ``chr2:1``, and stderr must contain the expected log lines.
    """
    ivcf = get_test_data_path("test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        fn = tmp.name
    try:
        expected = ["chr1:1", "chr2:1"]
        with captured_output() as (_, stderr):
            main(args=["create-oxog-intervals", ivcf, fn])

        with open(fn, "rt") as fh:
            found = [line.rstrip("\r\n") for line in fh]
        self.assertEqual(len(found), 2)
        self.assertEqual(found, expected)

        serr = stderr.getvalue()
        self.assertIn(
            "Extracts interval-file for Broad OxoG metrics from VCF.", serr
        )
        self.assertIn("Processed 2 records", serr)

        # First and last non-empty log lines identify the emitting loggers.
        log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.create_oxog_intervals", log_lines[0])
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
    finally:
        cleanup_files(fn)
def test_filter_somatic_score_defaults(self):
    """Call ``filter_somatic_score`` with only input and output paths.

    Three records must be written, none at position 1, exactly one tagged
    ``ssc40`` (with a TUMOR ``SSC`` of 25), and stderr must contain the
    expected log lines.
    """
    ivcf = get_test_data_path("test_somatic_score.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        total = 0
        tagged = 0
        with captured_output() as (_, stderr):
            filter_somatic_score(ivcf, fn)

        vcf = pysam.VariantFile(fn)
        try:
            for record in vcf:
                total += 1
                self.assertNotEqual(record.pos, 1)
                if "ssc40" in record.filter:
                    tagged += 1
                    self.assertEqual(record.samples["TUMOR"]["SSC"], 25)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()
        self.assertEqual(total, 3)
        self.assertEqual(tagged, 1)

        serr = stderr.getvalue()
        self.assertIn(
            "Filters SomaticSniper VCF files based on Somatic Score.", serr
        )
        self.assertIn("Filter tag: ssc40", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_cli(self):
    """Run the ``format-sanger-pindel-vcf`` CLI end to end.

    The output must list samples ``NORMAL`` and ``TUMOR`` and contain
    exactly two records (positions 10 and 20) with TUMOR ``GT`` (0, 1) and
    NORMAL ``GT`` (0, 0); stderr must contain the expected log lines.
    """
    ivcf = get_test_data_path("sanger_pindel_test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            main(["format-sanger-pindel-vcf", ivcf, fn])

        vcf = pysam.VariantFile(fn)
        try:
            self.assertEqual(list(vcf.header.samples), ["NORMAL", "TUMOR"])
            for expected_pos in (10, 20):
                rec = next(vcf)
                self.assertEqual(rec.pos, expected_pos)
                self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))
                self.assertEqual(rec.samples["NORMAL"]["GT"], (0, 0))
            # No third record may follow.
            with self.assertRaises(StopIteration):
                next(vcf)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn(
            "[gdc_filtration_tools.format_sanger_pindel_vcf] - "
            "Creating tabix index...",
            serr,
        )
        self.assertIn(
            "[gdc_filtration_tools.format_sanger_pindel_vcf] - "
            "Processed 2 records.",
            serr,
        )
        self.assertIn("gdc_filtration_tools.main", serr)
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_filter_nonstandard_variants(self):
    """Call ``filter_nonstandard_variants`` directly and check its output.

    Only the chr1 and chr3 records may remain, and stderr must report the
    removal of ``chr2:1:A,R`` along with the summary log lines.
    """
    ivcf = get_test_data_path("test_nonstandard_variants.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            filter_nonstandard_variants(ivcf, fn)

        vcf = pysam.VariantFile(fn)
        try:
            record = next(vcf)
            self.assertEqual(record.chrom, "chr1")
            record = next(vcf)
            self.assertEqual(record.chrom, "chr3")
            # No third record may follow.
            with self.assertRaises(StopIteration):
                next(vcf)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn("Drops non-ACTG loci from a VCF.", serr)
        self.assertIn("Removing chr2:1:A,R", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_cli(self):
    """Run the ``filter-nonstandard-variants`` CLI end to end.

    Only the chr1 and chr3 records may remain, and stderr must report the
    removal of ``chr2:1:A,R`` along with the summary log lines.
    """
    ivcf = get_test_data_path("test_nonstandard_variants.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            main(args=["filter-nonstandard-variants", ivcf, fn])

        vcf = pysam.VariantFile(fn)
        try:
            record = next(vcf)
            self.assertEqual(record.chrom, "chr1")
            record = next(vcf)
            self.assertEqual(record.chrom, "chr3")
            # No third record may follow.
            with self.assertRaises(StopIteration):
                next(vcf)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn("Drops non-ACTG loci from a VCF.", serr)
        self.assertIn("Removing chr2:1:A,R", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)

        # First and last non-empty log lines identify the emitting loggers.
        log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
        self.assertIn(
            "gdc_filtration_tools.filter_nonstandard_variants", log_lines[0]
        )
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_build_header(self):
    """Build a header from ``test.vcf`` with default ``FakeOpts`` values.

    The result is checked with ``self.validate_header`` after the source
    VCF has been closed.
    """
    obj = FakeOpts()
    ivcf = get_test_data_path("test.vcf")
    vcf = pysam.VariantFile(ivcf)
    try:
        opts = [vcf] + obj.to_build_header()
        res = build_header(*opts)
    finally:
        # Close the reader even when build_header raises.
        vcf.close()
    self.validate_header(obj, res)
def test_format_gdc_vcf(self):
    """Call ``format_gdc_vcf`` with ``FakeOpts`` fields as keyword arguments.

    The header of the written VCF is copied out and checked with
    ``self.validate_header``.
    """
    ivcf = get_test_data_path("test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        obj = FakeOpts(ivcf, fn)
        opts = attr.asdict(obj)
        format_gdc_vcf(**opts)

        vcf = pysam.VariantFile(fn)
        try:
            # Copy so the header is still usable after the file is closed.
            hdr = vcf.header.copy()
        finally:
            vcf.close()
    finally:
        # Remove the output even when the call or the read-back fails.
        cleanup_files(fn)
    self.validate_header(obj, hdr)
def test_generate_maf_record(self):
    """Compare ``generate_maf_record`` output with ``exp_maf`` per record.

    Each record of the dToxoG input VCF is converted with the loaded OxoG
    metrics and the value 32.0, then compared with the expected MAF row at
    the same index.
    """
    from gdc_filtration_tools.logger import Logger

    imets = get_test_data_path("test_oxog_metrics.txt")
    mets = load_oxog(imets)
    vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
    fa_file = get_test_data_path("test_oxog_ref.fa")
    fasta = pysam.FastaFile(fa_file)
    vcf = pysam.VariantFile(vcf_file)
    logger = Logger.get_logger("create_dtoxog_maf")
    count = 0
    try:
        for record in vcf:
            maf_record = generate_maf_record(record, fasta, mets, 32.0, logger)
            self.assertEqual(maf_record, TestCreatedToxoGMaf.exp_maf[count])
            count += 1
        # Guard against a vacuous pass on an empty VCF. The value 3 is taken
        # from the "Processed 3 records" checks on this same input elsewhere
        # in the suite -- confirm if the fixture changes.
        self.assertEqual(count, 3)
    finally:
        fasta.close()
        vcf.close()
def test_create_dtoxog_maf(self):
    """Call ``create_dtoxog_maf`` directly and check the written MAF.

    Verifies the version line, the column header, each data row against
    ``TestCreatedToxoGMaf.exp_maf``, and the record-count log line.
    """
    imets = get_test_data_path("test_oxog_metrics.txt")
    vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
    fa_file = get_test_data_path("test_oxog_ref.fa")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            create_dtoxog_maf(vcf_file, fn, fa_file, imets, 32.0)

        with open(fn, "rt") as fh:
            self.assertEqual(fh.readline(), "#version 2.4.1\n")
            header = fh.readline().rstrip("\r\n").split("\t")
            self.assertEqual(header, MAF_COLUMNS)
            count = 0
            for line in fh:
                dat = dict(zip(header, line.rstrip("\r\n").split("\t")))
                self.assertEqual(dat, TestCreatedToxoGMaf.exp_maf[count])
                count += 1
        # Matches the "Processed 3 records" log line asserted below; without
        # it a MAF with no data rows would pass the loop vacuously.
        self.assertEqual(count, 3)

        log_lines = stderr.getvalue().split("\n")
        # The summary is expected on the third log line.
        self.assertIn("Processed 3 records", log_lines[2])
    finally:
        cleanup_files(fn)
def test_add_oxog_filters(self):
    """Call ``add_oxog_filters`` directly with an uncompressed output path.

    Only the chr1:10 A>T record is expected to carry the ``oxog`` filter;
    every other record must be ``PASS``.
    """
    oxo_vcf = get_test_data_path(
        "test_input_for_add_oxog_filters_from_maf.vcf.gz"
    )
    vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            add_oxog_filters(vcf_file, oxo_vcf, fn)

        vcf = pysam.VariantFile(fn)
        try:
            self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
            for record in vcf:
                is_oxog_site = (
                    record.contig == "chr1"
                    and record.pos == 10
                    and record.alleles == ("A", "T")
                )
                expected = ["oxog"] if is_oxog_site else ["PASS"]
                self.assertEqual(record.filter.keys(), expected)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn("Processed 4 records - Tagged 1; Wrote 4", serr)
    finally:
        cleanup_files(fn)
def test_cli(self):
    """Run ``main`` with the argument list built by ``FakeOpts.to_cli_list``.

    The header of the written VCF is checked with ``self.validate_header``
    and the first/last log lines must name the expected loggers.
    """
    ivcf = get_test_data_path("test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        obj = FakeOpts(ivcf, fn)
        params = obj.to_cli_list()
        with captured_output() as (_, stderr):
            main(args=params)

        vcf = pysam.VariantFile(fn)
        try:
            # Copy so the header is still usable after the file is closed.
            hdr = vcf.header.copy()
        finally:
            vcf.close()
    finally:
        # Remove the output even when the run or the read-back fails.
        cleanup_files(fn)

    self.validate_header(obj, hdr)
    log_lines = [i for i in stderr.getvalue().split("\n") if i.rstrip("\r\n")]
    self.assertIn("gdc_filtration_tools.format_gdc_vcf", log_lines[0])
    self.assertIn("gdc_filtration_tools.main", log_lines[-1])
def test_position_filter_dkfz(self):
    """Call ``position_filter_dkfz`` directly and check its output.

    Exactly one record, on ``chr2``, must be written and stderr must
    contain the expected log lines.
    """
    ivcf = get_test_data_path("test_dfkz.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        total = 0
        with captured_output() as (_, stderr):
            position_filter_dkfz(ivcf, fn)

        vcf = pysam.VariantFile(fn)
        try:
            for record in vcf:
                total += 1
                self.assertEqual(record.chrom, "chr2")
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()
        self.assertEqual(total, 1)

        serr = stderr.getvalue()
        self.assertIn("Position Filter for DKFZ.", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_create_oxog_intervals(self):
    """Call ``create_oxog_intervals`` directly and check the interval file.

    The output file must contain exactly the lines ``chr1:1`` and
    ``chr2:1``, and stderr must contain the expected log lines.
    """
    ivcf = get_test_data_path("test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        fn = tmp.name
    try:
        expected = ["chr1:1", "chr2:1"]
        with captured_output() as (_, stderr):
            create_oxog_intervals(ivcf, fn)

        with open(fn, "rt") as fh:
            found = [line.rstrip("\r\n") for line in fh]
        self.assertEqual(len(found), 2)
        self.assertEqual(found, expected)

        serr = stderr.getvalue()
        self.assertIn(
            "Extracts interval-file for Broad OxoG metrics from VCF.", serr
        )
        self.assertIn("Processed 2 records", serr)
    finally:
        cleanup_files(fn)
def test_cli(self):
    """Run the ``position-filter-dkfz`` CLI end to end.

    Exactly one record, on ``chr2``, must be written and stderr must
    contain the expected log lines.
    """
    ivcf = get_test_data_path("test_dfkz.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        total = 0
        with captured_output() as (_, stderr):
            main(args=["position-filter-dkfz", ivcf, fn])

        vcf = pysam.VariantFile(fn)
        try:
            for record in vcf:
                total += 1
                self.assertEqual(record.chrom, "chr2")
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()
        self.assertEqual(total, 1)

        serr = stderr.getvalue()
        self.assertIn("Position Filter for DKFZ.", serr)
        self.assertIn("Creating tabix index...", serr)
        self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)

        # First and last non-empty log lines identify the emitting loggers.
        log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.position_filter_dkfz", log_lines[0])
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_format_pindel_vcf(self):
    """Call ``format_pindel_vcf`` directly and check both output records.

    Each record must have ``TYPEOFSV`` of ``INS`` and TUMOR ``GT`` (0, 1);
    ``forcedHet`` must be unset on the first and set on the second, and
    looking up ``SVTYPE`` must raise ``ValueError``.
    """
    ivcf = get_test_data_path("pindel_test.vcf")
    # Reserve an output path; the handle is closed on leaving the block so
    # no file descriptor stays open for the rest of the test.
    with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
        fn = tmp.name
    try:
        with captured_output() as (_, stderr):
            format_pindel_vcf(ivcf, fn)

        vcf = pysam.VariantFile(fn)
        try:
            self.assertEqual(list(vcf.header.samples), ["NORMAL", "TUMOR"])

            rec = next(vcf)
            self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
            with self.assertRaises(ValueError):
                rec.info.get("SVTYPE")
            self.assertFalse(rec.info.get("forcedHet"))
            self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

            rec = next(vcf)
            self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
            self.assertTrue(rec.info.get("forcedHet"))
            self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

            # No third record may follow.
            with self.assertRaises(StopIteration):
                next(vcf)
        finally:
            # Close the reader even when an assertion above fails.
            vcf.close()

        serr = stderr.getvalue()
        self.assertIn(
            "[gdc_filtration_tools.format_pindel_vcf] - Creating tabix index...",
            serr,
        )
        self.assertIn(
            "[gdc_filtration_tools.format_pindel_vcf] - Processed 2 records.",
            serr,
        )
    finally:
        # The run logs that it creates a tabix index; it is assumed to be
        # written as <output>.tbi, so remove it together with the output.
        cleanup_files([fn, fn + ".tbi"])
def test_generate_header(self):
    """Check the filters and contigs of a header built from the test FASTA.

    With filter tag ``TEST`` the header must list ``PASS`` and ``TEST`` as
    filters and a single contig ``chr1`` of length 100.
    """
    fasta_path = get_test_data_path("test_oxog_ref.fa")
    hdr = generate_header(fasta_path, "TEST")

    filter_names = hdr.filters.keys()
    self.assertEqual(filter_names, ["PASS", "TEST"])

    contig_names = list(hdr.contigs)
    self.assertEqual(contig_names, ["chr1"])

    chr1 = hdr.contigs.get("chr1")
    self.assertEqual(chr1.length, 100)
def test_load_oxog(self):
    """Load the OxoG metrics fixture and compare with the expected value."""
    metrics_path = get_test_data_path("test_oxog_metrics.txt")
    loaded = load_oxog(metrics_path)
    self.assertEqual(loaded, TestCreatedToxoGMaf.exp_oxog)