Пример #1
0
    def test_cli(self):
        """Run ``create-dtoxog-maf`` through ``main`` and check the MAF and logs.

        The command is given the test VCF, an output path, the reference
        FASTA, the OxoG metrics file and the literal ``"32.0"``. The output
        must start with the ``#version 2.4.1`` line, carry ``MAF_COLUMNS`` as
        its header and contain exactly three rows equal to
        ``TestCreatedToxoGMaf.exp_maf``. The captured stderr must contain the
        expected log messages.
        """
        imets = get_test_data_path("test_oxog_metrics.txt")
        vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
        fa_file = get_test_data_path("test_oxog_ref.fa")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=[
                    "create-dtoxog-maf", vcf_file, fn, fa_file, imets, "32.0"
                ])

            with open(fn, "rt") as fh:
                self.assertEqual(fh.readline(), "#version 2.4.1\n")
                header = fh.readline().rstrip("\r\n").split("\t")
                self.assertEqual(header, MAF_COLUMNS)
                count = 0
                for idx, line in enumerate(fh):
                    dat = dict(zip(header, line.rstrip("\r\n").split("\t")))
                    self.assertEqual(dat, TestCreatedToxoGMaf.exp_maf[idx])
                    count = idx + 1
                self.assertEqual(count, 3)

            serr = stderr.getvalue()
            self.assertIn("Converts a SNP VCF to dToxoG MAF format.", serr)
            self.assertIn("Processed 3 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.create_dtoxog_maf", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #2
0
 def test_cli(self):
     """Run ``add-oxog-filters`` through ``main`` and check the VCF and logs.

     The output header must list the ``PASS`` and ``oxog`` filters. The
     record at chr1:10 with alleles A>T must carry only ``oxog``; every
     other record must carry only ``PASS``. The captured stderr must
     contain the expected log messages.
     """
     oxo_vcf = get_test_data_path(
         "test_input_for_add_oxog_filters_from_maf.vcf.gz")
     vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
     # mkstemp() returns an open OS-level descriptor that was never closed
     # here. NamedTemporaryFile(delete=False) reserves the path the same
     # way and releases its handle as soon as the ``with`` exits.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             main(args=["add-oxog-filters", vcf_file, oxo_vcf, fn])

         # Context manager closes the VCF even when an assertion fails.
         with pysam.VariantFile(fn) as vcf:
             self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
             for record in vcf:
                 is_oxog_site = (
                     record.contig == "chr1"
                     and record.pos == 10
                     and record.alleles == ("A", "T")
                 )
                 expected = ["oxog"] if is_oxog_site else ["PASS"]
                 self.assertEqual(record.filter.keys(), expected)

         serr = stderr.getvalue()
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Creating tabix index",
             serr)
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Processed 4 records - Tagged 1; Wrote 4",
             serr)
         self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
     finally:
         # The command logs that it creates a tabix index; presumably it is
         # written to fn + ".tbi" (verify), so remove it with the VCF.
         cleanup_files([fn, fn + ".tbi"])
    def test_cli(self):
        """Run ``filter-nonstandard-variants`` through ``main`` and check results.

        The output VCF must contain exactly two records, on ``chr1`` and
        ``chr3`` in that order. The captured stderr must report the removed
        ``chr2:1:A,R`` locus, the tabix indexing step and the record counts.
        """
        ivcf = get_test_data_path("test_nonstandard_variants.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=["filter-nonstandard-variants", ivcf, fn])

            # Context manager closes the VCF even when an assertion fails.
            with pysam.VariantFile(fn) as vcf:
                record = next(vcf)
                self.assertEqual(record.chrom, "chr1")

                record = next(vcf)
                self.assertEqual(record.chrom, "chr3")

                # No third record may remain.
                with self.assertRaises(StopIteration):
                    next(vcf)

            serr = stderr.getvalue()
            self.assertIn("Drops non-ACTG loci from a VCF.", serr)
            self.assertIn("Removing chr2:1:A,R", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.filter_nonstandard_variants",
                log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The command logs that it creates a tabix index; presumably it is
            # written to fn + ".tbi" (verify), so remove it with the VCF.
            cleanup_files([fn, fn + ".tbi"])
Пример #4
0
    def test_cli(self):
        """Run ``filter-somatic-score`` through ``main`` and check results.

        The output VCF must hold three records, none at position 1. Exactly
        one record must carry the ``ssc40`` filter, and its ``TUMOR`` sample
        must have ``SSC`` equal to 25. The captured stderr must contain the
        expected log messages.
        """
        ivcf = get_test_data_path("test_somatic_score.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            tagged = 0
            with captured_output() as (_, stderr):
                main(args=["filter-somatic-score", ivcf, fn])

            # Context manager closes the VCF even when an assertion fails.
            with pysam.VariantFile(fn) as vcf:
                for record in vcf:
                    total += 1
                    self.assertNotEqual(record.pos, 1)
                    if "ssc40" in record.filter:
                        tagged += 1
                        self.assertEqual(record.samples["TUMOR"]["SSC"], 25)

            self.assertEqual(total, 3)
            self.assertEqual(tagged, 1)

            serr = stderr.getvalue()
            self.assertIn(
                "Filters SomaticSniper VCF files based on Somatic Score.",
                serr)
            self.assertIn("Filter tag: ssc40", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn(
                "Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.filter_somatic_score", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The command logs that it creates a tabix index; presumably it is
            # written to fn + ".tbi" (verify), so remove it with the VCF.
            cleanup_files([fn, fn + ".tbi"])
Пример #5
0
    def test_cli(self):
        """Run ``create-oxog-intervals`` through ``main`` and check results.

        The output file must contain exactly the two lines ``chr1:1`` and
        ``chr2:1``. The captured stderr must contain the expected log
        messages.
        """
        ivcf = get_test_data_path("test.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            expected = ["chr1:1", "chr2:1"]
            with captured_output() as (_, stderr):
                main(args=["create-oxog-intervals", ivcf, fn])

            with open(fn, "rt") as fh:
                found = [line.rstrip("\r\n") for line in fh]

            self.assertEqual(len(found), 2)
            self.assertEqual(found, expected)

            serr = stderr.getvalue()
            self.assertIn(
                "Extracts interval-file for Broad OxoG metrics from VCF.", serr
            )
            self.assertIn("Processed 2 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.create_oxog_intervals", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #6
0
    def test_cli(self):
        """Run ``format-sanger-pindel-vcf`` through ``main`` and check results.

        The output header must list the samples ``NORMAL`` and ``TUMOR``.
        The file must hold exactly two records, at positions 10 and 20, each
        with ``TUMOR`` GT ``(0, 1)`` and ``NORMAL`` GT ``(0, 0)``. The
        captured stderr must contain the expected log messages.
        """
        ivcf = get_test_data_path("sanger_pindel_test.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(["format-sanger-pindel-vcf", ivcf, fn])

            # Context manager closes the VCF even when an assertion fails.
            with pysam.VariantFile(fn) as vcf:
                self.assertEqual(list(vcf.header.samples), ["NORMAL", "TUMOR"])

                for expected_pos in (10, 20):
                    rec = next(vcf)
                    self.assertEqual(rec.pos, expected_pos)
                    self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))
                    self.assertEqual(rec.samples["NORMAL"]["GT"], (0, 0))

                # No third record may remain.
                with self.assertRaises(StopIteration):
                    next(vcf)

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Creating tabix index...",
                serr
            )
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Processed 2 records.",
                serr
            )
            self.assertIn("gdc_filtration_tools.main", serr)
        finally:
            # The command logs that it creates a tabix index; presumably it is
            # written to fn + ".tbi" (verify), so remove it with the VCF.
            cleanup_files([fn, fn + ".tbi"])
    def test_cli(self):
        """Run ``dtoxog-maf-to-vcf`` through ``main`` and check the VCF and logs.

        Every record in the output VCF must be chr1:10 A>T and carry only
        the ``oxog`` filter. The captured stderr must contain the expected
        log messages.
        """
        ifa = get_test_data_path("test_oxog_ref.fa")
        imaf = get_test_data_path("test_oxog_annotated.maf")

        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name

        try:
            with captured_output() as (_, stderr):
                main(["dtoxog-maf-to-vcf", imaf, ifa, fn])

            # Context manager closes the VCF even when an assertion fails.
            with pysam.VariantFile(fn) as vout:
                for record in vout:
                    self.assertEqual(record.chrom, "chr1")
                    self.assertEqual(record.pos, 10)
                    self.assertEqual(record.alleles, ("A", "T"))
                    self.assertEqual(record.filter.keys(), ["oxog"])

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Creating tabix index...",
                serr)
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Processed 2 records - Wrote 1",
                serr)
            self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
        finally:
            cleanup_files([fn, fn + ".tbi"])
    def test_cli(self):
        """Run the CLI with arguments built by ``FakeOpts`` and check the header.

        The header of the output VCF is copied and passed to
        ``self.validate_header`` together with the options object. The first
        and last non-empty stderr lines must name the ``format_gdc_vcf`` and
        ``main`` loggers respectively.
        """
        ivcf = get_test_data_path("test.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            obj = FakeOpts(ivcf, fn)
            params = obj.to_cli_list()
            with captured_output() as (_, stderr):
                main(args=params)
            # Copy the header so it can be validated after the file is gone.
            with pysam.VariantFile(fn) as vcf:
                hdr = vcf.header.copy()
        finally:
            # Remove the temp file even when main() or the header read raises.
            cleanup_files(fn)
        self.validate_header(obj, hdr)

        # Drop blank lines so the first/last entries are real log lines.
        log_lines = [
            i for i in stderr.getvalue().split("\n") if i.rstrip("\r\n")
        ]
        self.assertIn("gdc_filtration_tools.format_gdc_vcf", log_lines[0])
        self.assertIn("gdc_filtration_tools.main", log_lines[-1])
Пример #9
0
    def test_cli(self):
        """Run ``position-filter-dkfz`` through ``main`` and check results.

        The output VCF must contain exactly one record, on ``chr2``. The
        captured stderr must contain the expected log messages.
        """
        ivcf = get_test_data_path("test_dfkz.vcf")
        # mkstemp() returns an open OS-level descriptor that was never closed
        # here. NamedTemporaryFile(delete=False) reserves the path the same
        # way and releases its handle as soon as the ``with`` exits.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            with captured_output() as (_, stderr):
                main(args=["position-filter-dkfz", ivcf, fn])

            # Context manager closes the VCF even when an assertion fails.
            with pysam.VariantFile(fn) as vcf:
                for record in vcf:
                    total += 1
                    self.assertEqual(record.chrom, "chr2")

            self.assertEqual(total, 1)

            serr = stderr.getvalue()
            self.assertIn("Position Filter for DKFZ.", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.position_filter_dkfz", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            # The command logs that it creates a tabix index; presumably it is
            # written to fn + ".tbi" (verify), so remove it with the VCF.
            cleanup_files([fn, fn + ".tbi"])