def test_filter_contigs(self):
    """Check ``filter_contigs`` output for both ``.vcf`` and ``.vcf.gz`` targets.

    For each output suffix the filtered file must contain exactly two
    records, and every record must be on ``chr1`` or ``chr2``.
    """
    ivcf = get_test_data_path("filter_contigs.vcf")
    exp_chroms = ["chr1", "chr2"]

    for suffix in (".vcf", ".vcf.gz"):
        # NamedTemporaryFile closes its handle when the with-block exits, so
        # no file descriptor is left open; delete=False keeps the (empty)
        # file on disk for filter_contigs to write to.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            fn = tmp.name
        try:
            # Output is captured only to keep the test log quiet.
            with captured_output():
                filter_contigs(ivcf, fn)

            found = 0
            rdr = pysam.VariantFile(fn)
            try:
                for record in rdr:
                    self.assertIn(record.chrom, exp_chroms)
                    found += 1
            finally:
                rdr.close()
            self.assertEqual(found, 2)
        finally:
            # Remove the temporary output even when an assertion fails.
            cleanup_files(fn)
    def test_filter_nonstandard_variants(self):
        """Check the records and log output of ``filter_nonstandard_variants``.

        The filtered file must contain exactly two records (``chr1`` then
        ``chr3``) and stderr must report one removed record out of three.
        """
        ivcf = get_test_data_path("test_nonstandard_variants.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                filter_nonstandard_variants(ivcf, fn)

            vcf = pysam.VariantFile(fn)
            try:
                record = next(vcf)
                self.assertEqual(record.chrom, "chr1")

                record = next(vcf)
                self.assertEqual(record.chrom, "chr3")

                # Only two records are expected in the output.
                with self.assertRaises(StopIteration):
                    next(vcf)
            finally:
                # Close the reader even if one of the assertions fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn("Drops non-ACTG loci from a VCF.", serr)
            self.assertIn("Removing chr2:1:A,R", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)
        finally:
            cleanup_files(fn)
Пример #3
0
 def test_filter_somatic_score_defaults(self):
     """Check ``filter_somatic_score`` when called with its default options.

     Expects three output records, none at position 1, exactly one of them
     carrying the ``ssc40`` filter (with TUMOR ``SSC`` equal to 25), plus
     the matching summary messages on stderr.
     """
     ivcf = get_test_data_path("test_somatic_score.vcf")
     # The handle is closed on leaving the with-block, so no descriptor
     # leaks; delete=False keeps the path available for the tool's output.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         total = 0
         tagged = 0
         with captured_output() as (_, stderr):
             filter_somatic_score(ivcf, fn)
             vcf = pysam.VariantFile(fn)
             try:
                 for record in vcf:
                     total += 1
                     self.assertNotEqual(record.pos, 1)
                     if "ssc40" in record.filter:
                         tagged += 1
                         self.assertEqual(
                             record.samples["TUMOR"]["SSC"], 25)
             finally:
                 # Close the reader even if an assertion fails mid-loop.
                 vcf.close()
         self.assertEqual(total, 3)
         self.assertEqual(tagged, 1)
         serr = stderr.getvalue()
         self.assertIn(
             "Filters SomaticSniper VCF files based on Somatic Score.",
             serr)
         self.assertIn("Filter tag: ssc40", serr)
         self.assertIn("Creating tabix index...", serr)
         self.assertIn(
             "Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)
     finally:
         cleanup_files(fn)
    def test_cli(self):
        """Run ``dtoxog-maf-to-vcf`` through ``main`` and verify its output.

        Every record in the produced VCF must be ``chr1:10 A>T`` with the
        ``oxog`` filter, and stderr must contain the expected log lines.
        """
        ifa = get_test_data_path("test_oxog_ref.fa")
        imaf = get_test_data_path("test_oxog_annotated.maf")

        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name

        try:
            with captured_output() as (_, stderr):
                main(["dtoxog-maf-to-vcf", imaf, ifa, fn])

            vout = pysam.VariantFile(fn)
            try:
                for record in vout:
                    self.assertEqual(record.chrom, "chr1")
                    self.assertEqual(record.pos, 10)
                    self.assertEqual(record.alleles, ("A", "T"))
                    self.assertEqual(record.filter.keys(), ["oxog"])
            finally:
                # Close the reader even if an assertion fails mid-loop.
                vout.close()
            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Creating tabix index...",
                serr)
            self.assertIn(
                "[gdc_filtration_tools.dtoxog_maf_to_vcf] - Processed 2 records - Wrote 1",
                serr)
            self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
        finally:
            # The tabix index written next to the output is removed as well.
            cleanup_files([fn, fn + ".tbi"])
Пример #5
0
    def test_cli(self):
        """Run ``create-oxog-intervals`` through ``main`` and verify its output.

        The interval file must hold exactly ``chr1:1`` and ``chr2:1``; stderr
        must contain the tool description and record count, start with a
        ``create_oxog_intervals`` log line and end with a ``main`` log line.
        """
        ivcf = get_test_data_path("test.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            expected = ["chr1:1", "chr2:1"]
            with captured_output() as (_, stderr):
                main(args=["create-oxog-intervals", ivcf, fn])
                with open(fn, "rt") as fh:
                    found = [line.rstrip("\r\n") for line in fh]

            self.assertEqual(len(found), 2)
            self.assertEqual(found, expected)
            serr = stderr.getvalue()
            self.assertIn(
                "Extracts interval-file for Broad OxoG metrics from VCF.", serr
            )
            self.assertIn("Processed 2 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.create_oxog_intervals", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #6
0
    def test_cli(self):
        """Run ``filter-somatic-score`` through ``main`` and verify its output.

        Expects three output records, none at position 1, exactly one tagged
        ``ssc40`` (with TUMOR ``SSC`` equal to 25). Stderr must contain the
        summary messages, start with a ``filter_somatic_score`` log line and
        end with a ``main`` log line.
        """
        ivcf = get_test_data_path("test_somatic_score.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            tagged = 0
            with captured_output() as (_, stderr):
                main(args=["filter-somatic-score", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    for record in vcf:
                        total += 1
                        self.assertNotEqual(record.pos, 1)
                        if "ssc40" in record.filter:
                            tagged += 1
                            self.assertEqual(
                                record.samples["TUMOR"]["SSC"], 25)
                finally:
                    # Close the reader even if an assertion fails mid-loop.
                    vcf.close()
            self.assertEqual(total, 3)
            self.assertEqual(tagged, 1)
            serr = stderr.getvalue()
            self.assertIn(
                "Filters SomaticSniper VCF files based on Somatic Score.",
                serr)
            self.assertIn("Filter tag: ssc40", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn(
                "Processed 4 records - Removed 1; Tagged 1; Wrote 3", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.filter_somatic_score", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #7
0
    def test_cli(self):
        """Run ``create-dtoxog-maf`` through ``main`` and verify the MAF file.

        The output must start with the ``#version 2.4.1`` line, followed by a
        header equal to ``MAF_COLUMNS`` and three rows matching
        ``TestCreatedToxoGMaf.exp_maf``. Stderr must contain the summary
        messages, start with a ``create_dtoxog_maf`` log line and end with a
        ``main`` log line.
        """
        imets = get_test_data_path("test_oxog_metrics.txt")
        vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
        fa_file = get_test_data_path("test_oxog_ref.fa")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=[
                    "create-dtoxog-maf", vcf_file, fn, fa_file, imets, "32.0"
                ])
                with open(fn, "rt") as fh:
                    self.assertEqual(fh.readline(), "#version 2.4.1\n")
                    header = fh.readline().rstrip("\r\n").split("\t")
                    self.assertEqual(header, MAF_COLUMNS)
                    count = 0
                    for line in fh:
                        # Pair each value with its column name so rows can
                        # be compared as dictionaries.
                        dat = dict(zip(header,
                                       line.rstrip("\r\n").split("\t")))
                        self.assertEqual(dat,
                                         TestCreatedToxoGMaf.exp_maf[count])
                        count += 1
                    self.assertEqual(count, 3)

            serr = stderr.getvalue()
            self.assertIn("Converts a SNP VCF to dToxoG MAF format.", serr)
            self.assertIn("Processed 3 records", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.create_dtoxog_maf", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #8
0
    def test_cli(self):
        """Run ``format-sanger-pindel-vcf`` through ``main`` and verify output.

        The output header must list samples ``NORMAL`` and ``TUMOR``; exactly
        two records are expected (positions 10 and 20), each with TUMOR
        genotype ``(0, 1)`` and NORMAL genotype ``(0, 0)``.
        """
        ivcf = get_test_data_path("sanger_pindel_test.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(["format-sanger-pindel-vcf", ivcf, fn])

            vcf = pysam.VariantFile(fn)
            try:
                self.assertEqual(list(vcf.header.samples), ["NORMAL", "TUMOR"])
                for expected_pos in (10, 20):
                    rec = next(vcf)
                    self.assertEqual(rec.pos, expected_pos)
                    self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))
                    self.assertEqual(rec.samples["NORMAL"]["GT"], (0, 0))

                # Only two records are expected in the output.
                with self.assertRaises(StopIteration):
                    next(vcf)
            finally:
                # Close the reader even if one of the assertions fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Creating tabix index...",
                serr
            )
            self.assertIn(
                "[gdc_filtration_tools.format_sanger_pindel_vcf] - Processed 2 records.",
                serr
            )
            self.assertIn("gdc_filtration_tools.main", serr)
        finally:
            cleanup_files(fn)
    def test_cli(self):
        """Run ``filter-nonstandard-variants`` through ``main`` and verify it.

        The output must contain exactly two records (``chr1`` then ``chr3``).
        Stderr must contain the summary messages, start with a
        ``filter_nonstandard_variants`` log line and end with a ``main`` log
        line.
        """
        ivcf = get_test_data_path("test_nonstandard_variants.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                main(args=["filter-nonstandard-variants", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    record = next(vcf)
                    self.assertEqual(record.chrom, "chr1")

                    record = next(vcf)
                    self.assertEqual(record.chrom, "chr3")

                    # Only two records are expected in the output.
                    with self.assertRaises(StopIteration):
                        next(vcf)
                finally:
                    # Close the reader even if one of the assertions fails.
                    vcf.close()
            serr = stderr.getvalue()
            self.assertIn("Drops non-ACTG loci from a VCF.", serr)
            self.assertIn("Removing chr2:1:A,R", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 3 records - Removed 1; Wrote 2", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.filter_nonstandard_variants",
                log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #10
0
 def test_cli(self):
     """Run ``add-oxog-filters`` through ``main`` and verify the output VCF.

     The output header must define the ``PASS`` and ``oxog`` filters. The
     ``chr1:10 A>T`` record must carry ``oxog``; every other record must
     carry ``PASS``. Stderr must contain the expected log lines.
     """
     oxo_vcf = get_test_data_path(
         "test_input_for_add_oxog_filters_from_maf.vcf.gz")
     vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
     # The handle is closed on leaving the with-block, so no descriptor
     # leaks; delete=False keeps the path available for the tool's output.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             main(args=["add-oxog-filters", vcf_file, oxo_vcf, fn])
         vcf = pysam.VariantFile(fn)
         try:
             self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
             for record in vcf:
                 is_oxog_site = (record.contig == "chr1"
                                 and record.pos == 10
                                 and record.alleles == ("A", "T"))
                 expected_filters = ["oxog"] if is_oxog_site else ["PASS"]
                 self.assertEqual(record.filter.keys(), expected_filters)
         finally:
             # Close the reader even if an assertion fails mid-loop.
             vcf.close()
         serr = stderr.getvalue()
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Creating tabix index",
             serr)
         self.assertIn(
             "[gdc_filtration_tools.add_oxog_filters] - Processed 4 records - Tagged 1; Wrote 4",
             serr)
         self.assertIn("[gdc_filtration_tools.main] - Finished!", serr)
     finally:
         cleanup_files(fn)
 def test_parse_options(self):
     '''
     test_parse_options: $ANDROID_PRODUCT_OUT or -c must be provided

     Calling parse_options with no arguments must exit with status 2 and
     print the corresponding error message to stderr.
     '''
     with captured_output() as (_, err):
         with self.assertRaises(SystemExit) as cm:
             parse_options([])
         exception = cm.exception
         err_msg = err.getvalue()
         self.assertIn(
             'Environment variable $ANDROID_PRODUCT_OUT or parameter "-c" should be provided.',
             err_msg)
         # assertEqual replaces the deprecated assertEquals alias, which
         # no longer exists in Python 3.12.
         self.assertEqual(2, exception.code)
Пример #12
0
    def test_import_global_variable_invalid(self):
        """
            This is a valid way to import, but you cannot import "importlib"
            and other such libraries. Only ones from the whitelist
        """

        with captured_output() as (out, err):
            with self.assertRaises(CompilationException) as context:
                self.si.execute_code_str("""
from test_contracts.good import balances
print('Hacked', balances) # Should not print this!
                """)
            self.assertEqual(out.getvalue().strip(), '')
    def test_cli(self):
        """Run the tool described by ``FakeOpts`` through ``main``.

        The header of the produced VCF is checked with ``validate_header``.
        Stderr must start with a ``format_gdc_vcf`` log line and end with a
        ``main`` log line.
        """
        ivcf = get_test_data_path("test.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            obj = FakeOpts(ivcf, fn)
            params = obj.to_cli_list()
            with captured_output() as (_, stderr):
                main(args=params)
            vcf = pysam.VariantFile(fn)
            try:
                # Copy the header so it stays usable after the file is
                # closed and removed.
                hdr = vcf.header.copy()
            finally:
                vcf.close()
        finally:
            # Remove the temporary output even if main() or the read fails.
            cleanup_files(fn)
        self.validate_header(obj, hdr)

        # Drop blank lines so the first/last entries are real log lines.
        serr = [i for i in stderr.getvalue().split("\n") if i.rstrip("\r\n")]
        self.assertIn("gdc_filtration_tools.format_gdc_vcf", serr[0])
        self.assertIn("gdc_filtration_tools.main", serr[-1])
Пример #14
0
    def do_test_snapshot(self, ci_generator, version_input, version_output):
        """Compare ``ci_generator.output()`` with a stored snapshot file.

        ``get_unity_versions_path`` on ``self.class_path`` is patched to
        return the resolved ``version_input`` path while the generator runs.
        The stripped stdout is then compared with the contents of the
        resolved ``version_output`` file. When the ``UPDATE_SNAPSHOTS``
        environment variable is set, the snapshot file is first rewritten
        with the new output.
        """
        input_path = full_path_from_relative_path(version_input)
        snapshot_path = full_path_from_relative_path(version_output)

        with captured_output() as (out, err):
            patch_target = f'{self.class_path}.get_unity_versions_path'
            with mock.patch(patch_target) as mocked_get_unity_versions_path:
                mocked_get_unity_versions_path.return_value = input_path
                ci_generator.output()
        output = out.getvalue().strip()

        update_requested = os.environ.get('UPDATE_SNAPSHOTS')
        if update_requested:  # no cover
            with open(snapshot_path, 'w') as snapshot:  # no cover
                snapshot.write(output)  # no cover

        with open(snapshot_path) as snapshot:
            # Show the complete diff when the snapshot does not match.
            self.maxDiff = None
            expected = snapshot.read()
            self.assertEqual(output, expected)
0
 def test_position_filter_dkfz(self):
     """Check the records and log output of ``position_filter_dkfz``.

     The filtered file must contain exactly one record, on ``chr2``, and
     stderr must report one removed record out of two.
     """
     ivcf = get_test_data_path("test_dfkz.vcf")
     # The handle is closed on leaving the with-block, so no descriptor
     # leaks; delete=False keeps the path available for the tool's output.
     with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
         fn = tmp.name
     try:
         total = 0
         with captured_output() as (_, stderr):
             position_filter_dkfz(ivcf, fn)
             vcf = pysam.VariantFile(fn)
             try:
                 for record in vcf:
                     total += 1
                     self.assertEqual(record.chrom, "chr2")
             finally:
                 # Close the reader even if an assertion fails mid-loop.
                 vcf.close()
         self.assertEqual(total, 1)
         serr = stderr.getvalue()
         self.assertIn("Position Filter for DKFZ.", serr)
         self.assertIn("Creating tabix index...", serr)
         self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)
     finally:
         cleanup_files(fn)
Пример #16
0
    def test_extract_oxoq_from_sqlite(self):
        """Check that ``extract_oxoq_from_sqlite`` prints the stored value.

        A temporary SQLite database is populated with a single
        ``OxoqRecord``; the value printed to stdout for input state
        ``gatk_applybqsr_readgroups`` must parse to ``30.23``.
        """
        rec = OxoqRecord("10000", "200", "100", "30.23")
        exp = 30.23
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the (empty) file for SQLite to open.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name

        try:
            # Generate test db. ``with conn`` only commits or rolls back the
            # transaction, so the connection is closed explicitly.
            conn = sqlite3.connect(fn)
            try:
                with conn:
                    # make schema
                    build_test_schema(conn)

                    cur = conn.cursor()
                    rec.insert(cur)
            finally:
                conn.close()

            with captured_output() as (stdout, _):
                extract_oxoq_from_sqlite(
                    fn, input_state="gatk_applybqsr_readgroups")
        finally:
            # Remove the database file even if setup or extraction fails.
            cleanup_files(fn)

        sout = float(stdout.getvalue().rstrip("\r\n"))
        self.assertEqual(sout, exp)
Пример #17
0
    def test_create_oxog_intervals(self):
        """Check the interval file and log output of ``create_oxog_intervals``.

        The output file must contain exactly ``chr1:1`` and ``chr2:1``, and
        stderr must contain the tool description and the record count.
        """
        ivcf = get_test_data_path("test.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            fn = tmp.name
        try:
            expected = ["chr1:1", "chr2:1"]
            with captured_output() as (_, stderr):
                create_oxog_intervals(ivcf, fn)
                with open(fn, "rt") as fh:
                    found = [line.rstrip("\r\n") for line in fh]

            self.assertEqual(len(found), 2)
            self.assertEqual(found, expected)
            serr = stderr.getvalue()
            self.assertIn(
                "Extracts interval-file for Broad OxoG metrics from VCF.", serr
            )
            self.assertIn("Processed 2 records", serr)
        finally:
            cleanup_files(fn)
Пример #18
0
 def test_create_dtoxog_maf(self):
     """Check the MAF file and log output of ``create_dtoxog_maf``.

     The output must start with the ``#version 2.4.1`` line, followed by a
     header equal to ``MAF_COLUMNS`` and three rows matching
     ``TestCreatedToxoGMaf.exp_maf``. The third stderr line must report
     three processed records.
     """
     imets = get_test_data_path("test_oxog_metrics.txt")
     vcf_file = get_test_data_path("test_input_for_dtoxog.vcf")
     fa_file = get_test_data_path("test_oxog_ref.fa")
     # The handle is closed on leaving the with-block, so no descriptor
     # leaks; delete=False keeps the path available for the tool's output.
     with tempfile.NamedTemporaryFile(delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             create_dtoxog_maf(vcf_file, fn, fa_file, imets, 32.0)
             with open(fn, "rt") as fh:
                 self.assertEqual(fh.readline(), "#version 2.4.1\n")
                 header = fh.readline().rstrip("\r\n").split("\t")
                 self.assertEqual(header, MAF_COLUMNS)
                 count = 0
                 for line in fh:
                     # Pair each value with its column name so rows can be
                     # compared as dictionaries.
                     dat = dict(zip(header,
                                    line.rstrip("\r\n").split("\t")))
                     self.assertEqual(dat,
                                      TestCreatedToxoGMaf.exp_maf[count])
                     count += 1
                 # Guard against a truncated or empty output, which would
                 # otherwise pass the per-row checks vacuously.
                 self.assertEqual(count, 3)
         serr = stderr.getvalue().split("\n")
         self.assertIn("Processed 3 records", serr[2])
     finally:
         cleanup_files(fn)
Пример #19
0
    def test_cli(self):
        """Run ``position-filter-dkfz`` through ``main`` and verify its output.

        The output must contain exactly one record, on ``chr2``. Stderr must
        contain the summary messages, start with a ``position_filter_dkfz``
        log line and end with a ``main`` log line.
        """
        ivcf = get_test_data_path("test_dfkz.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            total = 0
            with captured_output() as (_, stderr):
                main(args=["position-filter-dkfz", ivcf, fn])
                vcf = pysam.VariantFile(fn)
                try:
                    for record in vcf:
                        total += 1
                        self.assertEqual(record.chrom, "chr2")
                finally:
                    # Close the reader even if an assertion fails mid-loop.
                    vcf.close()
            self.assertEqual(total, 1)
            serr = stderr.getvalue()
            self.assertIn("Position Filter for DKFZ.", serr)
            self.assertIn("Creating tabix index...", serr)
            self.assertIn("Processed 2 records - Removed 1; Wrote 1", serr)

            # Drop blank lines so the first/last entries are real log lines.
            log_lines = [i for i in serr.split("\n") if i.rstrip("\r\n")]
            self.assertIn(
                "gdc_filtration_tools.position_filter_dkfz", log_lines[0])
            self.assertIn("gdc_filtration_tools.main", log_lines[-1])
        finally:
            cleanup_files(fn)
Пример #20
0
 def test_add_oxog_filters(self):
     """Check the output VCF and log summary of ``add_oxog_filters``.

     The output header must define the ``PASS`` and ``oxog`` filters. The
     ``chr1:10 A>T`` record must carry ``oxog``; every other record must
     carry ``PASS``. Stderr must report four records, one of them tagged.
     """
     oxo_vcf = get_test_data_path(
         "test_input_for_add_oxog_filters_from_maf.vcf.gz")
     vcf_file = get_test_data_path("test_input_for_add_oxog_filters.vcf")
     # The handle is closed on leaving the with-block, so no descriptor
     # leaks; delete=False keeps the path available for the tool's output.
     with tempfile.NamedTemporaryFile(suffix=".vcf", delete=False) as tmp:
         fn = tmp.name
     try:
         with captured_output() as (_, stderr):
             add_oxog_filters(vcf_file, oxo_vcf, fn)
         vcf = pysam.VariantFile(fn)
         try:
             self.assertEqual(vcf.header.filters.keys(), ["PASS", "oxog"])
             for record in vcf:
                 is_oxog_site = (record.contig == "chr1"
                                 and record.pos == 10
                                 and record.alleles == ("A", "T"))
                 expected_filters = ["oxog"] if is_oxog_site else ["PASS"]
                 self.assertEqual(record.filter.keys(), expected_filters)
         finally:
             # Close the reader even if an assertion fails mid-loop.
             vcf.close()
         serr = stderr.getvalue()
         self.assertIn("Processed 4 records - Tagged 1; Wrote 4", serr)
     finally:
         cleanup_files(fn)
    def test_format_pindel_vcf(self):
        """Check the records and log output of ``format_pindel_vcf``.

        The output header must list samples ``NORMAL`` and ``TUMOR``. Exactly
        two records are expected, both with ``TYPEOFSV`` equal to ``INS`` and
        TUMOR genotype ``(0, 1)``; only the second one has ``forcedHet`` set.
        """
        ivcf = get_test_data_path("pindel_test.vcf")
        # The handle is closed on leaving the with-block, so no descriptor
        # leaks; delete=False keeps the path available for the tool's output.
        with tempfile.NamedTemporaryFile(suffix=".vcf.gz", delete=False) as tmp:
            fn = tmp.name
        try:
            with captured_output() as (_, stderr):
                format_pindel_vcf(ivcf, fn)

            vcf = pysam.VariantFile(fn)
            try:
                self.assertEqual(list(vcf.header.samples), ["NORMAL", "TUMOR"])
                rec = next(vcf)
                self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
                # Looking up SVTYPE is expected to raise ValueError
                # (presumably because the key is not defined in the output
                # header -- confirm against pysam's behaviour).
                with self.assertRaises(ValueError):
                    rec.info.get("SVTYPE")
                self.assertFalse(rec.info.get("forcedHet"))
                self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

                rec = next(vcf)
                self.assertEqual(rec.info.get("TYPEOFSV"), "INS")
                self.assertTrue(rec.info.get("forcedHet"))
                self.assertEqual(rec.samples["TUMOR"]["GT"], (0, 1))

                # Only two records are expected in the output.
                with self.assertRaises(StopIteration):
                    next(vcf)
            finally:
                # Close the reader even if one of the assertions fails.
                vcf.close()

            serr = stderr.getvalue()
            self.assertIn(
                "[gdc_filtration_tools.format_pindel_vcf] - Creating tabix index...",
                serr
            )
            self.assertIn(
                "[gdc_filtration_tools.format_pindel_vcf] - Processed 2 records.",
                serr
            )
        finally:
            cleanup_files(fn)