示例#1
0
    def test_no_overwriting_muts(self):
        """Annotating must fail when overwriting is disallowed and the input already carries the value.

        The maflite input holds a "Who" annotation that the gene datasource
        also tries to write.  The run is configured with
        ALLOW_ANNOTATION_OVERWRITING set to False, so annotate() is expected
        to raise DuplicateAnnotationException.
        """
        tj_datasource = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/")

        # Overwriting is explicitly switched off for this run.
        run_options = {}
        run_options[OptionConstants.ALLOW_ANNOTATION_OVERWRITING] = False
        run_options[OptionConstants.NO_PREPEND] = True

        # Positional order: input format, output format, input file, output file.
        spec = RunSpecificationFactory.create_run_spec_given_datasources(
            "MAFLITE",
            "TCGAMAF",
            "testdata/maflite/who_alt1_vs_alt2.maflite",
            "out/who_alt1_vs_alt2.maf.annotated",
            datasource_list=[tj_datasource],
            other_opts=run_options)

        failing_annotator = Annotator()
        failing_annotator.initialize(spec)

        self.assertRaises(DuplicateAnnotationException,
                          failing_annotator.annotate)
示例#2
0
    def test_overwriting_muts(self):
        """Ensure that (given correct configuration) we can annotate from a datasource, even if the datasource will overwrite an existing mutation.

        The maflite input holds a "Who" annotation that the gene datasource
        also tries to write.  With ALLOW_ANNOTATION_OVERWRITING set to True,
        annotate() must complete and no row of the rendered output may hold
        "Tromokratis" in the TJ_Data_Who column.
        """
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/")
        input_filename = "testdata/maflite/who_alt1_vs_alt2.maflite"
        output_filename = "out/who_alt1_vs_alt2.maf.annotated"
        input_format = "MAFLITE"
        output_format = "TCGAMAF"

        other_opts = {
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
            OptionConstants.NO_PREPEND: True
        }

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            input_format,
            output_format,
            input_filename,
            output_filename,
            datasource_list=[gene_ds],
            other_opts=other_opts)
        annotator = Annotator()
        annotator.initialize(run_spec)

        annotator.annotate()

        tsv_reader = GenericTsvReader(output_filename)

        # A row without a TJ_Data_Who column falls back to "" and passes;
        # only a remaining "Tromokratis" value counts as a failure.
        # NOTE(review): an output with zero rows also passes -- confirm that
        # is acceptable for this test.
        for row_number, line_dict in enumerate(tsv_reader, start=1):
            self.assertNotEqual(
                line_dict.get('TJ_Data_Who', ""), "Tromokratis",
                "TJ_Data_Who is still 'Tromokratis' in output row " +
                str(row_number))
示例#3
0
    def test_no_overwriting_muts(self):
        """Check that a duplicate annotation is rejected when overwriting is turned off.

        The input file already contains a "Who" annotation and the gene
        datasource attempts to write it as well.  Because the options set
        ALLOW_ANNOTATION_OVERWRITING to False, calling annotate() should raise
        DuplicateAnnotationException.
        """
        datasource_under_test = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/",
        )

        in_format, out_format = "MAFLITE", "TCGAMAF"
        in_path = "testdata/maflite/who_alt1_vs_alt2.maflite"
        out_path = "out/who_alt1_vs_alt2.maf.annotated"

        # Overwriting is disallowed; annotations keep their unprefixed names.
        no_overwrite_opts = {
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: False,
            OptionConstants.NO_PREPEND: True,
        }

        spec = RunSpecificationFactory.create_run_spec_given_datasources(
            in_format, out_format, in_path, out_path,
            datasource_list=[datasource_under_test],
            other_opts=no_overwrite_opts,
        )

        strict_annotator = Annotator()
        strict_annotator.initialize(spec)

        self.assertRaises(DuplicateAnnotationException, strict_annotator.annotate)
示例#4
0
    def test_overwriting_muts(self):
        """Ensure that (given correct configuration) we can annotate from a datasource, even if the datasource will overwrite an existing mutation.

        The maflite input holds a "Who" annotation that the gene datasource
        also tries to write.  With ALLOW_ANNOTATION_OVERWRITING set to True,
        annotate() must complete and no row of the rendered output may hold
        "Tromokratis" in the TJ_Data_Who column.
        """
        gene_ds = DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config", "testdata/thaga_janakari_gene_ds/hg19/"
        )
        input_filename = "testdata/maflite/who_alt1_vs_alt2.maflite"
        output_filename = "out/who_alt1_vs_alt2.maf.annotated"
        input_format = "MAFLITE"
        output_format = "TCGAMAF"

        other_opts = {OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True, OptionConstants.NO_PREPEND: True}

        run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
            input_format,
            output_format,
            input_filename,
            output_filename,
            datasource_list=[gene_ds],
            other_opts=other_opts,
        )
        annotator = Annotator()
        annotator.initialize(run_spec)

        annotator.annotate()

        tsv_reader = GenericTsvReader(output_filename)

        # A row without a TJ_Data_Who column falls back to "" and passes;
        # only a remaining "Tromokratis" value counts as a failure.
        # NOTE(review): an output with zero rows also passes -- confirm that
        # is acceptable for this test.
        for row_number, line_dict in enumerate(tsv_reader, start=1):
            self.assertNotEqual(
                line_dict.get("TJ_Data_Who", ""),
                "Tromokratis",
                "TJ_Data_Who is still 'Tromokratis' in output row " + str(row_number),
            )
 def test_run_spec_creation_no_datasources(self):
     """Test that we can create a run spec with no datasources.

     Builds a VCF -> TCGAMAF run spec with an empty datasource list and
     checks the types of its input creator and output renderer, and that
     annotation overwriting is reported as False when no options are given.
     """
     run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
         input_format="VCF",
         input_filename="testdata/m2_support/phasingExample.vcf",
         output_format="TCGAMAF",
         output_filename="out/foo.maf.annotated",
         datasource_list=[])
     self.assertIsInstance(run_spec.inputCreator, InputMutationCreator)
     self.assertIsInstance(run_spec.outputRenderer, OutputRenderer)
     # assertEqual keeps the original `== False` comparison (so None or ""
     # would still fail) while reporting the actual value on failure.
     self.assertEqual(run_spec.is_allow_annotation_overwriting, False)
示例#6
0
 def test_run_spec_creation_no_datasources(self):
     """Test that we can create a run spec with no datasources.

     Builds a VCF -> TCGAMAF run spec with an empty datasource list and
     checks the types of its input creator and output renderer, and that
     annotation overwriting is reported as False when no options are given.
     """
     run_spec = RunSpecificationFactory.create_run_spec_given_datasources(
         input_format="VCF",
         input_filename="testdata/m2_support/phasingExample.vcf",
         output_format="TCGAMAF",
         output_filename="out/foo.maf.annotated",
         datasource_list=[])
     self.assertIsInstance(run_spec.inputCreator, InputMutationCreator)
     self.assertIsInstance(run_spec.outputRenderer, OutputRenderer)
     # assertEqual keeps the original `== False` comparison (so None or ""
     # would still fail) while reporting the actual value on failure.
     self.assertEqual(run_spec.is_allow_annotation_overwriting, False)
    def test_tcgamaf_invalid_input_file(self):
        """Test a case where TCGAMAF specified as input and we get an error (as we should) for a missing file.

        Creating the run spec for the non-existent input file must raise
        IOError.
        """
        # Only IOError satisfies the check; any other exception type still
        # propagates and is reported as a test error.
        with self.assertRaises(IOError):
            RunSpecificationFactory.create_run_spec_given_datasources(
                input_format="TCGAMAF",
                input_filename="testdata/Idonotexist",
                output_format="TCGAMAF",
                output_filename="out/foo.maf.annotated",
                datasource_list=[])
示例#8
0
    def test_tcgamaf_invalid_input_file(self):
        """Test a case where TCGAMAF specified as input and we get an error (as we should) for a missing file.

        Creating the run spec for the non-existent input file must raise
        IOError.
        """
        # Only IOError satisfies the check; any other exception type still
        # propagates and is reported as a test error.
        with self.assertRaises(IOError):
            RunSpecificationFactory.create_run_spec_given_datasources(
                input_format="TCGAMAF",
                input_filename="testdata/Idonotexist",
                output_format="TCGAMAF",
                output_filename="out/foo.maf.annotated",
                datasource_list=[])
    def test_reannotating_actual_file(self):
        """Test that we can take in a file, annotate, similar to M2 process (VCF to TCGA MAF no ONPs, then TCGA MAF to TCGA MAF with ONPs) and collapse values.

        Step 1 annotates the VCF into a midpoint TCGA MAF with ONP inference
        off.  Step 2 re-annotates that midpoint file into the final TCGA MAF
        with ONP inference on, annotation overwriting allowed and number
        annotations collapsed, reusing the datasources loaded in step 1.
        """
        input_filename = "testdata/m2_support/phasingExample.vcf"
        midpoint_output_filename = "out/m2_support/reannotating_tcga_maf_midpoint.maf.annotated"
        output_filename = "out/m2_support/reannotating_tcga_maf.maf.annotated"

        # Step 1 does not set COLLAPSE_NUMBER_ANNOTATIONS.
        options_step1 = {OptionConstants.COLLAPSE_FILTER_COLS: True, OptionConstants.NO_PREPEND: False,
                         OptionConstants.SPLIT_ALLELIC_DEPTH: True, OptionConstants.INFER_ONPS: False}

        # Note that this will also test collapsing numeric values.
        options_step2 = {OptionConstants.REANNOTATE_TCGA_MAF_COLS: True, OptionConstants.INFER_ONPS: True,
                         OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True, OptionConstants.NO_PREPEND: False,
                         OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        run_spec_step1 = RunSpecificationFactory.create_run_spec("VCF", "TCGAMAF", input_filename, midpoint_output_filename,
                                                                 is_skip_no_alts=True, other_opts=options_step1,
                                                                 datasource_dir=self._determine_db_dir())

        annotator = Annotator()
        annotator.initialize(run_spec_step1)
        annotator.annotate()

        # To speed up this test, use the same datasources from step 1
        ds_list = run_spec_step1.get_datasources()

        tsv_reader = GenericTsvReader(midpoint_output_filename)
        # Stays 0 when the midpoint file has no rows, so the count check below still fails cleanly.
        midpoint_count = 0
        for midpoint_count, line in enumerate(tsv_reader, start=1):
            self.assertNotIn("|", line["i_QSS"],
                             "i_QSS annotation should not have a '|' in it in mutation: " + str(midpoint_count))
        self.assertEqual(midpoint_count, 3,
                         'Mutation count flawed... should have been three mutations: ' + str(midpoint_count))

        run_spec_step2 = RunSpecificationFactory.create_run_spec_given_datasources("TCGAMAF", "TCGAMAF", midpoint_output_filename, output_filename,
                                                                                   other_opts=options_step2, datasource_list=ds_list)

        annotator.initialize(run_spec_step2)
        annotator.annotate()

        # Ground truth, one entry per expected output mutation.
        gt_alt_count = [80, 7]
        gt_alt_count_full = ["82|80", "7"]
        gt_ref_count = [68, 151]

        # Please note that this is not "68|68" since these were collapsed by ONP combiner.
        gt_ref_count_full = ["68", "151"]

        gt_tumor_f = [.5375, .046]
        gt_tumor_f_full = ["0.538|0.537", "0.046"]

        tsv_reader = GenericTsvReader(output_filename)
        final_count = 0
        for final_count, line in enumerate(tsv_reader, start=1):
            idx = final_count - 1
            # Fail with a readable message instead of an IndexError when the
            # output holds more mutations than there are ground-truth entries.
            self.assertLess(idx, len(gt_alt_count),
                            'Mutation count flawed... should have been two mutations: ' + str(final_count))

            self.assertTrue(all(not ks.startswith('i_i_') for ks in line.keys()), "i_i_ prefix found.")
            if idx == 0:
                self.assertIn("|", line["i_QSS"], "i_QSS tag should have a '|' in it for the first mutation")
            self.assertEqual(int(line['t_alt_count']), gt_alt_count[idx])
            self.assertEqual(int(line['t_ref_count']), gt_ref_count[idx])
            # NOTE(review): exact float equality is kept from the original test -- confirm no tolerance is wanted.
            self.assertEqual(float(line['i_tumor_f']), gt_tumor_f[idx])

            self.assertEqual(line['i_t_alt_count_full'], gt_alt_count_full[idx])
            self.assertEqual(line['i_t_ref_count_full'], gt_ref_count_full[idx])
            self.assertEqual(line['i_tumor_f_full'], gt_tumor_f_full[idx])

        self.assertEqual(final_count, 2,
                         'Mutation count flawed... should have been two mutations: ' + str(final_count))
    def test_reannotating_actual_file(self):
        """Test that we can take in a file, annotate, similar to M2 process (VCF to TCGA MAF no ONPs, then TCGA MAF to TCGA MAF with ONPs) and collapse values.

        Step 1 annotates the VCF into a midpoint TCGA MAF with ONP inference
        off.  Step 2 re-annotates that midpoint file into the final TCGA MAF
        with ONP inference on, annotation overwriting allowed and number
        annotations collapsed, reusing the datasources loaded in step 1.
        """
        input_filename = "testdata/m2_support/phasingExample.vcf"
        midpoint_output_filename = "out/m2_support/reannotating_tcga_maf_midpoint.maf.annotated"
        output_filename = "out/m2_support/reannotating_tcga_maf.maf.annotated"

        # Step 1 does not set COLLAPSE_NUMBER_ANNOTATIONS.
        options_step1 = {
            OptionConstants.COLLAPSE_FILTER_COLS: True,
            OptionConstants.NO_PREPEND: False,
            OptionConstants.SPLIT_ALLELIC_DEPTH: True,
            OptionConstants.INFER_ONPS: False
        }

        # Note that this will also test collapsing numeric values.
        options_step2 = {
            OptionConstants.REANNOTATE_TCGA_MAF_COLS: True,
            OptionConstants.INFER_ONPS: True,
            OptionConstants.ALLOW_ANNOTATION_OVERWRITING: True,
            OptionConstants.NO_PREPEND: False,
            OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True
        }

        run_spec_step1 = RunSpecificationFactory.create_run_spec(
            "VCF",
            "TCGAMAF",
            input_filename,
            midpoint_output_filename,
            is_skip_no_alts=True,
            other_opts=options_step1,
            datasource_dir=self._determine_db_dir())

        annotator = Annotator()
        annotator.initialize(run_spec_step1)
        annotator.annotate()

        # To speed up this test, use the same datasources from step 1
        ds_list = run_spec_step1.get_datasources()

        tsv_reader = GenericTsvReader(midpoint_output_filename)
        # Stays 0 when the midpoint file has no rows, so the count check
        # below still fails cleanly.
        midpoint_count = 0
        for midpoint_count, line in enumerate(tsv_reader, start=1):
            self.assertNotIn(
                "|", line["i_QSS"],
                "i_QSS annotation should not have a '|' in it in mutation: " +
                str(midpoint_count))
        self.assertEqual(
            midpoint_count, 3,
            'Mutation count flawed... should have been three mutations: ' +
            str(midpoint_count))

        run_spec_step2 = RunSpecificationFactory.create_run_spec_given_datasources(
            "TCGAMAF",
            "TCGAMAF",
            midpoint_output_filename,
            output_filename,
            other_opts=options_step2,
            datasource_list=ds_list)

        annotator.initialize(run_spec_step2)
        annotator.annotate()

        # Ground truth, one entry per expected output mutation.
        gt_alt_count = [80, 7]
        gt_alt_count_full = ["82|80", "7"]
        gt_ref_count = [68, 151]

        # Please note that this is not "68|68" since these were collapsed by ONP combiner.
        gt_ref_count_full = ["68", "151"]

        gt_tumor_f = [.5375, .046]
        gt_tumor_f_full = ["0.538|0.537", "0.046"]

        tsv_reader = GenericTsvReader(output_filename)
        final_count = 0
        for final_count, line in enumerate(tsv_reader, start=1):
            idx = final_count - 1
            # Fail with a readable message instead of an IndexError when the
            # output holds more mutations than there are ground-truth entries.
            self.assertLess(
                idx, len(gt_alt_count),
                'Mutation count flawed... should have been two mutations: ' +
                str(final_count))

            self.assertTrue(
                all(not ks.startswith('i_i_') for ks in line.keys()),
                "i_i_ prefix found.")
            if idx == 0:
                self.assertIn(
                    "|", line["i_QSS"],
                    "i_QSS tag should have a '|' in it for the first mutation")
            self.assertEqual(int(line['t_alt_count']), gt_alt_count[idx])
            self.assertEqual(int(line['t_ref_count']), gt_ref_count[idx])
            # NOTE(review): exact float equality is kept from the original
            # test -- confirm no tolerance is wanted.
            self.assertEqual(float(line['i_tumor_f']), gt_tumor_f[idx])

            self.assertEqual(line['i_t_alt_count_full'], gt_alt_count_full[idx])
            self.assertEqual(line['i_t_ref_count_full'], gt_ref_count_full[idx])
            self.assertEqual(line['i_tumor_f_full'], gt_tumor_f_full[idx])

        self.assertEqual(
            final_count, 2,
            'Mutation count flawed... should have been two mutations: ' +
            str(final_count))