Exemplos de VcfInputMutationCreator em Python, exemplos de oncotator.input.VcfInputMutationCreator.VcfInputMutationCreator em Python

Exemplo n.º 1

0

Exibir arquivo

    def testSNPsAndIndelStartAndEndPos(self):
        """
        Check the end position rendered for selected SNP and indel rows.

        Annotates ``example.snps.indels.vcf`` into a TSV and, for every row whose
        ``start`` is one of three known positions, asserts the matching ``end``.
        Positions are expected to follow the NCI MAF specification
        (https://wiki.nci.nih.gov/display/TCGA/Mutation+Annotation+Format+(MAF)+Specification).
        Rows with any other start position are not checked.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.snps.indels.vcf")
        tsv_path = os.path.join("out", "example.snps.indels.out.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path)
        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        pipeline.annotate()

        # start position -> end position expected in the rendered TSV
        expected_end_by_start = {
            "16890445": "16890445",
            "154524458": "154524459",
            "114189432": "114189433",
        }
        for record in GenericTsvReader(tsv_path):
            wanted_end = expected_end_by_start.get(record['start'])
            if wanted_end is None:
                continue
            self.assertEqual(record["end"], wanted_end,
                             "The value should be %s but it was %s." % (wanted_end, record["end"]))

Exemplo n.º 2

0

Exibir arquivo

    def testDuplicateAnnotation(self):
        """
        Tests that duplicate annotations are parsed correctly.

        Annotates ``example.duplicate_annotation.vcf`` into a TSV, then checks
        that both ``variant_status`` and ``sample_variant_status`` are present in
        the header and in the first data row, with values "2" and "0"
        respectively.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
        outputFilename = os.path.join("out", "example.duplicate_annotation.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)
        fieldnames = tsvReader.getFieldNames()
        self.assertIn("variant_status", fieldnames, "variant_status field is missing in the header.")
        self.assertIn("sample_variant_status", fieldnames, "sample_variant_status is missing in the header.")

        # Builtin next() instead of the Python-2-only ``.next()`` method call;
        # on Python 2 it dispatches to the same ``.next()`` method.
        row = next(tsvReader)
        self.assertIn("variant_status", row, "variant_status field is missing in the row.")
        self.assertIn("sample_variant_status", row, "sample_variant_status is missing in the row.")

        self.assertEqual("2", row["variant_status"], "Incorrect value of variant_status.")
        self.assertEqual("0", row["sample_variant_status"], "Incorrect value of sample_variant_status")

Exemplo n.º 3

0

Exibir arquivo

    def testAnnotationRoundTripEmpty(self):
        """Annotate a VCF with no datasources, write it as VCF, then read the result back.

        The run spec points at a placeholder datasource directory
        (``THIS_DIR_DOES_NOT_EXIST__``). The test passes when the re-read file
        yields at least one mutation.
        """
        source_vcf = os.path.join("testdata", "m2_support", "NA12878.ob_filtered.vcf")
        annotated_vcf = os.path.join("out", "test_round_trip_empty_annotated.vcf")

        other_opts = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        run_spec = RunSpecificationFactory.create_run_spec(
            "VCF", "VCF", source_vcf, annotated_vcf,
            datasource_dir="THIS_DIR_DOES_NOT_EXIST__",
            genomeBuild="hg19",
            other_opts=other_opts)
        annotator = Annotator()
        annotator.initialize(run_spec)
        annotated_filename = annotator.annotate()

        # Second pass: parse whatever the annotator reported writing.
        reread_creator = VcfInputMutationCreator(annotated_filename,
                                                 MutationDataFactory(allow_overwriting=True),
                                                 other_options=other_opts)
        muts2 = list(reread_creator.createMutations())
        self.assertTrue(len(muts2) > 0)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: SampleNameSelectorTest.py Projeto: Tmacme/oncotator

 def testSampleNameSelectorWithVCF(self):
     """Check that SampleNameSelector resolves sample names for VCF-derived mutations.

     The selector is built from the first mutation of ``example.1row.vcf``;
     every mutation from a fresh ``createMutations()`` call must then map to one
     of the three expected sample names. The selector's annotation source and
     output annotation name are checked as well.
     """
     # Named ``creator`` rather than ``input`` to avoid shadowing the builtin.
     creator = VcfInputMutationCreator("testdata/vcf/example.1row.vcf")
     first_mut = next(creator.createMutations())
     s = SampleNameSelector(first_mut)
     expected = ["NA 00001", "NA 00002", "NA 00003"]
     for mut in creator.createMutations():
         self.assertIn(s.getSampleName(mut), expected)
     self.assertEqual(s.getAnnotationSource(), "INPUT")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(s.getOutputAnnotationName(), "sample_name")

Exemplo n.º 5

0

Exibir arquivo

    def testFailureWithSpanningDeletion(self):
        """Walk every mutation of a spanning-deletion VCF without skipping alternates.

        Intended to fail with a spanning deletion unless alternates are being
        ignored.
        NOTE(review): no explicit failure assertion is visible in this body;
        confirm how the expected failure is detected.
        """
        vcf_path = os.path.join("testdata", "simple_vcf_spanning_deletion.vcf")
        factory = MutationDataFactory(allow_overwriting=True)
        vcf_input = VcfInputMutationCreator(vcf_path, factory)

        # Consume the whole generator; the number of mutations is not checked.
        for _ in vcf_input.createMutations():
            pass

Exemplo n.º 6

0

Exibir arquivo

Arquivo: SampleNameSelectorTest.py Projeto: xingtech/oncotator

 def testSampleNameSelectorWithVCF(self):
     """Check that SampleNameSelector resolves sample names for VCF-derived mutations.

     The selector is built from the first mutation of ``example.1row.vcf``;
     every mutation from a fresh ``createMutations()`` call must then map to one
     of the three expected sample names. The selector's annotation source and
     output annotation name are checked as well.
     """
     # Named ``creator`` rather than ``input`` to avoid shadowing the builtin.
     creator = VcfInputMutationCreator("testdata/vcf/example.1row.vcf")
     first_mut = next(creator.createMutations())
     s = SampleNameSelector(first_mut)
     expected = ["NA 00001", "NA 00002", "NA 00003"]
     for mut in creator.createMutations():
         self.assertIn(s.getSampleName(mut), expected)
     self.assertEqual(s.getAnnotationSource(), "INPUT")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(s.getOutputAnnotationName(), "sample_name")

Exemplo n.º 7

0

Exibir arquivo

    def testNumberGRenderingOfRandomVcf(self):
        """
        Run ``number_g.random.vcf`` through the annotator into a simple TSV.

        There are no explicit assertions; the test passes when the pipeline
        completes without raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "number_g.random.vcf")
        tsv_path = os.path.join("out", "number_g.random.out.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        pipeline.annotate()

Exemplo n.º 8

0

Exibir arquivo

    def testDuplicateAnnotationMetaData(self):
        """
        Tests that metadata is populated when the input VCF has duplicate annotations.

        Only the metadata of ``example.duplicate_annotation.vcf`` is inspected;
        both ``variant_status`` and ``sample_variant_status`` must be present.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.duplicate_annotation.vcf")
        metadata = VcfInputMutationCreator(vcf_path).getMetadata()

        required = (
            ("variant_status", "variant_status field is missing in metadata."),
            ("sample_variant_status", "sample_variant_status is missing in metadata."),
        )
        for key, failure_msg in required:
            self.assertTrue(key in metadata, failure_msg)

Exemplo n.º 9

0

Exibir arquivo

    def testSwitchedFieldsWithExampleVcf(self):
        """
        Tests whether the switched tags are ignored.

        Runs ``example.bad.switched.fields.vcf`` through the annotator into a
        simple TSV. There are no explicit assertions; the test passes when the
        pipeline completes without raising.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.bad.switched.fields.vcf")
        outputFilename = os.path.join("out", "example.switched.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename, [])
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        # Actually run the configured pipeline, as the sibling tests do; without
        # this call the input file is never pushed through the annotator.
        annotator.annotate()

Exemplo n.º 10

0

Exibir arquivo

    def testSuccesseWithSpanningDeletion(self):
        """Succeed on a spanning-deletion VCF because alternates are being skipped.

        With ``IS_SKIP_ALTS`` enabled, exactly one mutation is expected.
        """
        vcf_path = os.path.join("testdata", "simple_vcf_spanning_deletion.vcf")

        skip_alt_options = {InputMutationCreatorOptions.IS_SKIP_ALTS: True}
        factory = MutationDataFactory(allow_overwriting=True)
        vcf_input = VcfInputMutationCreator(vcf_path, factory, other_options=skip_alt_options)

        # The generator has no len(), so count by exhausting it.
        ctr = sum(1 for _ in vcf_input.createMutations())
        self.assertTrue(ctr == 1, "There should only have been one mutation seen, instead saw: " + str(ctr))

Exemplo n.º 11

0

Exibir arquivo

    def testSimpleAnnotationWithAComplexVcf(self):
        """
        Tests that a rather complex VCF file can be parsed without any errors.

        ``random.vcf`` is run through the annotator into a simple TSV; there are
        no explicit assertions beyond the pipeline not raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "random.vcf")
        tsv_path = os.path.join("out", "random.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        pipeline.annotate()

Exemplo n.º 12

0

Exibir arquivo

    def testGetMetaDataWithNoSampleNameExampleVcf(self):
        """
        Tests that metadata can be retrieved before createMutations has been called.

        The metadata keys of ``example.sampleName.removed.vcf`` must equal the
        expected key set exactly: the check uses a symmetric difference, so both
        missing and extra keys cause a failure.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.sampleName.removed.vcf")
        creator = VcfInputMutationCreator(vcf_path)

        expected_keys = {
            'genotype', 'read_depth', 'genotype_quality', 'haplotype_quality',
            'q10', 's50', 'samples_number', 'depth_across_samples',
            'allele_frequency', 'ancestral_allele', 'dbSNP_membership', 'id',
            'qual', 'hapmap2_membership',
        }
        actual_keys = set(creator.getMetadata().keys())
        mismatched = expected_keys ^ actual_keys
        self.assertTrue(len(mismatched) == 0,
                        "Missing keys that should have been seen in the metadata: " + str(mismatched))

Exemplo n.º 13

0

Exibir arquivo

    def testAnnotationWithDuplicateValuesInVcf(self):
        """
        Tests that a VCF whose fields share the same name can be parsed.

        ``example.duplicate_fields.vcf`` is run through the annotator into a
        simple TSV; the test passes when the pipeline completes without raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.duplicate_fields.vcf")
        tsv_path = os.path.join("out", "example.duplicate_fields2.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        pipeline.annotate()

Exemplo n.º 14

0

Exibir arquivo

    def testSimpleAnnotationWithExampleVcf(self):
        """
        Tests a simple Gaf 3.0 annotation of ``example.vcf``.

        A transcript-provider datasource built from ``self.config`` is attached
        before annotating; the test passes when the pipeline completes without
        raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.vcf")
        tsv_path = os.path.join("out", "simpleVCF.Gaf.annotated.out.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        tsv_renderer = SimpleOutputRenderer(tsv_path, [])

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        transcript_ds = TestUtils.createTranscriptProviderDatasource(self.config)
        pipeline.addDatasource(transcript_ds)
        pipeline.annotate()

Exemplo n.º 15

0

Exibir arquivo

    def testGenotypeFieldIsHonored(self):
        """
        Tests genotype values >1 when multiple variants appear on one line.

        Counts the mutations of ``example.severalGTs.vcf`` whose
        ``alt_allele_seen`` is true: there must be exactly 7, and none of them
        may belong to sample "NA 00001".
        """
        vcf_path = os.path.join("testdata", "vcf", "example.severalGTs.vcf")
        creator = VcfInputMutationCreator(vcf_path)

        alt_seen_count = 0
        for mutation in creator.createMutations():
            if not MutUtils.str2bool(mutation["alt_allele_seen"]):
                continue
            self.assertTrue(mutation['sample_name'] != "NA 00001")
            alt_seen_count += 1

        self.assertTrue(alt_seen_count == 7,
                        str(alt_seen_count) + " mutations with alt seen, but expected 7.  './.' should not show as a variant.")

Exemplo n.º 16

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: Tmacme/oncotator

    def testAnnotationWithExampleVcf(self):
        """
        Tests whether parsed annotations match the expected annotations in a simple TSV.

        ``example.vcf`` is annotated into ``out/example.out.tsv`` and compared
        against ``example.expected.out.tsv``: same column names, same number of
        rows, and for each column every cell either equal or null in both files.
        Per the original note, missing format fields render as an empty string
        and ".,." renders as ",".
        """
        inputFilename = os.path.join("testdata", "vcf", "example.vcf")
        outputFilename = os.path.join("out", "example.out.tsv")
        expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row index is the number of comment lines the reader reports.
        current = pandas.read_csv(outputFilename,
                                  sep='\t',
                                  header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; the previous ``is 0`` was an identity check that
        # only worked through an interpreter implementation detail.
        self.assertTrue(
            len(currentColNames.symmetric_difference(expectedColNames)) == 0,
            "Should have the same columns")
        self.assertTrue(
            len(current.index) == len(expected.index),
            "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null.
            cellMatches = ((current[colName] == expected[colName])
                           | (pandas.isnull(current[colName])
                              & pandas.isnull(expected[colName])))
            self.assertTrue(
                sum(cellMatches) == len(current.index),
                "Should have the same values in column " + colName + ": \n" +
                str(current[colName]) + "\nvs\n" + str(expected[colName]))

Exemplo n.º 17

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: Tmacme/oncotator

    def testMissingFilter(self):
        """
        Tests that missing FILTER fields are parsed correctly.

        ``example.missing_filters.vcf`` is annotated into a TSV and compared
        against ``example.expected.missing_filters.out.tsv``: same column names,
        same number of rows, and for each column every cell either equal or null
        in both files.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.missing_filters.vcf")
        outputFilename = os.path.join("out", "example.missing_filters.out.tsv")
        expectedOutputFilename = os.path.join(
            "testdata", "vcf", "example.expected.missing_filters.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row index is the number of comment lines the reader reports.
        current = pandas.read_csv(outputFilename,
                                  sep='\t',
                                  header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; the previous ``is 0`` was an identity check that
        # only worked through an interpreter implementation detail.
        self.assertTrue(
            len(currentColNames.symmetric_difference(expectedColNames)) == 0,
            "Should have the same columns")
        self.assertTrue(
            len(current.index) == len(expected.index),
            "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null.
            cellMatches = ((current[colName] == expected[colName])
                           | (pandas.isnull(current[colName])
                              & pandas.isnull(expected[colName])))
            self.assertTrue(
                sum(cellMatches) == len(current.index),
                "Should have the same values in column " + colName)

Exemplo n.º 18

0

Exibir arquivo

    def testTCGAMAFRendering(self):
        """
        Tests rendering a germline VCF file as a TCGA MAF file.

        ``example.vcf`` is annotated with the manual overrides and datasource
        corpus supplied by this test class's helpers, and the file returned by
        the annotator is handed to ``_validateTcgaMafContents``.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.vcf")
        maf_path = os.path.join("out", "example.vcf.maf.annotated")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        mutation_creator.createMutations()
        maf_renderer = TcgaMafOutputRenderer(maf_path)

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(maf_renderer)
        pipeline.setManualAnnotations(self._createTCGAMAFOverridesForVCF())
        for datasource in self._createDatasourceCorpus():
            pipeline.addDatasource(datasource)

        rendered_filename = pipeline.annotate()
        self._validateTcgaMafContents(rendered_filename)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: Tmacme/oncotator

    def testSplitByNumberOfAltsWithFile(self):
        """
        Tests whether we properly determine that a field is split using an actual file.

        Mutations from ``example.split.tags.vcf`` are walked in step with a
        ``vcf.Reader`` over the same file. For each annotation whose VCF INFO key
        is present on the current record, the presence of ``TagConstants.SPLIT``
        among the annotation's tags must equal the expectation in ``isSplit``.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.split.tags.vcf")
        creator = VcfInputMutationCreator(inputFilename)

        # annotation name -> whether it is expected to carry the SPLIT tag
        isSplit = {
            'read_depth': False,
            'ESP_MAF': False,
            'allele_frequency': True,
        }

        # annotation name -> INFO key looked up on the VCF record
        mapVcfFields2Tsv = {
            'read_depth': 'DP',
            'ESP_MAF': 'ESP_MAF',
            'allele_frequency': 'AF',
        }

        muts = creator.createMutations()

        vcfReader = vcf.Reader(filename=inputFilename, strict_whitespace=True)

        chrom = None
        pos = None
        variant = None
        for m in muts:
            # Advance to the next VCF record whenever the mutation's locus changes.
            if (chrom != m['chr']) or (pos != m['start']):
                chrom = m['chr']
                pos = m['start']
                # Builtin next() instead of the Python-2-only ``.next()`` method call.
                variant = next(vcfReader)

            for annotationName in isSplit:
                if mapVcfFields2Tsv[annotationName] in variant.INFO:
                    a = m.getAnnotation(annotationName)
                    sawSplit = TagConstants.SPLIT in a.getTags()
                    self.assertTrue(
                        sawSplit == isSplit[annotationName],
                        "Is " + annotationName + " split for chrom " + chrom +
                        ", pos " + str(pos) + "? " +
                        str(isSplit[annotationName]) + ", but saw: " +
                        str(sawSplit))

Exemplo n.º 20

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: Tmacme/oncotator

    def testBasicCreationWithExampleVcf(self):
        """
        Tests that an input VCF file can be parsed without any errors.

        ``example.vcf`` must yield 27 mutations, the last one on chromosome "21"
        at start 1234569, and must yield 27 again after the creator is reset.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.vcf")
        creator = VcfInputMutationCreator(vcf_path)

        # The mutations come from a generator, so count by iterating (no len()).
        first_pass_count = 0
        for m in creator.createMutations():
            first_pass_count += 1
        self.assertTrue(
            first_pass_count == 27,
            "Should have seen 27 (# REF alleles x # samples) mutations, but saw: "
            + str(first_pass_count))
        # ``m`` still refers to the last mutation produced by the loop above.
        self.assertTrue((m.chr == "21") and (m.start == 1234569),
                        "Last mutation was not correct: " + str(m))

        # Reset the creator before asking for the mutations a second time.
        creator.reset()
        second_pass_count = 0
        for m in creator.createMutations():
            second_pass_count += 1
        self.assertTrue(
            second_pass_count == 27,
            "Should have seen 27 called mutations, but saw: " + str(second_pass_count))

Exemplo n.º 21

0

Exibir arquivo

    def testAnnotationWithNoSampleNameExampleVcf(self):
        """
        Tests annotation when the input VCF file has no samples.

        ``example.sampleName.removed.vcf`` is run through the annotator into a
        simple TSV; there are no explicit assertions beyond the pipeline not
        raising.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.sampleName.removed.vcf")
        tsv_path = os.path.join("out", "example.sampleName.removed.out.tsv")

        mutation_creator = VcfInputMutationCreator(vcf_path)
        tsv_renderer = SimpleOutputRenderer(tsv_path)

        pipeline = Annotator()
        pipeline.setInputCreator(mutation_creator)
        pipeline.setOutputRenderer(tsv_renderer)
        pipeline.annotate()

Exemplo n.º 22

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: broadinstitute/oncotator

    def testAnnotationWithExampleVcf(self):
        """
        Tests whether parsed annotations match the expected annotations in a simple TSV.

        ``example.vcf`` is annotated into ``out/example.out.tsv`` and compared
        against ``example.expected.out.tsv``: same column names, same number of
        rows, and for each column every cell either equal or null in both files.
        Per the original note, missing format fields render as an empty string
        and ".,." renders as ",".
        """
        inputFilename = os.path.join("testdata", "vcf", "example.vcf")
        outputFilename = os.path.join("out", "example.out.tsv")
        expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row index is the number of comment lines the reader reports.
        current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; the previous ``is 0`` was an identity check that
        # only worked through an interpreter implementation detail.
        self.assertTrue(len(currentColNames.symmetric_difference(expectedColNames)) == 0,
                        "Should have the same columns")
        self.assertTrue(len(current.index) == len(expected.index), "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null.
            cellMatches = ((current[colName] == expected[colName]) |
                           (pandas.isnull(current[colName]) & pandas.isnull(expected[colName])))
            self.assertTrue(sum(cellMatches) == len(current.index),
                            "Should have the same values in column " + colName + ": \n" +
                            str(current[colName]) + "\nvs\n" + str(expected[colName]))

Exemplo n.º 23

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: broadinstitute/oncotator

    def testBasicCreationWithExampleVcf(self):
        """
        Tests that an input VCF file can be parsed without any errors.

        ``example.vcf`` must yield 27 mutations, the last one on chromosome "21"
        at start 1234569, and must yield 27 again after the creator is reset.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.vcf")
        creator = VcfInputMutationCreator(vcf_path)

        # The mutations come from a generator, so count by iterating (no len()).
        first_pass_count = 0
        for m in creator.createMutations():
            first_pass_count += 1
        self.assertTrue(first_pass_count == 27,
                        "Should have seen 27 (# REF alleles x # samples) mutations, but saw: " + str(first_pass_count))
        # ``m`` still refers to the last mutation produced by the loop above.
        self.assertTrue((m.chr == "21") and (m.start == 1234569), "Last mutation was not correct: " + str(m))

        # Reset the creator before asking for the mutations a second time.
        creator.reset()
        second_pass_count = 0
        for m in creator.createMutations():
            second_pass_count += 1
        self.assertTrue(second_pass_count == 27,
                        "Should have seen 27 called mutations, but saw: " + str(second_pass_count))

Exemplo n.º 24

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: broadinstitute/oncotator

    def testMissingFilter(self):
        """
        Tests that missing FILTER fields are parsed correctly.

        ``example.missing_filters.vcf`` is annotated into a TSV and compared
        against ``example.expected.missing_filters.out.tsv``: same column names,
        same number of rows, and for each column every cell either equal or null
        in both files.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.missing_filters.vcf")
        outputFilename = os.path.join("out", "example.missing_filters.out.tsv")
        expectedOutputFilename = os.path.join("testdata", "vcf", "example.expected.missing_filters.out.tsv")

        creator = VcfInputMutationCreator(inputFilename)
        creator.createMutations()
        renderer = SimpleOutputRenderer(outputFilename)
        annotator = Annotator()
        annotator.setInputCreator(creator)
        annotator.setOutputRenderer(renderer)
        annotator.annotate()

        tsvReader = GenericTsvReader(outputFilename)

        # The header row index is the number of comment lines the reader reports.
        current = pandas.read_csv(outputFilename, sep='\t', header=len(tsvReader.getCommentsAsList()))
        expected = pandas.read_csv(expectedOutputFilename, sep='\t')

        currentColNames = set(current.columns)
        expectedColNames = set(expected.columns)

        # Compare by value; the previous ``is 0`` was an identity check that
        # only worked through an interpreter implementation detail.
        self.assertTrue(len(currentColNames.symmetric_difference(expectedColNames)) == 0,
                        "Should have the same columns")
        self.assertTrue(len(current.index) == len(expected.index), "Should have the same number of rows")

        for colName in currentColNames:
            # A cell matches when the values are equal or both are null.
            cellMatches = ((current[colName] == expected[colName]) |
                           (pandas.isnull(current[colName]) & pandas.isnull(expected[colName])))
            self.assertTrue(sum(cellMatches) == len(current.index),
                            "Should have the same values in column " + colName)

Exemplo n.º 25

0

Exibir arquivo

Arquivo: VcfInputMutationCreatorTest.py Projeto: broadinstitute/oncotator

    def testSplitByNumberOfAltsWithFile(self):
        """
        Tests whether we properly determine that a field is split using an actual file.

        Mutations from ``example.split.tags.vcf`` are walked in step with a
        ``vcf.Reader`` over the same file. For each annotation whose VCF INFO key
        is present on the current record, the presence of ``TagConstants.SPLIT``
        among the annotation's tags must equal the expectation in ``isSplit``.
        """
        inputFilename = os.path.join("testdata", "vcf", "example.split.tags.vcf")
        creator = VcfInputMutationCreator(inputFilename)

        # annotation name -> whether it is expected to carry the SPLIT tag
        isSplit = {
            'read_depth': False,
            'ESP_MAF': False,
            'allele_frequency': True,
        }

        # annotation name -> INFO key looked up on the VCF record
        mapVcfFields2Tsv = {
            'read_depth': 'DP',
            'ESP_MAF': 'ESP_MAF',
            'allele_frequency': 'AF',
        }

        muts = creator.createMutations()

        vcfReader = vcf.Reader(filename=inputFilename, strict_whitespace=True)

        chrom = None
        pos = None
        variant = None
        for m in muts:
            # Advance to the next VCF record whenever the mutation's locus changes.
            if (chrom != m['chr']) or (pos != m['start']):
                chrom = m['chr']
                pos = m['start']
                # Builtin next() instead of the Python-2-only ``.next()`` method call.
                variant = next(vcfReader)

            for annotationName in isSplit:
                if mapVcfFields2Tsv[annotationName] in variant.INFO:
                    a = m.getAnnotation(annotationName)
                    sawSplit = TagConstants.SPLIT in a.getTags()
                    self.assertTrue(sawSplit == isSplit[annotationName],
                                    "Is " + annotationName + " split for chrom " + chrom + ", pos " + str(pos) +
                                    "? " + str(isSplit[annotationName]) + ", but saw: " +
                                    str(sawSplit))

Exemplo n.º 26

0

Exibir arquivo

    def testOverwriteAnnotationsSupported(self):
        """Test that mutations honor the overwrite setting of their factory. (white box testing)

        Mutations created with a default ``MutationDataFactory`` must have a
        truthy ``_new_required``; those created with ``allow_overwriting=True``
        must have a falsy one.
        """
        vcf_path = os.path.join("testdata", "vcf", "example.trailing_whitespace_in_alleles.vcf")

        strict_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory())
        permissive_creator = VcfInputMutationCreator(vcf_path, MutationDataFactory(allow_overwriting=True))

        for mutation in strict_creator.createMutations():
            self.assertTrue(mutation._new_required)

        for mutation in permissive_creator.createMutations():
            self.assertFalse(mutation._new_required)

Exemplo n.º 27

0

Exibir arquivo

    def testSimpleRoundTripWithoutAnnotating(self):
        """Read a VCF, render it straight back to VCF, and read the rendered file again.

        No annotator is involved. The test passes when the second read yields at
        least one mutation.
        """
        other_opts = {OptionConstants.COLLAPSE_NUMBER_ANNOTATIONS: True}

        source_vcf = os.path.join("testdata", "m2_support", "NA12878.ob_filtered.vcf")
        first_reader = VcfInputMutationCreator(source_vcf,
                                               MutationDataFactory(allow_overwriting=True),
                                               other_options=other_opts)
        muts = list(first_reader.createMutations())

        rendered_vcf = os.path.join("out", "test_round_trip.vcf")
        vcf_output = VcfOutputRenderer(rendered_vcf, otherOptions=other_opts)
        vcf_output.renderMutations(muts)

        # Second pass: parse the file that was just rendered.
        second_reader = VcfInputMutationCreator(rendered_vcf,
                                                MutationDataFactory(allow_overwriting=True),
                                                other_options=other_opts)
        muts2 = list(second_reader.createMutations())
        self.assertTrue(len(muts2) > 0)