def testFullSnpVcf(self):
    """Perform test of a SNP call stats (maflite) all the way through TCGA VCF creation.

    Annotates "testdata/Test.call_stats.trim.txt" with the testing
    datasources, asserts that the output VCF was created, and then reads
    one VCF record per mutation of the indel maflite, asserting that each
    of those records has a QUAL value (i.e. NOT ".").
    """
    outputFilename = "out/TCGAVCFTest.snp.vcf"
    callStatsIn = MafliteInputMutationCreator("testdata/Test.call_stats.trim.txt")
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # NOTE(review): the number of records checked is driven by the indel
    # maflite, not by the SNP call stats file rendered above. This looks
    # like a copy/paste from testFullIndelVcf -- confirm the intended input.
    maflite_ic = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
    muts = maflite_ic.createMutations()

    # Use a context manager so the VCF handle is closed even when an
    # assertion fails part-way through the file.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_reader = vcf.Reader(vcf_fp)
        for _ in muts:
            # The builtin next() works with both the Python 2 (.next) and
            # Python 3 (.__next__) iterator protocols.
            rec = next(vcf_reader)
            # All records should have QUAL with a value (i.e. NOT ".")
            self.assertIsNotNone(rec.QUAL)
def testManualAnnotations(self):
    """Verify that manual annotations given to the Annotator show up in the output.

    Annotates 'testdata/maflite/Patient0.snp.maf.txt' into a TSV file with
    a fixed set of manual overrides and no datasources, then checks that
    every row of the TSV carries exactly the override value for each
    overridden key.
    """
    annotator = Annotator()
    overrides = {'source': 'Capture', 'status': 'Somatic', 'phase': 'Phase_I', 'sequencer': 'Illumina GAIIx'}
    annotator.setManualAnnotations(overrides)

    mafliteIn = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
    tsvOut = SimpleOutputRenderer("out/testManualAnnotationsFile.tsv")
    annotator.setInputCreator(mafliteIn)
    annotator.setOutputRenderer(tsvOut)
    annotatedFilename = annotator.annotate()

    # An empty output file would make the per-row checks below vacuous.
    self.assertTrue(os.path.getsize(annotatedFilename) > 0,
                    "Generated TSV file (" + annotatedFilename + ") is empty.")

    # Line numbers in the failure messages are 1-based.
    for lineNum, row in enumerate(GenericTsvReader(annotatedFilename), 1):
        for key in overrides:
            observed = row[key]
            self.assertTrue(observed != "__UNKNOWN__",
                            "__UNKNOWN__ value seen on line " + str(lineNum) + ", when it should be populated: " + key)
            self.assertTrue(observed != "",
                            "Blank value seen on line " + str(lineNum) + ", when it should be populated: " + key)
            self.assertTrue(observed == overrides[key],
                            "Value for " + key + " on line " + str(lineNum) + " did not match override: " + str(
                                observed) + " <> " + str(overrides[key]))
def testFullIndelVcf(self):
    """Perform test of an Indel maflite all the way through TCGA VCF creation.

    Annotates "testdata/maflite/Patient0.indel.maf.txt" with the testing
    datasources, asserts that the output VCF was created, and then checks
    the positions written to the VCF against the maflite input: a deletion
    must appear one base before its maflite start, anything else at its
    maflite start. Only positions are compared, not chromosomes.
    """
    outputFilename = "out/TCGAVCFTest.indel.vcf"
    callStatsIn = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # Check that the deletions have position decremented by one from what is present in the maflite
    # Checking that 1 36643701 in the maflite (a deletion) becomes 1 36643700 in the vcf, but that the others are
    # the same.
    maflite_ic = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
    muts = maflite_ic.createMutations()

    # Read every position up front inside a context manager so the file
    # handle is closed; a set gives constant-time membership tests below.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_pos = set(int(rec.POS) for rec in vcf.Reader(vcf_fp))

    for m in muts:
        # If the variant is a deletion, then the vcf position should be the same as maflite minus one. Otherwise, the same.
        is_variant_deletion = m.alt_allele in ("", "-", ".")
        # The message is built eagerly for every mutation, so coerce with
        # str() to avoid a TypeError when chr/start are not strings.
        location = str(m.chr) + ":" + str(m.start)
        if is_variant_deletion:
            self.assertTrue((int(m.start) - 1) in vcf_pos, "Deletion was not correct for " + location)
        else:
            self.assertTrue(int(m.start) in vcf_pos, "Insertion was not correct for " + location)
def testManualAnnotations(self):
    """Verify that manual annotations given to the Annotator show up in the output.

    Annotates 'testdata/maflite/Patient0.snp.maf.txt' into a TSV file with
    a fixed set of manual overrides and no datasources, then checks that
    every row of the TSV carries exactly the override value for each
    overridden key.
    """
    annotator = Annotator()
    overrides = {'source': 'Capture', 'status': 'Somatic', 'phase': 'Phase_I', 'sequencer': 'Illumina GAIIx'}
    annotator.setManualAnnotations(overrides)

    mafliteIn = MafliteInputMutationCreator('testdata/maflite/Patient0.snp.maf.txt')
    tsvOut = SimpleOutputRenderer("out/testManualAnnotationsFile.tsv")
    annotator.setInputCreator(mafliteIn)
    annotator.setOutputRenderer(tsvOut)
    annotatedFilename = annotator.annotate()

    # An empty output file would make the per-row checks below vacuous.
    self.assertTrue(os.path.getsize(annotatedFilename) > 0,
                    "Generated TSV file (" + annotatedFilename + ") is empty.")

    # Line numbers in the failure messages are 1-based.
    for lineNum, row in enumerate(GenericTsvReader(annotatedFilename), 1):
        for key in overrides:
            observed = row[key]
            self.assertTrue(observed != "__UNKNOWN__",
                            "__UNKNOWN__ value seen on line " + str(lineNum) + ", when it should be populated: " + key)
            self.assertTrue(observed != "",
                            "Blank value seen on line " + str(lineNum) + ", when it should be populated: " + key)
            self.assertTrue(observed == overrides[key],
                            "Value for " + key + " on line " + str(lineNum) + " did not match override: " + str(
                                observed) + " <> " + str(overrides[key]))
def testFullIndelVcf(self):
    """Perform test of an Indel maflite all the way through TCGA VCF creation.

    Annotates "testdata/maflite/Patient0.indel.maf.txt" with the testing
    datasources, asserts that the output VCF was created, and then checks
    the positions written to the VCF against the maflite input: a deletion
    must appear one base before its maflite start, anything else at its
    maflite start. Only positions are compared, not chromosomes.
    """
    outputFilename = "out/TCGAVCFTest.indel.vcf"
    callStatsIn = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
    vcfOR = TcgaVcfOutputRenderer(outputFilename)
    datasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(callStatsIn)
    annotator.setOutputRenderer(vcfOR)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for ds in datasources:
        annotator.addDatasource(ds)
    annotator.annotate()

    self.assertTrue(os.path.exists(outputFilename))

    # Check that the deletions have position decremented by one from what is present in the maflite
    # Checking that 1 36643701 in the maflite (a deletion) becomes 1 36643700 in the vcf, but that the others are
    # the same.
    maflite_ic = MafliteInputMutationCreator("testdata/maflite/Patient0.indel.maf.txt")
    muts = maflite_ic.createMutations()

    # Read every position up front inside a context manager so the file
    # handle is closed; a set gives constant-time membership tests below.
    with open(outputFilename, 'r') as vcf_fp:
        vcf_pos = set(int(rec.POS) for rec in vcf.Reader(vcf_fp))

    for m in muts:
        # If the variant is a deletion, then the vcf position should be the same as maflite minus one. Otherwise, the same.
        is_variant_deletion = m.alt_allele in ("", "-", ".")
        # The message is built eagerly for every mutation, so coerce with
        # str() to avoid a TypeError when chr/start are not strings.
        location = str(m.chr) + ":" + str(m.start)
        if is_variant_deletion:
            self.assertTrue((int(m.start) - 1) in vcf_pos, "Deletion was not correct for " + location)
        else:
            self.assertTrue(int(m.start) in vcf_pos, "Insertion was not correct for " + location)
def testMafInput(self):
    """Render a TCGA VCF from a TCGA MAF without adding any datasources.

    Passes when the output VCF exists and is not empty.
    """
    mafPath = "testdata/maf/Patient1.snp.maf.annotated"
    vcfPath = "out/maf2tcgavcf.vcf"
    inputCreator = MafliteInputMutationCreator(mafPath)
    renderer = TcgaVcfOutputRenderer(vcfPath)

    # No addDatasource() calls: only the manual annotations are applied.
    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testMafInput(self):
    """Render a TCGA VCF from a TCGA MAF without adding any datasources.

    Passes when the output VCF exists and is not empty.
    """
    mafPath = "testdata/maf/Patient1.snp.maf.annotated"
    vcfPath = "out/maf2tcgavcf.vcf"
    inputCreator = MafliteInputMutationCreator(mafPath)
    renderer = TcgaVcfOutputRenderer(vcfPath)

    # No addDatasource() calls: only the manual annotations are applied.
    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testFullSnpVcf(self):
    """Run a SNP call stats (maflite) file all the way through TCGA VCF creation.

    Only checks that the output file was created.
    """
    vcfPath = "out/TCGAVCFTest.snp.vcf"
    inputCreator = MafliteInputMutationCreator("testdata/Test.call_stats.trim.txt")
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
def testFullSnpVcf(self):
    """Run a SNP call stats (maflite) file all the way through TCGA VCF creation.

    Only checks that the output file was created.
    """
    vcfPath = "out/TCGAVCFTest.snp.vcf"
    inputCreator = MafliteInputMutationCreator("testdata/Test.call_stats.trim.txt")
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
def testEmptyInput(self):
    """Generate a VCF from an empty maflite.

    The output file must still exist and have a non-zero size.
    """
    maflitePath = "testdata/maflite/empty.maflite"
    vcfPath = "out/empty.vcf"
    inputCreator = MafliteInputMutationCreator(maflitePath)
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testAnotherFullSNP(self):
    """Render a second SNP call stats file as a TCGA VCF.

    Passes when no exception is thrown and the output file exists and is
    not empty.
    """
    callStatsPath = "testdata/maflite/Another.call_stats.txt"
    vcfPath = "out/Another.call_stats.out.vcf"
    inputCreator = MafliteInputMutationCreator(callStatsPath)
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testEmptyInput(self):
    """Generate a VCF from an empty maflite.

    The output file must still exist and have a non-zero size.
    """
    maflitePath = "testdata/maflite/empty.maflite"
    vcfPath = "out/empty.vcf"
    inputCreator = MafliteInputMutationCreator(maflitePath)
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testAnotherFullSNP(self):
    """Render a second SNP call stats file as a TCGA VCF.

    Passes when no exception is thrown and the output file exists and is
    not empty.
    """
    callStatsPath = "testdata/maflite/Another.call_stats.txt"
    vcfPath = "out/Another.call_stats.out.vcf"
    inputCreator = MafliteInputMutationCreator(callStatsPath)
    renderer = TcgaVcfOutputRenderer(vcfPath)
    testingDatasources = self._createDatasourcesForTesting()

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(renderer)
    annotator.setManualAnnotations(self._createManualAnnotations())
    for datasource in testingDatasources:
        annotator.addDatasource(datasource)
    annotator.annotate()

    self.assertTrue(os.path.exists(vcfPath))
    self.assertTrue(os.path.getsize(vcfPath) > 0,
                    "Generated VCF file (" + vcfPath + ") is empty.")
def testTCGAMAFRendering(self):
    """Render a germline VCF file as a TCGA MAF file and validate the result.

    The annotated MAF is checked by self._validateTcgaMafContents().
    """
    vcfPath = os.path.join("testdata", "vcf", "example.vcf")
    mafPath = os.path.join("out", "example.vcf.maf.annotated")

    inputCreator = VcfInputMutationCreator(vcfPath)
    # The value returned here is not used by this test.
    inputCreator.createMutations()
    mafRenderer = TcgaMafOutputRenderer(mafPath)

    annotator = Annotator()
    annotator.setInputCreator(inputCreator)
    annotator.setOutputRenderer(mafRenderer)
    annotator.setManualAnnotations(self._createTCGAMAFOverridesForVCF())
    for datasource in self._createDatasourceCorpus():
        annotator.addDatasource(datasource)

    annotatedFilename = annotator.annotate()
    self._validateTcgaMafContents(annotatedFilename)