def test_validation_correction_valid(self):
    """Check that blank validation alleles are filled in for a "Valid" mutation.

    A single G>A mutation with ``validation_status`` set to "Valid" and all
    four validation allele annotations left empty is rendered to a TCGA MAF.
    The file is read back and every row must satisfy:

    * both matched-normal validation alleles equal the reference allele,
    * tumor validation allele 1 equals the reference allele,
    * tumor validation allele 2 equals ``Tumor_Seq_Allele2``,
    * the reference allele is "G" and ``Tumor_Seq_Allele2`` is "A".

    The test also fails when no rows can be read back, so the per-row checks
    cannot pass vacuously on an empty file.
    """
    m = MutationDataFactory.default_create()
    m.chr = "3"
    m.start = "178948145"
    m.end = "178948145"
    m.alt_allele = "A"
    m.ref_allele = "G"
    m['validation_status'] = "Valid"
    # Leave the validation alleles blank on purpose: the renderer is expected
    # to determine them.
    m['Match_Norm_Validation_Allele1'] = ""
    m['Match_Norm_Validation_Allele2'] = ""
    m['Tumor_Validation_Allele1'] = ""
    m['Tumor_Validation_Allele2'] = ""
    m['Mutation_Status'] = "Somatic"

    output_filename = os.path.join("out", "test_validation_correction2.maf.tsv")
    outputRenderer = TcgaMafOutputRenderer(
        output_filename,
        configFile=os.path.join("configs", "tcgaMAF2.4_output.config"))
    outputRenderer.renderMutations(iter([m]))

    tsv_reader = GenericTsvReader(output_filename)
    rows_checked = 0
    for line_dict in tsv_reader:
        rows_checked += 1
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Match_Norm_Validation_Allele2'],
            "Matched norm alleles did not match.")
        self.assertEqual(
            line_dict['Tumor_Validation_Allele1'],
            line_dict['Reference_Allele'],
            "Tumor validation allele 1 did not match reference for a valid validation result.")
        self.assertEqual(
            line_dict['Tumor_Validation_Allele2'],
            line_dict['Tumor_Seq_Allele2'],
            "Tumor validation allele 2 did not match Tumor_Seq_Allele2 for a valid validation result.")
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Tumor_Validation_Allele1'],
            "Tumor allele 1 did not match normal alleles for a valid validation result.")
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Reference_Allele'],
            "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
            % (line_dict['Match_Norm_Validation_Allele1'], line_dict['Reference_Allele']))
        self.assertEqual(
            "G", line_dict['Reference_Allele'],
            "Reference allele should have been G, but was " + line_dict['Reference_Allele'])
        self.assertEqual(
            "A", line_dict['Tumor_Seq_Allele2'],
            "Alt allele should have been A, but was \n" + line_dict['Tumor_Seq_Allele2'])

    # Guard against a silently empty output file.
    self.assertTrue(rows_checked > 0,
                    "No rows were read back from " + output_filename)
def testTCGAMAFAsInput(self):
    """Render a TCGA MAF that was read in through MAFLITE without annotating it.

    The input MAF is loaded with ``MafliteInputMutationCreator`` and handed
    straight to ``TcgaMafOutputRenderer`` together with the input's comments.
    Besides requiring that rendering raises no exception, the test checks
    that the output file is non-empty.
    """
    # NOTE(review): another method with this same name is visible later in
    # this source and passes an extra ``None`` argument to
    # MafliteInputMutationCreator. If both live in the same class, only the
    # later definition is run -- confirm which call signature is current.
    tmp = MafliteInputMutationCreator("testdata/maf/Patient0.maf.annotated",
                                      'configs/maflite_input.config')
    muts = tmp.createMutations()

    outputFilename = "out/testTCGAMAFAsInput.tsv"
    outputRenderer = TcgaMafOutputRenderer(outputFilename,
                                           'configs/tcgaMAF2.4_output.config')
    outputRenderer.renderMutations(muts, tmp.getComments())

    # Same sanity check the sibling rendering tests perform on their output.
    statinfo = os.stat(outputFilename)
    self.assertTrue(statinfo.st_size > 0,
                    "Generated MAF file (" + outputFilename + ") is empty.")
def test_validation_correction(self):
    """Check that blank validation alleles are filled in for an "Invalid" mutation.

    A single G>A mutation with ``validation_status`` set to "Invalid" and all
    four validation allele annotations left empty is rendered to a TCGA MAF.
    The file is read back and every row must satisfy:

    * all four validation alleles (matched normal and tumor) equal the
      reference allele,
    * the reference allele is "G",
    * ``Mutation_Status`` is the string "None".

    The test also fails when no rows can be read back, so the per-row checks
    cannot pass vacuously on an empty file.
    """
    m = MutationDataFactory.default_create()
    m.chr = "3"
    m.start = "178948145"
    m.end = "178948145"
    m.alt_allele = "A"
    m.ref_allele = "G"
    m['validation_status'] = "Invalid"
    # Leave the validation alleles blank on purpose: the renderer is expected
    # to determine them.
    m['Match_Norm_Validation_Allele1'] = ""
    m['Match_Norm_Validation_Allele2'] = ""
    m['Tumor_Validation_Allele1'] = ""
    m['Tumor_Validation_Allele2'] = ""
    m['Mutation_Status'] = "Somatic"

    output_filename = os.path.join("out", "test_validation_correction1.maf.tsv")
    outputRenderer = TcgaMafOutputRenderer(
        output_filename,
        configFile=os.path.join("configs", "tcgaMAF2.4_output.config"))
    outputRenderer.renderMutations(iter([m]))

    tsv_reader = GenericTsvReader(output_filename)
    rows_checked = 0
    for line_dict in tsv_reader:
        rows_checked += 1
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Match_Norm_Validation_Allele2'],
            "Matched norm alleles did not match.")
        self.assertEqual(
            line_dict['Tumor_Validation_Allele1'],
            line_dict['Tumor_Validation_Allele2'],
            "Tumor alleles did not match for an invalid validation result.")
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Tumor_Validation_Allele2'],
            "Tumor alleles did not match normal alleles for an invalid validation result.")
        self.assertEqual(
            line_dict['Match_Norm_Validation_Allele1'],
            line_dict['Reference_Allele'],
            "Norm validation alleles did not match reference (norm, reference): (%s, %s)"
            % (line_dict['Match_Norm_Validation_Allele1'], line_dict['Reference_Allele']))
        self.assertEqual(
            "G", line_dict['Reference_Allele'],
            "Reference allele should have been G, but was " + line_dict['Reference_Allele'])
        self.assertEqual(
            "None", line_dict['Mutation_Status'],
            "Mutation Status must be None when Validation Status is Invalid: "
            + line_dict['Mutation_Status'])

    # Guard against a silently empty output file.
    self.assertTrue(rows_checked > 0,
                    "No rows were read back from " + output_filename)
def testTCGAMAFAsInput(self):
    """Render a TCGA MAF that was read in through MAFLITE without annotating it.

    The input MAF is loaded with ``MafliteInputMutationCreator`` and handed
    straight to ``TcgaMafOutputRenderer`` together with the input's comments.
    Besides requiring that rendering raises no exception, the test checks
    that the output file is non-empty.
    """
    # The second positional argument is passed as None here; its meaning is
    # defined by MafliteInputMutationCreator, which is not visible from this
    # test.
    tmp = MafliteInputMutationCreator(
        "testdata/maf/Patient0.maf.annotated", None,
        'configs/maflite_input.config')
    muts = tmp.createMutations()

    outputFilename = "out/testTCGAMAFAsInput.tsv"
    outputRenderer = TcgaMafOutputRenderer(
        outputFilename, 'configs/tcgaMAF2.4_output.config')
    outputRenderer.renderMutations(muts, tmp.getComments())

    # Same sanity check the sibling rendering tests perform on their output.
    statinfo = os.stat(outputFilename)
    self.assertTrue(statinfo.st_size > 0,
                    "Generated MAF file (" + outputFilename + ") is empty.")
def testInternalFieldsSkipPrepend(self):
    """Check that the NO_PREPEND option keeps "i_" off internal column names.

    A mutation carrying a custom "TEST" annotation and a "gene" annotation is
    rendered with ``OptionConstants.NO_PREPEND`` enabled. The output headers
    must contain "Hugo_Symbol" and "TEST", and must not contain "i_TEST".
    """
    # NOTE(review): a second method with this name is visible later in this
    # source; if both are in the same class only the later one runs. This
    # test also writes to the same output path as testInternalFields --
    # confirm the tests are never run concurrently.
    maf_config = 'configs/tcgaMAF2.4_output.config'
    maf_path = "out/testInternalFields_v2.4.maf.tsv"

    mutation = MutationDataFactory.default_create()
    mutation.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # "gene" is a real annotation and should not be considered internal.
    mutation.createAnnotation("gene", "EGFR")

    renderer = TcgaMafOutputRenderer(
        maf_path,
        configFile=maf_config,
        other_options={OptionConstants.NO_PREPEND: True})
    renderer.renderMutations(iter([mutation]), ['No comments'])

    # The header checks below rely on Hugo_Symbol being a required column.
    config_parser = ConfigUtils.createConfigParser(maf_config)
    required_columns = config_parser.get("general", "requiredColumns")
    self.assertTrue(
        "Hugo_Symbol" in required_columns,
        " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    self.assertTrue(os.path.getsize(maf_path) > 0,
                    "Generated MAF file (" + maf_path + ") is empty.")

    reader = GenericTsvReader(maf_path)
    header_names = reader.getFieldNames()
    self.assertTrue("Hugo_Symbol" in header_names,
                    "Hugo_Symbol not found in output headers")
    self.assertTrue(
        "i_TEST" not in header_names,
        "i_TEST was found in output headers when prepend was disabled.")
    self.assertTrue("TEST" in header_names,
                    "TEST was not found in output headers.")
def testInternalFieldsSkipPrepend(self):
    """Verify that disabling the "i_" prefix is honored by the MAF renderer.

    Renders one mutation (custom "TEST" annotation plus a "gene" annotation)
    with ``OptionConstants.NO_PREPEND`` set to True, then inspects the output
    headers: "Hugo_Symbol" and "TEST" must be present, "i_TEST" must not.
    """
    # NOTE(review): the output path below is also used by testInternalFields;
    # confirm the two tests are never run concurrently.
    config_path = 'configs/tcgaMAF2.4_output.config'
    out_path = "out/testInternalFields_v2.4.maf.tsv"

    mut = MutationDataFactory.default_create()
    mut.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # "gene" is a real annotation and should not be considered internal.
    mut.createAnnotation("gene", "EGFR")

    maf_renderer = TcgaMafOutputRenderer(
        out_path,
        configFile=config_path,
        other_options={OptionConstants.NO_PREPEND: True})
    maf_renderer.renderMutations(iter([mut]), ['No comments'])

    # Precondition for the header checks: Hugo_Symbol is a required column.
    required = ConfigUtils.createConfigParser(config_path).get(
        "general", "requiredColumns")
    self.assertTrue(
        "Hugo_Symbol" in required,
        " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    output_size = os.stat(out_path).st_size
    self.assertTrue(output_size > 0,
                    "Generated MAF file (" + out_path + ") is empty.")

    tsv = GenericTsvReader(out_path)
    columns = tsv.getFieldNames()
    self.assertTrue("Hugo_Symbol" in columns,
                    "Hugo_Symbol not found in output headers")
    self.assertTrue(
        "i_TEST" not in columns,
        "i_TEST was found in output headers when prepend was disabled.")
    self.assertTrue("TEST" in columns,
                    "TEST was not found in output headers.")
def testInternalFields(self):
    """Check that an unlisted annotation is rendered with an "i_" prefix.

    A mutation carrying a custom "TEST" annotation and a "gene" annotation is
    rendered with the default renderer options. The output headers must
    contain "Hugo_Symbol" and "i_TEST", and must not contain a bare "TEST".
    """
    # NOTE(review): a second method with this name is visible later in this
    # source; if both are in the same class only the later one runs.
    maf_config = 'configs/tcgaMAF2.4_output.config'
    maf_path = "out/testInternalFields_v2.4.maf.tsv"

    mutation = MutationData()
    mutation.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # "gene" is a real annotation and should not be considered internal.
    mutation.createAnnotation("gene", "EGFR")

    renderer = TcgaMafOutputRenderer(maf_path, configFile=maf_config)
    renderer.renderMutations(iter([mutation]), ['No comments'])

    # The header checks below rely on Hugo_Symbol being a required column.
    config_parser = ConfigUtils.createConfigParser(maf_config)
    required_columns = config_parser.get("general", "requiredColumns")
    self.assertTrue(
        "Hugo_Symbol" in required_columns,
        " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    self.assertTrue(os.path.getsize(maf_path) > 0,
                    "Generated MAF file (" + maf_path + ") is empty.")

    reader = GenericTsvReader(maf_path)
    header_names = reader.getFieldNames()
    self.assertTrue("Hugo_Symbol" in header_names,
                    "Hugo_Symbol not found in output headers")
    self.assertTrue(
        "TEST" not in header_names,
        "TEST was found in output headers when it should have been renamed to i_TEST")
    self.assertTrue("i_TEST" in header_names,
                    "i_TEST not found in output headers")
def testInternalFields(self):
    """Verify that annotations outside the configured columns get an "i_" prefix.

    Renders one mutation (custom "TEST" annotation plus a "gene" annotation)
    with default renderer options, then inspects the output headers:
    "Hugo_Symbol" and "i_TEST" must be present, a bare "TEST" must not.
    """
    config_path = 'configs/tcgaMAF2.4_output.config'
    out_path = "out/testInternalFields_v2.4.maf.tsv"

    mut = MutationData()
    mut.createAnnotation("TEST", "THIS IS A TEST", "TESTING")
    # "gene" is a real annotation and should not be considered internal.
    mut.createAnnotation("gene", "EGFR")

    maf_renderer = TcgaMafOutputRenderer(out_path, configFile=config_path)
    maf_renderer.renderMutations(iter([mut]), ['No comments'])

    # Precondition for the header checks: Hugo_Symbol is a required column.
    required = ConfigUtils.createConfigParser(config_path).get(
        "general", "requiredColumns")
    self.assertTrue(
        "Hugo_Symbol" in required,
        " This test assumes that Hugo_Symbol is a required column in the TCGA MAF. If not, the test must be modified.")

    output_size = os.stat(out_path).st_size
    self.assertTrue(output_size > 0,
                    "Generated MAF file (" + out_path + ") is empty.")

    tsv = GenericTsvReader(out_path)
    columns = tsv.getFieldNames()
    self.assertTrue("Hugo_Symbol" in columns,
                    "Hugo_Symbol not found in output headers")
    self.assertTrue(
        "TEST" not in columns,
        "TEST was found in output headers when it should have been renamed to i_TEST")
    self.assertTrue("i_TEST" in columns,
                    "i_TEST not found in output headers")
def testTCGAMAFAsInputAndQuickAnnotate(self):
    """Annotate a TCGA MAF read via MAFLITE and check the rendered result.

    The input MAF is annotated with the single test datasource under
    ``testdata/thaga_janakari_gene_ds`` and rendered as a TCGA MAF. The test
    then checks that:

    * the output file is non-empty,
    * the output comments contain "#version",
    * the headers include "i_TJ_Data_Why" and "i_TJ_Data_Who",
    * output rows plus comments number exactly two more than input rows plus
      comments (the failure message attributes the two extra lines to the
      MAF version and Oncotator version comments).
    """
    maf_in = "testdata/maf/Patient0.maf.annotated"
    input_creator = MafliteInputMutationCreator(maf_in,
                                                'configs/maflite_input.config')
    maf_out = "out/testTCGAMAFAsInputAndQuickAnnotate.tsv"
    renderer = TcgaMafOutputRenderer(maf_out,
                                     'configs/tcgaMAF2.4_output.config')

    annotator = Annotator()
    annotator.setInputCreator(input_creator)
    annotator.setOutputRenderer(renderer)
    annotator.addDatasource(
        DatasourceFactory.createDatasource(
            "testdata/thaga_janakari_gene_ds/hg19/tj_data.config",
            "testdata/thaga_janakari_gene_ds/hg19/"))
    annotator.annotate()

    self.assertTrue(os.path.getsize(maf_out) > 0,
                    "Generated MAF file (" + maf_out + ") is empty.")

    in_reader = GenericTsvReader(maf_in)
    out_reader = GenericTsvReader(maf_out)

    self.assertTrue('#version' in out_reader.getComments(),
                    "First line did not specify a version number")
    for new_field in ("i_TJ_Data_Why", "i_TJ_Data_Who"):
        self.assertTrue(new_field in out_reader.getFieldNames(),
                        "New field missing (" + new_field + ") from header")

    # Count data rows first (output, then input), then add the comment lines.
    out_lines = sum(1 for _ in out_reader)
    in_lines = sum(1 for _ in in_reader)
    in_lines += len(in_reader.getCommentsAsList())
    out_lines += len(out_reader.getCommentsAsList())

    self.assertTrue(
        out_lines == in_lines + 2,
        "Output file should have same number of lines plus two (for maf version and Oncotator version comments) as input file. (In,Out): %s, %s"
        % (in_lines, out_lines))
def testTCGAMAFRendering(self):
    """Render the example (germline) VCF as a TCGA MAF and validate the result.

    The VCF at ``testdata/vcf/example.vcf`` is annotated with the manual
    overrides from ``_createTCGAMAFOverridesForVCF`` and every datasource
    returned by ``_createDatasourceCorpus``. The filename returned by the
    annotator is passed to ``_validateTcgaMafContents``.
    """
    vcf_path = os.path.join("testdata", "vcf", "example.vcf")
    maf_path = os.path.join("out", "example.vcf.maf.annotated")

    vcf_creator = VcfInputMutationCreator(vcf_path)
    # The return value of createMutations() is deliberately left unused here,
    # as in the original test.
    vcf_creator.createMutations()
    maf_renderer = TcgaMafOutputRenderer(maf_path)

    annotator = Annotator()
    annotator.setInputCreator(vcf_creator)
    annotator.setOutputRenderer(maf_renderer)
    annotator.setManualAnnotations(self._createTCGAMAFOverridesForVCF())
    for datasource in self._createDatasourceCorpus():
        annotator.addDatasource(datasource)

    rendered_filename = annotator.annotate()
    self._validateTcgaMafContents(rendered_filename)