def test_simple_seg_file_input(self):
    """Test that we can read in a seg file, do no annotation, and output as SIMPLE_TSV.

    The seg file is read twice: once to count the segments (27 expected) and
    once more to feed the renderer (presumably because counting exhausts the
    iterator returned by ``createMutations``).  The rendered TSV is then read
    back and checked for the required columns and for non-empty ``start`` and
    ``end`` values on every row.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_input.tsv"
    config_filename = 'configs/seg_file_input.config'
    expected_num_segs = 27

    # Remove any previous output so the checks below only see this run's file.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    # NOTE(review): the config path is the 2nd positional argument here, while
    # test_simple_seg_file_annotations passes it 3rd (after None) -- confirm
    # which form matches the MafliteInputMutationCreator signature.
    ic = MafliteInputMutationCreator(inputFilename, config_filename)

    # Count by draining the iterator; an empty input correctly reports 0.
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, expected_num_segs,
                     "Found %d segments when there should have been 27." % num_segs)

    # Fresh creator for the rendering pass.
    ic = MafliteInputMutationCreator(inputFilename, config_filename)
    segs = ic.createMutations()

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    outputRenderer.renderMutations(segs)

    # Now check the output
    output_reader = GenericTsvReader(output_filename)
    headers = output_reader.getFieldNames()
    for rcol in ("Sample", "Num_Probes", "Segment_Mean"):
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        for coord in ('start', 'end'):
            self.assertIsNotNone(line_dict[coord])
            self.assertNotEqual(line_dict[coord].strip(), "")
def test_simple_seg_file_annotations(self):
    """Test that we can read in a seg file, do GENCODE annotation, and output as SIMPLE_TSV.

    The seg file is read twice: once to count the segments (27 expected) and
    once more to produce the segments that get annotated (presumably because
    counting exhausts the iterator returned by ``createMutations``).  Each
    segment is annotated with the test GENCODE v19 datasource, rendered, and
    the resulting TSV is checked for the required columns, non-empty
    ``start`` / ``end`` values, and a ``genes`` column on every row.
    """
    inputFilename = "testdata/seg/Patient0.seg.txt"
    output_filename = "out/test_simple_seg_file_annotations.tsv"
    config_filename = 'configs/seg_file_input.config'
    expected_num_segs = 27

    # Remove any previous output so the checks below only see this run's file.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    # NOTE(review): the config path is the 3rd positional argument here (after
    # None), while test_simple_seg_file_input passes it 2nd -- confirm which
    # form matches the MafliteInputMutationCreator signature.
    ic = MafliteInputMutationCreator(inputFilename, None, config_filename)

    # Count by draining the iterator; an empty input correctly reports 0.
    num_segs = sum(1 for _ in ic.createMutations())
    self.assertEqual(num_segs, expected_num_segs,
                     "Found %d segments when there should have been 27." % num_segs)

    # Fresh creator for the annotation pass.
    ic = MafliteInputMutationCreator(inputFilename, None, config_filename)
    segs = ic.createMutations()

    gencode_ds = TestUtils._create_test_gencode_v19_ds("out/seg_file_gencode_ds")
    segs_annotated = [gencode_ds.annotate_segment(seg) for seg in segs]

    outputRenderer = SimpleOutputRenderer(output_filename, '')
    # The renderer is handed an iterator, not the list itself.
    outputRenderer.renderMutations(iter(segs_annotated))

    # Now check the output
    output_reader = GenericTsvReader(output_filename)
    headers = output_reader.getFieldNames()
    for rcol in ("Sample", "Num_Probes", "Segment_Mean"):
        self.assertIn(rcol, headers)

    for line_dict in output_reader:
        for coord in ('start', 'end'):
            self.assertIsNotNone(line_dict[coord])
            self.assertNotEqual(line_dict[coord].strip(), "")
        self.assertIn("genes", line_dict)
def testMulticoreAnnotateFromChunkedFile(self):
    """Annotate a maflite file in parallel, chunk by chunk, and check the rendered row count.

    Mutations are pulled from the input in rounds of ``numChunks`` chunks of
    up to ``chunkSize`` mutations each.  Every round is mapped over a
    ``LoggingPool`` with ``annotate_mutations_global``, each work item being a
    ``(chunk, gafDatasource)`` tuple.  The flattened results of all rounds are
    rendered with ``SimpleOutputRenderer`` and the output file must contain
    730 rows.
    """
    # TODO: Add unit test that Mutation data is pickle-able

    # Local import: only ``chain`` is referenced unqualified elsewhere in this test.
    from itertools import islice

    inputFile = "testdata/maflite/Patient0.snp.maf.txt"
    outputFile = "out/testGAFMulticorePatient0.snp.maf.txt"
    chunkSize = 200
    numChunks = 4
    expected_num_variants = 730

    gafDatasource = TestUtils.createGafDatasourceProxy(self.config)
    ic = MafliteInputMutationCreator(inputFile)
    oc = SimpleOutputRenderer(outputFile)

    # createChunks
    muts = ic.createMutations()
    allAnnotatedChunksFlat = []
    are_mutations_remaining = True
    p = LoggingPool(processes=numChunks)
    try:
        while are_mutations_remaining:
            chunks = []
            for _ in range(numChunks):
                # A short (possibly empty) chunk means the input is exhausted.
                # The round is still filled to numChunks items so every map
                # call receives the same number of work items.
                chunk = list(islice(muts, chunkSize))
                if len(chunk) < chunkSize:
                    are_mutations_remaining = False
                chunks.append((chunk, gafDatasource))

            annotatedChunks = p.map(annotate_mutations_global, chunks)
            allAnnotatedChunksFlat.append(self._flattenChunks(annotatedChunks))
    finally:
        # Always shut the pool down, even if annotation fails, so worker
        # processes are not left behind by a failing test.
        p.close()
        p.join()

    annotatedMuts = chain.from_iterable(allAnnotatedChunksFlat)
    oc.renderMutations(annotatedMuts, Metadata())

    # Count the rows that actually made it into the rendered file.
    ctr = sum(1 for _ in GenericTsvReader(outputFile))
    self.assertEqual(ctr, expected_num_variants,
                     "Should have read 730 variants, but read " + str(ctr))