def testSampleNameSelectorWithVCF(self):
    """
    Check SampleNameSelector against mutations created from a one-row VCF.

    Every mutation must resolve to one of the three expected sample names,
    and the selector must report "INPUT" as its annotation source and
    "sample_name" as its output annotation name.
    """
    # Named "creator" rather than "input" so the builtin is not shadowed.
    creator = VcfInputMutationCreator("testdata/vcf/example.1row.vcf")
    # The selector is initialised from the first mutation only.
    first_mut = next(creator.createMutations())
    selector = SampleNameSelector(first_mut)
    expected = ["NA 00001", "NA 00002", "NA 00003"]
    # createMutations() is called again so the loop starts from a fresh generator.
    for mut in creator.createMutations():
        self.assertIn(selector.getSampleName(mut), expected)
    self.assertEqual(selector.getAnnotationSource(), "INPUT")
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(selector.getOutputAnnotationName(), "sample_name")
def testSampleNameSelectorWithMaf(self):
    """
    Check SampleNameSelector against mutations created from a tiny maflite file.

    Every mutation must resolve to the same combined normal/tumor sample
    name, and the selector must report "OUTPUT" as its annotation source and
    MutUtils.SAMPLE_NAME_ANNOTATION_NAME as its output annotation name.
    """
    maf_creator = MafliteInputMutationCreator(
        "testdata/maflite/tiny_maflite.maf.txt")
    # Build the selector from the first mutation, then iterate a fresh generator.
    selector = SampleNameSelector(next(maf_creator.createMutations()))
    expected_name = "Patient0-Normal-Patient0-Tumor"
    for mutation in maf_creator.createMutations():
        self.assertEqual(expected_name, selector.getSampleName(mutation))
    annotation_source = selector.getAnnotationSource()
    self.assertEqual(annotation_source, "OUTPUT")
    output_name = selector.getOutputAnnotationName()
    self.assertEqual(output_name, MutUtils.SAMPLE_NAME_ANNOTATION_NAME)
def __init__(self, mutations):
    """
    Set up a new queue on top of a MutationData iterator.

    The iterator is wrapped so that its first element can be inspected
    without being consumed; that element is used to build the sample name
    selector.

    :param mutations: any iterator producing MutationData
    """
    peekable_mutations = more_itertools.peekable(mutations)
    self.mutations = peekable_mutations
    # peek() looks at the first mutation but leaves it in the iterator.
    first_mutation = peekable_mutations.peek()
    self.sns = SampleNameSelector(first_mutation)

    # Remaining state does not depend on the mutations themselves.
    self.logger = logging.getLogger(__name__)
    self.warned_about_order = False
    self.last = 0
    self.indel_queue = []
    self.queue = collections.defaultdict(list)
def _writeMuts2Tsv(self, muts, path):
    """
    Write every mutation from a mutation generator to an intermediate
    delimited text file (delimiter and line terminator come from
    self.delimiter and self.lineterminator).

    For each mutation this method also:
      * computes the sample name and, when the mutation lacks the sample name
        annotation, creates it;
      * records the chromosome and the sample name;
      * replaces start, ref_allele and alt_allele with the values returned by
        MutUtils.retrieveMutCoordinatesForRendering.

    The header is derived from the first mutation; field names starting with
    "_" are not rendered. If the generator yields nothing, the file is left
    empty (no header).

    :param muts: generator object with mutations
    :param path: directory in which the temporary file is created
    :return: tuple of (list of chromosomes, sorted list of sample names,
        name of the written file)
    """
    sampleNames = set()
    chroms = set()
    writer = None

    # Reserve a uniquely named file, then close the handle immediately: the
    # file is reopened by name below, and keeping this handle open would leak
    # a descriptor (and can prevent reopening on some platforms).
    # delete=False keeps the file on disk after close().
    tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
    tempTsvFile.close()
    tsvFilename = tempTsvFile.name
    self.logger.debug("Creating intermediate tsv file at %s", tsvFilename)

    sampleNameSelector = SampleNameSelector(self.mutation,
                                            configFile=self.configTable.getConfigFilename(),
                                            section="OTHER")
    sampleNameAnnotationName = sampleNameSelector.getOutputAnnotationName()
    sampleNameSource = sampleNameSelector.getAnnotationSource()

    with open(tsvFilename, 'w') as fptr:
        for ctr, mut in enumerate(muts, 1):
            sampleName = sampleNameSelector.getSampleName(mut)
            if sampleName is not None:
                if mut.get(sampleNameAnnotationName, None) is None:
                    mut.createAnnotation(sampleNameAnnotationName, sampleName, sampleNameSource)
                sampleNames.add(sampleName)

            # Parse chromosome
            chroms.add(mut.chr)

            updated_start, updated_ref_allele, updated_alt_allele = \
                MutUtils.retrieveMutCoordinatesForRendering(mut)
            mut.ref_allele = updated_ref_allele
            mut.alt_allele = updated_alt_allele
            mut.start = updated_start

            if writer is None:
                # First mutation: derive the header. Work on a copy so the
                # list returned by the helper is not modified.
                fieldnames2Render = list(MutUtils.getAllAttributeNames(mut))
                # NOTE(review): the annotation name is appended even if it is
                # already among the attribute names (as before) -- confirm
                # whether a duplicate column is intended.
                if sampleNameAnnotationName is not None:
                    fieldnames2Render.append(sampleNameAnnotationName)
                # Fieldnames that start with "_" aren't rendered. Filter into a
                # new list: removing items from a list while iterating over it
                # skips the element following each removal.
                fieldnames2Render = [fieldname for fieldname in fieldnames2Render
                                     if not fieldname.startswith("_")]
                writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore',
                                        delimiter=self.delimiter,
                                        lineterminator=self.lineterminator)
                writer.writeheader()

            writer.writerow(mut)

            if (ctr % 1000) == 0:
                self.logger.info("Wrote %d mutations to tsv.", ctr)

    return list(chroms), sorted(sampleNames), tsvFilename