def testSampleNameSelectorWithMaf(self):
     """Check SampleNameSelector against mutations read from a tiny maflite file.

     The selector is built from the first mutation of one createMutations() pass;
     a second pass is then used to verify the sample name of every mutation, followed
     by the selector's annotation source and output annotation name.
     """
     maf_path = "testdata/maflite/tiny_maflite.maf.txt"
     expected_sample_name = "Patient0-Normal-Patient0-Tumor"

     creator = MafliteInputMutationCreator(maf_path)
     # Seed the selector with the first mutation only.
     selector = SampleNameSelector(next(creator.createMutations()))

     # Fresh generator: every mutation must resolve to the same sample name.
     for mutation in creator.createMutations():
         self.assertEqual(expected_sample_name, selector.getSampleName(mutation))

     self.assertEqual(selector.getAnnotationSource(), "OUTPUT")
     self.assertEqual(
         selector.getOutputAnnotationName(),
         MutUtils.SAMPLE_NAME_ANNOTATION_NAME,
     )
 def testSampleNameSelectorWithVCF(self):
     """Check SampleNameSelector against mutations read from a one-row VCF file.

     The selector is built from the first mutation of one createMutations() pass.
     A second pass verifies that each mutation's sample name is one of the expected
     names, then the selector's annotation source and output annotation name are checked.
     """
     creator = VcfInputMutationCreator("testdata/vcf/example.1row.vcf")
     first_mut = next(creator.createMutations())
     selector = SampleNameSelector(first_mut)
     expected = ["NA 00001", "NA 00002", "NA 00003"]
     for mut in creator.createMutations():
         self.assertIn(selector.getSampleName(mut), expected)
     self.assertEqual(selector.getAnnotationSource(), "INPUT")
     # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(selector.getOutputAnnotationName(), "sample_name")
 def testSampleNameSelectorWithVCF(self):
     """Check SampleNameSelector against mutations read from a one-row VCF file.

     The selector is built from the first mutation of one createMutations() pass.
     A second pass verifies that each mutation's sample name is one of the expected
     names, then the selector's annotation source and output annotation name are checked.
     """
     creator = VcfInputMutationCreator("testdata/vcf/example.1row.vcf")
     first_mut = next(creator.createMutations())
     selector = SampleNameSelector(first_mut)
     expected = ["NA 00001", "NA 00002", "NA 00003"]
     for mut in creator.createMutations():
         self.assertIn(selector.getSampleName(mut), expected)
     self.assertEqual(selector.getAnnotationSource(), "INPUT")
     # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(selector.getOutputAnnotationName(), "sample_name")
 def testSampleNameSelectorWithMaf(self):
     """Check SampleNameSelector against mutations read from a tiny maflite file.

     The selector is built from the first mutation of one createMutations() pass;
     a second pass is then used to verify the sample name of every mutation, followed
     by the selector's annotation source and output annotation name.
     """
     maf_path = "testdata/maflite/tiny_maflite.maf.txt"
     expected_sample_name = "Patient0-Normal-Patient0-Tumor"

     creator = MafliteInputMutationCreator(maf_path)
     # Seed the selector with the first mutation only.
     selector = SampleNameSelector(next(creator.createMutations()))

     # Fresh generator: every mutation must resolve to the same sample name.
     for mutation in creator.createMutations():
         self.assertEqual(expected_sample_name, selector.getSampleName(mutation))

     self.assertEqual(selector.getAnnotationSource(), "OUTPUT")
     output_name = selector.getOutputAnnotationName()
     self.assertEqual(output_name, MutUtils.SAMPLE_NAME_ANNOTATION_NAME)
示例#5
0
    def _writeMuts2Tsv(self, muts, path):
        """
        Write every mutation from a mutation generator to an intermediate tab separated file.

        For each mutation a sample name is computed through SampleNameSelector; when the mutation is missing the
        sample name annotation, that annotation is created on it. Each mutation's start, ref_allele and alt_allele
        are overwritten with the values from MutUtils.retrieveMutCoordinatesForRendering before it is written.
        The chromosomes and sample names seen in the generator are collected along the way.

        The header is derived from the first mutation only; fieldnames starting with "_" are not rendered.

        :param muts: generator object with mutations
        :param path: directory in which the temporary tsv file is created
        :return: tuple of (list of chromosomes seen, sorted list of sample names seen, name of the tsv file)
        """

        sampleNames = set()
        chroms = set()

        writer = None

        # Reserve a unique filename inside ``path``. Only the name is needed, so the handle is closed right away
        # (it used to be leaked, and reopening a still-open temporary file by name is not portable).
        tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
        tempTsvFilename = tempTsvFile.name
        tempTsvFile.close()
        self.logger.debug("Creating intermediate tsv file at %s", tempTsvFilename)

        sampleNameSelector = SampleNameSelector(self.mutation,
                                                configFile=self.configTable.getConfigFilename(),
                                                section="OTHER")

        with open(tempTsvFilename, 'w') as fptr:
            sampleNameAnnotationName = sampleNameSelector.getOutputAnnotationName()
            sampleNameSource = sampleNameSelector.getAnnotationSource()

            for ctr, mut in enumerate(muts, 1):
                sampleName = sampleNameSelector.getSampleName(mut)
                if sampleName is not None:
                    # Only fill in the annotation when the mutation does not already carry one.
                    if mut.get(sampleNameAnnotationName, None) is None:
                        mut.createAnnotation(sampleNameAnnotationName, sampleName, sampleNameSource)
                    sampleNames.add(sampleName)

                # Parse chromosome
                chroms.add(mut.chr)

                updated_start, updated_ref_allele, updated_alt_allele = MutUtils.retrieveMutCoordinatesForRendering(mut)
                mut.ref_allele = updated_ref_allele
                mut.alt_allele = updated_alt_allele
                mut.start = updated_start

                if writer is None:
                    # First mutation: build the header. Work on a copy so the list returned by MutUtils is not
                    # modified in place.
                    fieldnames2Render = list(MutUtils.getAllAttributeNames(mut))
                    if sampleNameAnnotationName is not None:
                        fieldnames2Render.append(sampleNameAnnotationName)
                    # Fieldnames that start with "_" aren't rendered. Filter into a new list: removing items from
                    # a list while iterating over it skips the element that follows each removal.
                    fieldnames2Render = [fieldname for fieldname in fieldnames2Render
                                         if not fieldname.startswith("_")]

                    writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore', delimiter=self.delimiter,
                                            lineterminator=self.lineterminator)
                    writer.writeheader()

                writer.writerow(mut)

                if (ctr % 1000) == 0:
                    self.logger.info("Wrote %s mutations to tsv.", ctr)

        return list(chroms), sorted(sampleNames), tempTsvFilename