示例#1
0
    def _writeMuts2Tsv(self, muts, path):
        """
        Write every mutation produced by a generator to an intermediate delimited file.

        For each mutation the sample name is resolved through a SampleNameSelector. When a sample name is found
        and the mutation does not already carry the selector's output annotation, that annotation is created on
        the mutation. The mutation's start, ref_allele and alt_allele are overwritten with the values returned by
        MutUtils.retrieveMutCoordinatesForRendering before the row is written.

        The header is derived from the first mutation only: the names returned by MutUtils.getAllAttributeNames
        plus the sample name annotation name (when it is not None), minus every name that starts with "_".
        If the generator yields nothing, the file is left empty (no header is written).

        :param muts: generator object with mutations
        :param path: directory in which the temporary file is created
        :return: tuple of (list of chromosomes seen, in no particular order; sorted list of sample names seen;
                 name of the temporary file that was written)
        """

        sampleNames = set()
        chroms = set()

        writer = None

        # Only the unique file name is needed. Close the handle right away so it is not leaked and so the file
        # can be reopened by name below (delete=False keeps the file on disk after the close).
        tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
        tempTsvFile.close()
        self.logger.debug("Creating intermediate tsv file at %s" % tempTsvFile.name)

        sampleNameSelector = SampleNameSelector(self.mutation,
                                                configFile=self.configTable.getConfigFilename(),
                                                section="OTHER")

        with open(tempTsvFile.name, 'w') as fptr:
            ctr = 0
            sampleNameAnnotationName = sampleNameSelector.getOutputAnnotationName()
            sampleNameSource = sampleNameSelector.getAnnotationSource()

            for mut in muts:
                sampleName = sampleNameSelector.getSampleName(mut)
                if sampleName is not None:
                    if mut.get(sampleNameAnnotationName, None) is None:
                        mut.createAnnotation(sampleNameAnnotationName, sampleName, sampleNameSource)
                    sampleNames.add(sampleName)

                # Parse chromosome
                chroms.add(mut.chr)

                updated_start, updated_ref_allele, updated_alt_allele = MutUtils.retrieveMutCoordinatesForRendering(mut)
                mut.ref_allele = updated_ref_allele
                mut.alt_allele = updated_alt_allele
                mut.start = updated_start

                if ctr == 0:
                    # The first mutation defines the columns for the whole file.
                    candidateFieldnames = list(MutUtils.getAllAttributeNames(mut))
                    if sampleNameAnnotationName is not None:
                        candidateFieldnames.append(sampleNameAnnotationName)
                    # Fieldnames that start with "_" aren't rendered. Build a new list instead of calling
                    # remove() while iterating: removing during iteration skips the element that follows each
                    # removed one, so adjacent "_"-prefixed names would slip into the header.
                    fieldnames2Render = [fieldname for fieldname in candidateFieldnames
                                         if not fieldname.startswith("_")]

                    writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore', delimiter=self.delimiter,
                                            lineterminator=self.lineterminator)
                    writer.writeheader()

                writer.writerow(mut)

                ctr += 1
                if (ctr % 1000) == 0:
                    self.logger.info("Wrote " + str(ctr) + " mutations to tsv.")

        return list(chroms), sorted(sampleNames), tempTsvFile.name
示例#2
0
    def _writeMuts2Tsv(self, muts, path):
        """
        Write every mutation produced by a generator to an intermediate delimited file.

        The sample name of each mutation is resolved from the annotation names returned by
        self.getAnnotationNames, in this order of precedence:

        1. "SAMPLE_NAME": the value of the first such annotation is used as-is.
        2. Both "SAMPLE_TUMOR_NAME" and "SAMPLE_NORMAL_NAME": the normal and tumor values are joined with "-"
           and stored on the mutation under MutUtils.SAMPLE_NAME_ANNOTATION_NAME (source "OUTPUT").
        3. Only "SAMPLE_TUMOR_NAME": the tumor value is stored under MutUtils.SAMPLE_NAME_ANNOTATION_NAME
           (source "INPUT").
        4. Only "SAMPLE_NORMAL_NAME": the normal value is stored under MutUtils.SAMPLE_NAME_ANNOTATION_NAME
           (source "INPUT").

        The mutation's start, ref_allele and alt_allele are overwritten with the values returned by
        MutUtils.retrieveMutCoordinatesForRendering before the row is written.

        The header is derived from the first mutation only: the names returned by MutUtils.getAllAttributeNames
        plus the resolved sample name annotation name (when there is one), minus every name that starts with "_".
        If the generator yields nothing, the file is left empty (no header is written).

        :param muts: generator object with mutations
        :param path: directory in which the temporary file is created
        :return: tuple of (list of chromosomes seen, in no particular order; sorted list of sample names seen;
                 name of the temporary file that was written)
        """

        sampleNames = set()
        chroms = set()

        writer = None

        # Only the unique file name is needed. Close the handle right away so it is not leaked and so the file
        # can be reopened by name below (delete=False keeps the file on disk after the close).
        tempTsvFile = tempfile.NamedTemporaryFile(dir=path, delete=False)
        tempTsvFile.close()
        self.logger.info("Creating intermediate tsv file...")

        sampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_NAME")
        tumorSampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_TUMOR_NAME")
        normalSampleNameAnnotationNames = self.getAnnotationNames("SAMPLE_NORMAL_NAME")

        with open(tempTsvFile.name, 'w') as fptr:
            ctr = 0
            for mut in muts:

                sampleName = None
                sampleNameAnnotationName = None

                # Sample name annotation is present
                if len(sampleNameAnnotationNames) != 0:
                    sampleNameAnnotationName = sampleNameAnnotationNames[0]
                    sampleName = mut[sampleNameAnnotationName]
                # Both, tumor and normal sample name annotations are present
                elif len(tumorSampleNameAnnotationNames) != 0 and len(normalSampleNameAnnotationNames) != 0:
                    tumorSampleNameAnnotationName = tumorSampleNameAnnotationNames[0]
                    normalSampleNameAnnotationName = normalSampleNameAnnotationNames[0]
                    # str.join gives the same result as the legacy string.join(words, sep) helper.
                    sampleName = "-".join([mut[normalSampleNameAnnotationName],
                                           mut[tumorSampleNameAnnotationName]])
                    sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                    mut.createAnnotation(sampleNameAnnotationName, sampleName, "OUTPUT")
                    if ctr == 0:
                        self.logger.info("Sample name is the concatenation of %s and %s columns."
                                         % (normalSampleNameAnnotationName, tumorSampleNameAnnotationName))
                # Only tumor sample name is present
                elif len(tumorSampleNameAnnotationNames) != 0:
                    tumorSampleNameAnnotationName = tumorSampleNameAnnotationNames[0]
                    sampleName = mut[tumorSampleNameAnnotationName]
                    sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                    mut.createAnnotation(sampleNameAnnotationName, sampleName, "INPUT")
                    if ctr == 0:
                        self.logger.info("Sample name is %s column." % tumorSampleNameAnnotationName)
                # Only normal sample name is present
                elif len(normalSampleNameAnnotationNames) != 0:
                    normalSampleNameAnnotationName = normalSampleNameAnnotationNames[0]
                    sampleName = mut[normalSampleNameAnnotationName]
                    sampleNameAnnotationName = MutUtils.SAMPLE_NAME_ANNOTATION_NAME
                    mut.createAnnotation(sampleNameAnnotationName, sampleName, "INPUT")
                    if ctr == 0:
                        self.logger.info("Sample name is %s column." % normalSampleNameAnnotationName)

                if sampleName is not None:
                    sampleNames.add(sampleName)

                # Parse chromosome
                chroms.add(mut.chr)

                updated_start, updated_ref_allele, updated_alt_allele = MutUtils.retrieveMutCoordinatesForRendering(mut)
                mut.ref_allele = updated_ref_allele
                mut.alt_allele = updated_alt_allele
                mut.start = updated_start

                if ctr == 0:
                    # The first mutation defines the columns for the whole file.
                    candidateFieldnames = list(MutUtils.getAllAttributeNames(mut))
                    if sampleNameAnnotationName is not None:
                        candidateFieldnames.append(sampleNameAnnotationName)
                    # Fieldnames that start with "_" aren't rendered. Build a new list instead of calling
                    # remove() while iterating: removing during iteration skips the element that follows each
                    # removed one, so adjacent "_"-prefixed names would slip into the header.
                    fieldnames2Render = [fieldname for fieldname in candidateFieldnames
                                         if not fieldname.startswith("_")]

                    writer = csv.DictWriter(fptr, fieldnames2Render, extrasaction='ignore', delimiter=self.delimiter,
                                            lineterminator=self.lineterminator)
                    writer.writeheader()

                writer.writerow(mut)

                ctr += 1
                if (ctr % 1000) == 0:
                    self.logger.info("Wrote " + str(ctr) + " mutations to tsv.")

        return list(chroms), sorted(sampleNames), tempTsvFile.name