Example #1
    def _picard_rna_metrics(readset):
        """
        Computes a series of quality control metrics using both the CollectRnaSeqMetrics and CollectAlignmentSummaryMetrics
        functions; metrics are collected using [Picard](http://broadinstitute.github.io/picard/).
        """

        jobs = []
        sample = readset.sample

        alignment_file = readset.bam + ".bam"
        output_directory = os.path.dirname(alignment_file)

        job = picard.collect_multiple_metrics(
            alignment_file,
            readset.bam + ".metrics",
            reference_sequence=readset.reference_file)
        job.name = "picard_collect_multiple_metrics." + readset.name + ".met" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        if len(readset.annotation_files) > 2 and os.path.isfile(
                readset.annotation_files[2]):
            job = picard.collect_rna_metrics(
                alignment_file, os.path.join(output_directory, sample.name),
                readset.annotation_files[2], readset.reference_file)

            job.name = "picard_rna_metrics." + readset.name + ".rmet" + "." + readset.run + "." + readset.lane
            jobs.append(job)

        return jobs
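For orientation: helpers like `picard.collect_rna_metrics` typically just assemble a Picard command line into a `Job` object that the scheduler runs later. Below is a minimal sketch of such a factory, assuming a simplified `Job` container; it is not the real GenPipes `bfx.picard` module, though the CollectRnaSeqMetrics options shown are standard Picard flags.

    # Minimal illustrative sketch -- NOT the real GenPipes bfx/picard module.
    # The Job container is a stand-in; INPUT/OUTPUT/REF_FLAT/REFERENCE_SEQUENCE/
    # STRAND_SPECIFICITY are standard CollectRnaSeqMetrics options.
    class Job:
        def __init__(self, input_files, output_files, command, name=None):
            self.input_files = input_files
            self.output_files = output_files
            self.command = command
            self.name = name

    def collect_rna_metrics(alignment_file, output_prefix, ref_flat, reference):
        output_file = output_prefix + ".rnaseq_metrics.txt"
        command = (
            "java -jar picard.jar CollectRnaSeqMetrics"
            " INPUT=" + alignment_file +
            " OUTPUT=" + output_file +
            " REF_FLAT=" + ref_flat +
            " REFERENCE_SEQUENCE=" + reference +
            " STRAND_SPECIFICITY=NONE"
        )
        return Job([alignment_file], [output_file], command)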
Example #2
    def _picard_rna_metrics(readset):
        """
        Computes a series of quality control metrics using both the CollectRnaSeqMetrics and CollectAlignmentSummaryMetrics
        functions; metrics are collected using [Picard](http://broadinstitute.github.io/picard/).
        """

        jobs = []
        sample = readset.sample

        alignment_file = readset.bam + ".bam"
        output_directory = os.path.dirname(alignment_file)

        job = picard.collect_multiple_metrics(alignment_file, readset.bam + ".metrics",
                                              reference_sequence=readset.reference_file)
        job.name = "picard_collect_multiple_metrics." + readset.name + ".met" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        if len(readset.annotation_files) > 2 and os.path.isfile(readset.annotation_files[2]):
            job = picard.collect_rna_metrics(alignment_file,
                                             os.path.join(output_directory, sample.name),
                                             readset.annotation_files[2],
                                             readset.reference_file)

            job.name = "picard_rna_metrics." + readset.name + ".rmet" + "." + readset.run + "." + readset.lane
            jobs.append(job)

        return jobs
Example #3
    def get_metrics_jobs(self, readset):
        """
        Generate per-readset QC jobs: Picard multiple metrics, optional on-target
        (hybrid selection) metrics driven by a BED file, and BVATools depth of coverage.
        """
        jobs = []

        input_file_prefix = readset.bam + '.'
        input = input_file_prefix + "bam"

        job = picard.collect_multiple_metrics(input, input_file_prefix + "metrics",
                                              reference_sequence=readset.reference_file)
        job.name = "picard_collect_multiple_metrics." + readset.name + ".met" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        if readset.beds:
            coverage_bed = readset.beds[0]
            full_coverage_bed = self.output_dir + os.sep + coverage_bed
        else:
            coverage_bed = None
            full_coverage_bed = None

        if coverage_bed:
            if (not os.path.exists(full_coverage_bed)) and \
                    (coverage_bed not in BwaRunProcessingAligner.downloaded_bed_files):
                # Download the bed file
                command = config.param('DEFAULT', 'fetch_bed_file_command').format(
                    output_directory=self.output_dir,
                    filename=coverage_bed
                )
                job = Job([], [full_coverage_bed], command=command, name="bed_download." + coverage_bed)
                BwaRunProcessingAligner.downloaded_bed_files.append(coverage_bed)
                jobs.append(job)

            interval_list = re.sub(r"\.[^.]+$", ".interval_list", coverage_bed)

            if interval_list not in BwaRunProcessingAligner.created_interval_lists:
                # Create one job to generate the interval list from the bed file
                ref_dict = os.path.splitext(readset.reference_file)[0] + '.dict'
                job = tools.bed2interval_list(ref_dict, full_coverage_bed, interval_list)
                job.name = "interval_list." + coverage_bed
                BwaRunProcessingAligner.created_interval_lists.append(interval_list)
                jobs.append(job)

            job = picard.calculate_hs_metrics(input_file_prefix + "bam", input_file_prefix + "metrics.onTarget.txt",
                                              interval_list, reference_sequence=readset.reference_file)
            job.name = "picard_calculate_hs_metrics." + readset.name + ".hs" + "." + readset.run + "." + readset.lane
            jobs.append(job)

        jobs.extend(self.verify_bam_id(readset, full_coverage_bed))

        job = bvatools.depth_of_coverage(
            input,
            input_file_prefix + "metrics.targetCoverage.txt",
            full_coverage_bed,
            other_options=config.param('bvatools_depth_of_coverage', 'other_options', required=False),
            reference_genome=readset.reference_file
        )
        job.name = "bvatools_depth_of_coverage." + readset.name + ".doc" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        return jobs
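The `tools.bed2interval_list` step above plausibly wraps Picard's BedToIntervalList, which needs the reference sequence dictionary (the `.dict` file derived from the FASTA path, as in the code above) to produce the `.interval_list` consumed by the hybrid-selection metrics. A hedged sketch of the command such a helper might build:

    # Illustrative only: what a bed2interval_list helper might wrap.
    # BedToIntervalList and its INPUT/SEQUENCE_DICTIONARY/OUTPUT options are
    # real Picard usage; the helper function itself is an assumption.
    import os

    def bed2interval_list_command(ref_dict, bed_file, interval_list):
        return ("java -jar picard.jar BedToIntervalList"
                " INPUT=" + bed_file +
                " SEQUENCE_DICTIONARY=" + ref_dict +
                " OUTPUT=" + interval_list)

    # The pipeline derives the .dict path from the reference FASTA:
    ref_dict = os.path.splitext("genome.fa")[0] + ".dict"  # -> "genome.dict"
    print(bed2interval_list_command(ref_dict, "targets.bed", "targets.interval_list"))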
Example #4
    def metrics(self):
        """
        Compute Picard multiple metrics, GATK genome coverage and an igvtools
        TDF coverage track for each sample.
        """
        jobs = []
        for sample in self.samples:
            file_prefix = os.path.join("alignment", sample.name, sample.name + ".sorted.dup.")
            input = file_prefix + "bam"

            job = picard.collect_multiple_metrics(input, file_prefix + "all.metrics")
            job.name = "picard_collect_multiple_metrics." + sample.name
            jobs.append(job)

            # Compute genome coverage
            job = gatk.depth_of_coverage(input, file_prefix + "all.coverage")
            job.name = "gatk_depth_of_coverage.genome." + sample.name
            jobs.append(job)

            job = igvtools.compute_tdf(input, input + ".tdf")
            job.name = "igvtools_compute_tdf." + sample.name
            jobs.append(job)
        return jobs
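`igvtools.compute_tdf` most likely shells out to `igvtools count`, which bins per-base coverage from a BAM into a binary `.tdf` track for fast display in IGV. A sketch under that assumption; the genome id is only a placeholder:

    # Assumption: compute_tdf wraps "igvtools count". The "-f min,max,mean"
    # option and the positional input/output/genome arguments are real
    # igvtools usage; "hg19" is only a placeholder genome id.
    def compute_tdf_command(bam, tdf, genome="hg19"):
        return "igvtools count -f min,max,mean " + bam + " " + tdf + " " + genome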
Example #5
    def picard_rna_metrics(self):
        """
        Computes a series of quality control metrics using both the CollectRnaSeqMetrics and CollectAlignmentSummaryMetrics
        functions; metrics are collected using [Picard](http://broadinstitute.github.io/picard/).
        """

        jobs = []
        reference_file = config.param('picard_rna_metrics', 'genome_fasta', type='filepath')
        for sample in self.samples:
            alignment_file = os.path.join("alignment", sample.name, sample.name + ".sorted.mdup.bam")
            output_directory = os.path.join("metrics", sample.name)

            job = concat_jobs([
                Job(command="mkdir -p " + output_directory, removable_files=[output_directory]),
                picard.collect_multiple_metrics(alignment_file, os.path.join(output_directory, sample.name), reference_file),
                picard.collect_rna_metrics(alignment_file, os.path.join(output_directory, sample.name + ".picard_rna_metrics"))
            ], name="picard_rna_metrics." + sample.name)
            jobs.append(job)

        return jobs
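A plausible reading of `concat_jobs` is that it merges several jobs into one: input and output files are pooled and the shell commands are chained with `&&` so that a failure stops the sequence. A self-contained sketch under that assumption (again, not the real GenPipes implementation):

    # Assumed concat_jobs semantics -- a sketch, not the GenPipes source.
    from dataclasses import dataclass, field
    from typing import List, Optional

    @dataclass
    class Job:
        input_files: List[str] = field(default_factory=list)
        output_files: List[str] = field(default_factory=list)
        command: str = ""
        name: Optional[str] = None

    def concat_jobs(jobs, name=None):
        # Chain with "&&" so the combined job short-circuits on the first failure.
        return Job(
            input_files=[f for j in jobs for f in j.input_files],
            output_files=[f for j in jobs for f in j.output_files],
            command=" && \\\n".join(j.command for j in jobs),
            name=name,
        )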
Example #6
    def picard_rna_metrics(self):
        """
        Computes a series of quality control metrics using both the CollectRnaSeqMetrics and CollectAlignmentSummaryMetrics
        functions; metrics are collected using [Picard](http://broadinstitute.github.io/picard/).
        """

        jobs = []
        reference_file = config.param('picard_rna_metrics', 'genome_fasta', type='filepath')
        for sample in self.samples:
            alignment_file = os.path.join("alignment", sample.name, sample.name + ".sorted.mdup.bam")
            output_directory = os.path.join("metrics", sample.name)

            job = concat_jobs([
                Job(command="mkdir -p " + output_directory, removable_files=[output_directory]),
                picard.collect_multiple_metrics(alignment_file, os.path.join(output_directory, sample.name), reference_file),
                picard.collect_rna_metrics(alignment_file, os.path.join(output_directory, sample.name + ".picard_rna_metrics"))
            ], name="picard_rna_metrics." + sample.name)
            jobs.append(job)

        return jobs
Example #7
    def metrics(self):
        """
        Compute metrics and generate coverage tracks per sample. Multiple metrics are computed at this stage:
        Number of raw reads, Number of filtered reads, Number of aligned reads, Number of duplicate reads,
        Median, mean and standard deviation of insert sizes of reads after alignment, percentage of bases
        covered at X reads (%_bases_above_50 means the % of exons bases which have at least 50 reads)
        whole genome or targeted percentage of bases covered at X reads (%_bases_above_50 means the % of exons
        bases which have at least 50 reads). A TDF (.tdf) coverage track is also generated at this step
        for easy visualization of coverage in the IGV browser.
        """

        jobs = []
        for sample in self.samples:
            input_file_prefix = os.path.join(
                "alignment", sample.name, sample.name + ".matefixed.sorted.")
            input = input_file_prefix + "bam"

            job = picard.collect_multiple_metrics(
                input, input_file_prefix + "all.metrics")
            job.name = "picard_collect_multiple_metrics." + sample.name
            job.samples = [sample]
            jobs.append(job)

            # Compute genome or target coverage with BVATools
            job = bvatools.depth_of_coverage(
                input,
                input_file_prefix + "coverage.tsv",
                bvatools.resolve_readset_coverage_bed(sample.readsets[0]),
                other_options=config.param('bvatools_depth_of_coverage',
                                           'other_options',
                                           required=False))
            job.name = "bvatools_depth_of_coverage." + sample.name
            job.samples = [sample]
            jobs.append(job)

            job = igvtools.compute_tdf(input, input + ".tdf")
            job.name = "igvtools_compute_tdf." + sample.name
            job.samples = [sample]
            jobs.append(job)

        return jobs
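`picard.collect_multiple_metrics`, used throughout these examples, presumably drives Picard's CollectMultipleMetrics, which runs several collectors (alignment summary, insert size, and so on) in a single pass over the BAM and writes one file per collector under the given output prefix. A hedged sketch of the command such a wrapper might emit:

    # CollectMultipleMetrics and its PROGRAM/INPUT/OUTPUT/REFERENCE_SEQUENCE
    # options are real Picard usage; the wrapper function is illustrative.
    def collect_multiple_metrics_command(bam, output_prefix, reference=None):
        command = ("java -jar picard.jar CollectMultipleMetrics"
                   " PROGRAM=CollectAlignmentSummaryMetrics"
                   " PROGRAM=CollectInsertSizeMetrics"
                   " INPUT=" + bam +
                   " OUTPUT=" + output_prefix)
        if reference:
            command += " REFERENCE_SEQUENCE=" + reference
        return command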
Example #8
    def metrics(self):
        """
        Compute metrics and generate coverage tracks per sample. Multiple metrics are computed at this stage:
        number of raw reads, number of filtered reads, number of aligned reads, number of duplicate reads,
        median, mean and standard deviation of insert sizes of reads after alignment, and the whole-genome
        or targeted percentage of bases covered at X reads (e.g. %_bases_above_50 means the % of target
        bases covered by at least 50 reads). A TDF (.tdf) coverage track is also generated at this step
        for easy visualization of coverage in the IGV browser.
        """

        jobs = []
        for sample in self.samples:
            input_file_prefix = os.path.join("alignment", sample.name, sample.name + ".matefixed.sorted.")
            input = input_file_prefix + "bam"

            job = picard.collect_multiple_metrics(input, input_file_prefix + "all.metrics")
            job.name = "picard_collect_multiple_metrics." + sample.name
            jobs.append(job)

            # Compute genome or target coverage with BVATools
            job = bvatools.depth_of_coverage(
                input,
                input_file_prefix + "coverage.tsv",
                bvatools.resolve_readset_coverage_bed(sample.readsets[0]),
                other_options=config.param('bvatools_depth_of_coverage', 'other_options', required=False)
            )

            job.name = "bvatools_depth_of_coverage." + sample.name
            jobs.append(job)

            job = igvtools.compute_tdf(input, input + ".tdf")
            job.name = "igvtools_compute_tdf." + sample.name
            jobs.append(job)

        return jobs
Example #9
    def get_metrics_jobs(self, readset):
        """
        Generate per-readset QC jobs: Picard multiple metrics, optional on-target
        (hybrid selection) metrics driven by a BED file, and BVATools depth of coverage.
        """
        jobs = []

        input_file_prefix = readset.bam + '.'
        input = input_file_prefix + "bam"

        job = picard.collect_multiple_metrics(
            input,
            input_file_prefix + "metrics",
            reference_sequence=readset.reference_file)
        job.name = "picard_collect_multiple_metrics." + readset.name + ".met" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        if readset.beds:
            coverage_bed = readset.beds[0]
            full_coverage_bed = self.output_dir + os.sep + coverage_bed
        else:
            coverage_bed = None
            full_coverage_bed = None

        if coverage_bed:
            if (not os.path.exists(full_coverage_bed)) and \
                    (coverage_bed not in BwaRunProcessingAligner.downloaded_bed_files):
                # Download the bed file
                command = config.param('DEFAULT',
                                       'fetch_bed_file_command').format(
                                           output_directory=self.output_dir,
                                           filename=coverage_bed)
                job = Job([], [full_coverage_bed],
                          command=command,
                          name="bed_download." + coverage_bed)
                BwaRunProcessingAligner.downloaded_bed_files.append(
                    coverage_bed)
                jobs.append(job)

            interval_list = re.sub(r"\.[^.]+$", ".interval_list", coverage_bed)

            if interval_list not in BwaRunProcessingAligner.created_interval_lists:
                # Create one job to generate the interval list from the bed file
                ref_dict = os.path.splitext(
                    readset.reference_file)[0] + '.dict'
                job = tools.bed2interval_list(ref_dict, full_coverage_bed,
                                              interval_list)
                job.name = "interval_list." + coverage_bed
                BwaRunProcessingAligner.created_interval_lists.append(
                    interval_list)
                jobs.append(job)

            job = picard.calculate_hs_metrics(
                input_file_prefix + "bam",
                input_file_prefix + "metrics.onTarget.txt",
                interval_list,
                reference_sequence=readset.reference_file)
            job.name = "picard_calculate_hs_metrics." + readset.name + ".hs" + "." + readset.run + "." + readset.lane
            jobs.append(job)

        jobs.extend(self.verify_bam_id(readset))

        job = bvatools.depth_of_coverage(
            input,
            input_file_prefix + "metrics.targetCoverage.txt",
            full_coverage_bed,
            other_options=config.param('bvatools_depth_of_coverage',
                                       'other_options',
                                       required=False),
            reference_genome=readset.reference_file)
        job.name = "bvatools_depth_of_coverage." + readset.name + ".doc" + "." + readset.run + "." + readset.lane
        jobs.append(job)

        return jobs
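Finally, `picard.calculate_hs_metrics` presumably wraps Picard's CalculateHsMetrics (renamed CollectHsMetrics in newer Picard releases), which reports on-target coverage for capture experiments. The sketch below passes the same interval list as both bait and target intervals, a common simplification when no separate bait design is available; the wrapper itself is an assumption:

    # CalculateHsMetrics / CollectHsMetrics and the BAIT_INTERVALS /
    # TARGET_INTERVALS options are real Picard usage; the helper is illustrative.
    def calculate_hs_metrics_command(bam, output_file, interval_list, reference=None):
        command = ("java -jar picard.jar CalculateHsMetrics"
                   " INPUT=" + bam +
                   " OUTPUT=" + output_file +
                   " BAIT_INTERVALS=" + interval_list +
                   " TARGET_INTERVALS=" + interval_list)
        if reference:
            command += " REFERENCE_SEQUENCE=" + reference
        return command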