Пример #1
0
    def execute(self):
        """Write a BAM QC summary table to a temp file and upload it.

        One tab-separated row is written per entry of 'self.processed_bams':
        sample id, BAM file name, pct_pf_reads_aligned, strand_balance and a
        PASS/FAIL status computed by 'bam_qc_metrics_ok'. The uploaded file
        is stored in 'self.uploaded_file'.
        """

        # NOTE: don't create a transient temporary file (not thread safe)
        # because actual upload happens in another thread
        temp_filename = tempfile.gettempdir() + "/bam_qc_metrics.tsv"

        header = [
            "sample_id",
            "bam_file",
            "pct_pf_reads_aligned",
            "strand_balance",
            "status",
        ]

        # context manager guarantees the handle is closed (and content
        # flushed) even if serializing one of the rows raises
        with open(temp_filename, "wt") as temp:
            # write header
            temp.write("\t".join(header) + "\n")

            # write content
            for pb in self.processed_bams:
                metrics_ok = bam_qc_metrics_ok(pb.qc_metrics, self.config_)
                temp.write("\t".join([
                    pb.bam_file.metadata["sample_id"],
                    pb.bam_file.name,
                    str(pb.qc_metrics.pct_pf_reads_aligned),
                    str(pb.qc_metrics.strand_balance),
                    "PASS" if metrics_ok else "FAIL",
                ]) + "\n")

        # upload file to platform (overwrites existing file)
        self.uploaded_file = UploadFile(local_path=temp_filename,
                                        to_project=Context().project,
                                        overwrite=True).file
Пример #2
0
    def execute(self):
        """Run the "bwa" app on this step's FASTQ files.

        The "merged_bam" output of the finished task is stored in
        'self.merged_bam'.
        """
        context = Context()

        bwa_inputs = {
            "FASTQ": self.fastqs,
            "Input_reference": context.refs["bwa_bundle"],
        }

        # task name carries the sample id found on the first FASTQ file
        bwa_task_name = "BWAmem-" + self.fastqs[0].metadata["sample_id"]

        self.run_task(app_name="bwa",
                      inputs=bwa_inputs,
                      task_name=bwa_task_name)

        task_outputs = self.task.outputs
        self.merged_bam = task_outputs["merged_bam"]
Пример #3
0
    def run_task(self, app_name, inputs, task_name=None):
        """Run app 'app_name' with 'inputs' and keep the finished task.

        'app_name' is looked up in the apps of the current Context. The task
        name is 'task_name' (or 'self.name_' when none is given) followed by
        the current local date and time. The finished task is stored in
        'self.task'; nothing is returned.
        """
        context = Context()

        # fall back to this step's own name when the caller supplies none
        base_name = task_name or self.name_
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        runner = FindOrCreateAndRunTask(
            new_name=base_name + " - " + timestamp,
            inputs=inputs,
            app=context.apps[app_name],
            in_project=context.project,
        )
        self.task = runner.finished_task
Пример #4
0
    def parse_manifest_into_cohort(filename):
        """Parse a tab-separated manifest file into a Cohort.

        If 'filename' starts with "sb://", the remainder is split into a
        project id and a file name; that file is copied into the project of
        the current Context and downloaded to a local temp file, which is
        then parsed instead.

        The first line is skipped as header. Blank lines and lines starting
        with "#" are ignored. Every other line must hold exactly five
        tab-separated fields: patient_id, sample_id, read_group, fq1, fq2.

        Returns the populated Cohort.
        Raises ValueError if a data line does not have five fields.
        """

        logging.info(f"Reading manifest file: '{filename}'")

        if filename.startswith("sb://"):
            project_id, file_name = os.path.split(filename[5:])
            sbfile = FindOrCopyFilesByName(
                "CopyManifest",
                names=[file_name],
                from_project=SBApi().projects.get(project_id),
                to_project=Context().project,
            ).copied_files[0]
            filename = tempfile.gettempdir() + "/manifest.txt"
            sbfile.download(path=filename)

        cohort = Cohort(manifest_file=filename)

        num_entries = 0
        with open(str(filename), "r") as f:
            # iterate the file lazily instead of loading all lines at once
            for line_no, line in enumerate(f):

                if line_no == 0:  # skip header
                    continue

                row = line.strip()

                # blank lines (e.g. trailing newline at end of file) carry
                # no entry and would otherwise break the unpacking below
                if not row or row.startswith("#"):
                    continue

                fields = row.split("\t")
                if len(fields) != 5:
                    raise ValueError(
                        f"{filename}, line {line_no + 1}: expected 5 "
                        f"tab-separated fields, got {len(fields)}")
                patient_id, sample_id, read_group, fq1, fq2 = fields

                # patients and samples are created on first occurrence
                patient = cohort.get_patient_by_id(patient_id)
                if not patient:
                    patient = Patient(patient_id)
                    cohort.add_patient(patient)

                sample = patient.get_sample_by_id(sample_id)
                if not sample:
                    sample = Sample(sample_id)
                    patient.add_sample(sample)

                lane = Lane(read_group=read_group, fq1=fq1, fq2=fq2)
                sample.add_lane(lane)

                num_entries += 1

        logging.info("  %d manifest entries read.", num_entries)

        return cohort
Пример #5
0
    def execute(self):
        """Run the "alignmentqc" app on 'self.input_bam' and parse its metrics.

        Stores the task's "summary_metrics" output in
        'self.summary_metrics_file', the result of
        'parse_qc_from_metrics_file' in 'self.qc_metrics', and logs two of
        the parsed values.
        """
        context = Context()

        qc_inputs = {
            "input_bam": self.input_bam,
            "reference": context.refs["reference_fasta"],
        }

        # task name carries the sample id found on the input BAM
        qc_task_name = "AlignmentQC-" + self.input_bam.metadata["sample_id"]

        self.run_task(app_name="alignmentqc",
                      inputs=qc_inputs,
                      task_name=qc_task_name)

        task_outputs = self.task.outputs
        self.summary_metrics_file = task_outputs["summary_metrics"]
        self.qc_metrics = self.parse_qc_from_metrics_file()

        aligned_msg = (
            f"pct_pf_reads_aligned: {self.qc_metrics.pct_pf_reads_aligned}")
        logging.info(aligned_msg)
        balance_msg = f"strand balance: {self.qc_metrics.strand_balance}"
        logging.info(balance_msg)
Пример #6
0
    def parse_manifest_into_cohort(manifest_file):
        """Parse a tab-separated manifest file object into a Cohort.

        'manifest_file' is copied into the project of the current Context
        and downloaded to a local temp file (overwriting an existing one),
        which is then parsed.

        The first line is skipped as header. Blank lines and lines starting
        with "#" are ignored. Every other line must hold exactly five
        tab-separated fields: patient_id, sample_id, read_group, fq1, fq2.

        Returns the populated Cohort.
        Raises ValueError if a data line does not have five fields.
        """

        logging.info(f"Reading manifest file: '{manifest_file.name}'")

        # copy manifest into analysis project and parse content
        # NOTE(review): the copied file itself is not used below; presumably
        # the attribute access is only there to trigger the copy - confirm
        FindOrCopyFiles(
            "CopyManifest",
            files=[manifest_file],
            to_project=Context().project,
        ).copied_files[0]
        filename = tempfile.gettempdir() + "/manifest.txt"
        manifest_file.download(path=filename, overwrite=True)

        cohort = Cohort(manifest_file=manifest_file.name)

        num_entries = 0
        with open(str(filename), "r") as f:
            # iterate the file lazily instead of loading all lines at once
            for line_no, line in enumerate(f):

                if line_no == 0:  # skip header
                    continue

                row = line.strip()

                # blank lines (e.g. trailing newline at end of file) carry
                # no entry and would otherwise break the unpacking below
                if not row or row.startswith("#"):
                    continue

                fields = row.split("\t")
                if len(fields) != 5:
                    raise ValueError(
                        f"{manifest_file.name}, line {line_no + 1}: expected "
                        f"5 tab-separated fields, got {len(fields)}")
                patient_id, sample_id, read_group, fq1, fq2 = fields

                # patients and samples are created on first occurrence
                patient = cohort.get_patient_by_id(patient_id)
                if not patient:
                    patient = Patient(patient_id)
                    cohort.add_patient(patient)

                sample = patient.get_sample_by_id(sample_id)
                if not sample:
                    sample = Sample(sample_id)
                    patient.add_sample(sample)

                lane = Lane(read_group=read_group, fq1=fq1, fq2=fq2)
                sample.add_lane(lane)

                num_entries += 1

        logging.info("  %d manifest entries read.", num_entries)

        return cohort
Пример #7
0
    def execute(self):
        """Main execution method. Execution starts here.

        Initializes the Context for 'self.project_name', loads the manifest,
        creates one ProcessSample step per sample and stores the file
        uploaded by CollectAndUploadQCSummary in 'self.qc_summary'.
        """

        # set up execution project, stage apps and reference files
        Context().initialize(project_name=self.project_name)

        # manifest -> cohort (imports fastq files, sets metadata)
        cohort = load_manifest(self.manifest_filename)

        # one processing step per sample
        # note: processing happens in parallel due to use of promises
        processed_bams = []
        for sample in cohort.samples:
            sample_step = ProcessSample(fastqs=sample.fastqs, name_=sample.id)
            processed_bams.append(sample_step.processed_bam)

        # gather BAM QC metrics and upload the summary file
        summary_step = CollectAndUploadQCSummary(processed_bams=processed_bams)
        self.qc_summary = summary_step.uploaded_file
Пример #8
0
    def stage_input_files_in_bulk(cohort):
        """Copy all lane FASTQ files into the execution project in one step.

        Collects 'fq1' and 'fq2' of every lane of every sample in 'cohort',
        copies them by name from the project configured as
        'config.fastq_project' into the Context's project, and then replaces
        each lane's 'fq1'/'fq2' with the copied file of the same name.
        A single bulk copy is used to save API calls.
        """
        context = Context()
        source_project = SBApi().projects.get(id=context.config.fastq_project)

        # flat list of names: fq1, fq2 of each lane, in cohort order
        fastq_names = []
        for sample in cohort.samples:
            for lane in sample.lanes:
                fastq_names.extend((lane.fq1, lane.fq2))

        copy_step = FindOrCopyFilesByName(
            names=fastq_names,
            from_project=source_project,
            to_project=context.project)

        # index copied files by name for the lookups below
        staged_by_name = {}
        for staged in copy_step.copied_files:
            staged_by_name[staged.name] = staged

        # swap the names on each lane for the staged files
        for sample in cohort.samples:
            for lane in sample.lanes:
                lane.fq1 = staged_by_name[lane.fq1]
                lane.fq2 = staged_by_name[lane.fq2]
Пример #9
0
    def execute(self):
        """Main execution method. Execution starts here.

        Initializes the Context for 'self.project_name', loads the manifest,
        creates one explicitly named ProcessSample step per sample, records
        its results on the sample, and stores the file uploaded by
        CollectAndUploadQCSummary in 'self.qc_summary'.
        """

        # set up execution project, stage apps and reference files
        Context().initialize(project_name=self.project_name)

        # manifest -> cohort (imports fastq files, sets metadata)
        cohort = load_manifest(self.manifest_filename)

        for sample in cohort.samples:
            # each sample is processed in a separate step;
            # the step must be named explicitly because it is created in a loop
            step_name = f"Process-{sample.id}"
            sample_step = ProcessSample(step_name, fastqs=sample.fastqs)

            # keep results on the sample for downstream aggregation
            sample.aligned_bam = sample_step.aligned_bam
            sample.bam_qc_metrics = sample_step.bam_qc_metrics

        # gather the per-sample QC metrics in cohort order
        collected_metrics = []
        for entry in cohort.samples:
            collected_metrics.append(entry.bam_qc_metrics)

        # upload the QC metrics summary file to the SB platform and
        # expose the uploaded file as output
        summary_step = CollectAndUploadQCSummary(qc_metrics=collected_metrics)
        self.qc_summary = summary_step.uploaded_file