def execute(self):
    """Write per-sample BAM QC metrics to a TSV file and upload it.

    Builds a tab-separated summary (one row per processed BAM, with a
    PASS/FAIL status from ``bam_qc_metrics_ok``) and uploads it to the
    current platform project, storing the result in ``self.uploaded_file``.
    """
    # NOTE: don't create a transient temporary file (not thread safe)
    # because actual upload happens in another thread
    temp_filename = tempfile.gettempdir() + "/bam_qc_metrics.tsv"
    # 'with' guarantees the file handle is closed even if a write raises
    with open(temp_filename, "wt") as temp:
        # header row
        temp.write("\t".join([
            "sample_id",
            "bam_file",
            "pct_pf_reads_aligned",
            "strand_balance",
            "status",
        ]) + "\n")
        # one row per processed BAM
        for pb in self.processed_bams:
            metrics_ok = bam_qc_metrics_ok(pb.qc_metrics, self.config_)
            temp.write("\t".join([
                pb.bam_file.metadata["sample_id"],
                pb.bam_file.name,
                str(pb.qc_metrics.pct_pf_reads_aligned),
                str(pb.qc_metrics.strand_balance),
                "PASS" if metrics_ok else "FAIL",
            ]) + "\n")
    # upload file to platform (overwrites existing file)
    self.uploaded_file = UploadFile(
        local_path=temp_filename,
        to_project=Context().project,
        overwrite=True,
    ).file
def execute(self):
    """Align this step's FASTQs with BWA and capture the merged BAM."""
    context = Context()
    sample_id = self.fastqs[0].metadata["sample_id"]
    self.run_task(
        app_name="bwa",
        inputs={
            "FASTQ": self.fastqs,
            "Input_reference": context.refs["bwa_bundle"],
        },
        task_name="BWAmem-" + sample_id,
    )
    self.merged_bam = self.task.outputs["merged_bam"]
def run_task(self, app_name, inputs, task_name=None):
    """Execute an app on the SB platform and store the finished task.

    'app_name' must have a defined app in the automation config file;
    when 'task_name' is falsy the step's own name is used instead.
    """
    context = Context()
    if not task_name:
        task_name = self.name_
    # timestamp makes repeated runs distinguishable on the platform
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    self.task = FindOrCreateAndRunTask(
        new_name=task_name + " - " + timestamp,
        inputs=inputs,
        app=context.apps[app_name],
        in_project=context.project,
    ).finished_task
def parse_manifest_into_cohort(filename):
    """Parse a sample manifest into a Cohort.

    'filename' may be a local path or an 'sb://project/file' URI; in the
    latter case the file is copied into the analysis project and
    downloaded locally first. Manifest rows are tab-separated
    (patient_id, sample_id, read_group, fq1, fq2); the first line is a
    header and '#'-prefixed lines are comments.

    Returns the populated Cohort.
    """
    # BUG FIX: the original f-string had no placeholder and always
    # logged a literal placeholder instead of the actual filename
    logging.info(f"Reading manifest file: '{filename}'")
    if filename.startswith("sb://"):
        # manifest lives on the SB platform: copy into the analysis
        # project, then download a local copy for parsing
        project_id, file_name = os.path.split(filename[5:])
        sbfile = FindOrCopyFilesByName(
            "CopyManifest",
            names=[file_name],
            from_project=SBApi().projects.get(project_id),
            to_project=Context().project,
        ).copied_files[0]
        filename = tempfile.gettempdir() + "/manifest.txt"
        sbfile.download(path=filename)
    cohort = Cohort(manifest_file=filename)
    num_entries = 0
    with open(str(filename), "r") as f:
        for line_no, line in enumerate(f.readlines()):
            if line_no == 0:
                # skip header
                continue
            if line.strip().startswith("#"):
                # skip comment lines
                continue
            patient_id, sample_id, read_group, fq1, fq2 = \
                line.strip().split("\t")
            # create patient/sample lazily on first sighting
            patient = cohort.get_patient_by_id(patient_id)
            if not patient:
                patient = Patient(patient_id)
                cohort.add_patient(patient)
            sample = patient.get_sample_by_id(sample_id)
            if not sample:
                sample = Sample(sample_id)
                patient.add_sample(sample)
            lane = Lane(read_group=read_group, fq1=fq1, fq2=fq2)
            sample.add_lane(lane)
            num_entries += 1
    logging.info(" %d manifest entries read." % num_entries)
    return cohort
def execute(self):
    """Run the alignment QC app on the input BAM and record QC metrics."""
    context = Context()
    sample_id = self.input_bam.metadata["sample_id"]
    self.run_task(
        app_name="alignmentqc",
        inputs={
            "input_bam": self.input_bam,
            "reference": context.refs["reference_fasta"],
        },
        task_name="AlignmentQC-" + sample_id,
    )
    self.summary_metrics_file = self.task.outputs["summary_metrics"]
    self.qc_metrics = self.parse_qc_from_metrics_file()
    logging.info(
        f"pct_pf_reads_aligned: {self.qc_metrics.pct_pf_reads_aligned}")
    logging.info(f"strand balance: {self.qc_metrics.strand_balance}")
def parse_manifest_into_cohort(manifest_file):
    """Copy a platform manifest file locally and parse it into a Cohort.

    Manifest rows are tab-separated (patient_id, sample_id, read_group,
    fq1, fq2); the first line is a header and '#'-prefixed lines are
    comments. Returns the populated Cohort.
    """
    logging.info(f"Reading manifest file: '{manifest_file.name}'")
    # copy manifest into the analysis project (side effect only),
    # then download a local copy to parse
    FindOrCopyFiles(
        "CopyManifest",
        files=[manifest_file],
        to_project=Context().project,
    ).copied_files[0]
    local_path = tempfile.gettempdir() + "/manifest.txt"
    manifest_file.download(path=local_path, overwrite=True)
    cohort = Cohort(manifest_file=manifest_file.name)
    num_entries = 0
    with open(str(local_path), "r") as manifest:
        for row_no, row in enumerate(manifest.readlines()):
            if row_no == 0:
                continue  # header row
            stripped = row.strip()
            if stripped.startswith("#"):
                continue  # comment row
            patient_id, sample_id, read_group, fq1, fq2 = \
                stripped.split("\t")
            # create patient/sample lazily on first sighting
            patient = cohort.get_patient_by_id(patient_id)
            if not patient:
                patient = Patient(patient_id)
                cohort.add_patient(patient)
            sample = patient.get_sample_by_id(sample_id)
            if not sample:
                sample = Sample(sample_id)
                patient.add_sample(sample)
            sample.add_lane(Lane(read_group=read_group, fq1=fq1, fq2=fq2))
            num_entries += 1
    logging.info(" %d manifest entries read." % num_entries)
    return cohort
def execute(self):
    """Main execution method. Execution starts here."""
    # set up the execution project and stage apps / reference files
    Context().initialize(project_name=self.project_name)
    # parse the manifest into a cohort (imports fastqs, sets metadata)
    cohort = load_manifest(self.manifest_filename)
    # kick off one ProcessSample step per sample; promises make the
    # steps run in parallel
    processed_bams = []
    for sample in cohort.samples:
        step = ProcessSample(fastqs=sample.fastqs, name_=sample.id)
        processed_bams.append(step.processed_bam)
    # collect BAM QC metrics and upload the summary file
    self.qc_summary = CollectAndUploadQCSummary(
        processed_bams=processed_bams).uploaded_file
def stage_input_files_in_bulk(cohort):
    """Copy all input files to the execution project in one bulk call.

    A single bulk copy saves API calls compared to copying per lane.
    Replaces each lane's fq1/fq2 name with the staged file object.
    """
    context = Context()
    fastq_project = SBApi().projects.get(id=context.config.fastq_project)
    # gather every fastq name across all samples and lanes
    names_to_stage = []
    for sample in cohort.samples:
        for lane in sample.lanes:
            names_to_stage.append(lane.fq1)
            names_to_stage.append(lane.fq2)
    copied = FindOrCopyFilesByName(
        names=names_to_stage,
        from_project=fastq_project,
        to_project=context.project,
    ).copied_files
    # index staged files by name, then swap names for file objects
    by_name = {f.name: f for f in copied}
    for sample in cohort.samples:
        for lane in sample.lanes:
            lane.fq1 = by_name[lane.fq1]
            lane.fq2 = by_name[lane.fq2]
def execute(self):
    """Main execution method. Execution starts here."""
    # set up the execution project and stage apps / reference files
    Context().initialize(project_name=self.project_name)
    # parse the manifest into a cohort (imports fastqs, sets metadata)
    cohort = load_manifest(self.manifest_filename)
    for sample in cohort.samples:
        # each sample runs in its own step; the step must be named
        # explicitly because steps are created inside a loop
        step = ProcessSample(f"Process-{sample.id}", fastqs=sample.fastqs)
        # keep outputs for the downstream aggregation steps
        sample.aligned_bam = step.aligned_bam
        sample.bam_qc_metrics = step.bam_qc_metrics
    # upload the QC metrics summary file to the SB platform and
    # expose the uploaded file on output
    self.qc_summary = CollectAndUploadQCSummary(
        qc_metrics=[s.bam_qc_metrics for s in cohort.samples]
    ).uploaded_file