def print_sample_statistics(args):
    """Print read-count totals accumulated over every mate of one sample.

    For each mate of the sample named by ``args.sampleName`` this sums:

    * the value returned by ``count_fastq_reads`` for the first and for the
      second FastQC zip path,
    * ``num_of_aligned`` from the alignment report loaded with
      ``BismarkReport.fromfile``,
    * ``sequences_left()`` from the ``DeduplicationReportParser`` built on
      the deduplication report path.

    The four totals are written to standard output.

    Args:
        args: Object exposing a ``sampleName`` attribute
            (presumably an ``argparse.Namespace`` -- confirm against caller).
    """
    print("Counting statistics")

    sample_name = args.sampleName
    mate_handler = MateHandler()
    sample = mate_handler.get_sample_by_name(sample_name)

    total_first = 0
    total_second = 0
    total_aligned = 0
    total_deduplicated = 0

    for mate in sample.mates:
        di = MateDirInfo(mate)

        total_first += count_fastq_reads(di.first_fastqc_zip_path)
        total_second += count_fastq_reads(di.second_fastqc_zip_path)

        alignment_report = BismarkReport.fromfile(di.alignment_report_path)
        total_aligned += alignment_report.num_of_aligned

        drp = DeduplicationReportParser(di.deduplication_report_path)
        total_deduplicated += drp.sequences_left()

    print("Total first: ", total_first)
    print("Total second: ", total_second)
    # Label previously read "Total algined: " (typo).
    print("Total aligned: ", total_aligned)
    print("Total deduplicated: ", total_deduplicated)
def merge_sample_alignments(args):
    """Run the sample-level pipeline for one sample on Condor.

    Looks up the sample named by ``args.sampleName``, wraps it in a
    ``SamplePipeline`` and hands that to a ``PipelineHandler`` configured
    with 22 cores, a memory value of 220000 (units not visible here) and
    ``clean_output_dir=True``, then calls ``run_on_condor()``.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    name = args.sampleName
    target_sample = MateHandler().get_sample_by_name(name)

    handler = PipelineHandler(
        SamplePipeline(target_sample),
        ncores=22,
        memory=220000,
        clean_output_dir=True,
    )
    handler.run_on_condor()
def align_sample(args):
    """Set up and launch one mate pipeline per mate of a sample.

    For every mate of the sample named by ``args.sampleName`` a
    ``MatePipeline`` is created, ``setup()`` is called on it, and it is run
    through a ``PipelineHandler`` (10 cores, memory value 10900) via
    ``run_on_condor()``.

    Args:
        args: Object exposing a ``sampleName`` attribute.
    """
    name = args.sampleName
    target_sample = MateHandler().get_sample_by_name(name)

    # Conditionally, submit all the samples directly to condor
    # (remove in the future).
    for current_mate in target_sample.mates:
        mate_pipeline = MatePipeline(current_mate)
        mate_pipeline.setup()
        PipelineHandler(mate_pipeline, ncores=10, memory=10900).run_on_condor()
print("*************************************", file=self.log_file_handler) print("Splitting:", file=self.log_file_handler) self.split_bam_by_chromosome() print("*************************************", file=self.log_file_handler) print("", file=self.log_file_handler) print("", file=self.log_file_handler) print("*************************************", file=self.log_file_handler) print("Extracting:", file=self.log_file_handler) self.extract_methylation() print("*************************************", file=self.log_file_handler) print("", file=self.log_file_handler) print("", file=self.log_file_handler) # destroy the logger self.log_file_handler.close() if __name__ == "__main__": print("Running sample pipeline for test sample") mate_handler = MateHandler("Config/samples_config.yaml") sample = mate_handler.get_sample_by_name("test_sample") dir_handler = SampleDirInfo(sample) # print(bamtools_clean_command(dir_handler.aligned_bam_path, dir_handler.filtered_bam_path)) # dir_handler = SampleDirInfo(sample) # print(dir_handler.list_aligned_bam_files()) sp = SamplePipeline(sample) sp.pipeline()