def run_main(config, config_file, work_dir, parallel, fc_dir=None, run_info_yaml=None):
    """Drive the complete analysis for a set of input files.

    config -- Configuration passed to logging setup, run-info lookup and
              resource settings
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored as the "work" entry of the directory map
    parallel -- Parallel settings handed to _set_resources and parallel_runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # The fastq directory is only looked up when a flowcell directory is given.
    if fc_dir:
        fastq_source = get_fastq_dir(fc_dir)
    else:
        fastq_source = None
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_source, config, config_file)

    # From here on, refer to the configuration file by its name inside config_dir.
    resolved_config_file = os.path.join(config_dir, os.path.basename(config_file))
    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }
    resource_config = _set_resources(parallel, config)
    run_parallel = parallel_runner(parallel, dirs, resource_config, resolved_config_file)

    # process each flowcell lane
    multiplexed = add_multiplex_across_lanes(run_info["details"], dirs["fastq"], fc_name)
    lane_args = ((item, fc_name, fc_date, dirs, resource_config) for item in multiplexed)
    processed_lanes = run_parallel("process_lane", lane_args)
    aligned_lanes = run_parallel("process_alignment", processed_lanes)

    # process samples, potentially multiplexed across multiple lanes
    samples = organize_samples(aligned_lanes, dirs, resolved_config_file)
    for stage in ("merge_sample", "prep_recal"):
        samples = run_parallel(stage, samples)
    samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
    samples = parallel_realign_sample(samples, run_parallel)
    samples = parallel_variantcall(samples, run_parallel)
    samples = run_parallel("postprocess_variants", samples)
    samples = combine_multiple_callers(samples)
    for stage in ("detect_sv", "combine_calls"):
        samples = run_parallel(stage, samples)

    # The results of these two stages are not fed into later steps.
    run_parallel("process_sample", samples)
    run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})

    write_project_summary(samples)
    write_metrics(run_info, fc_name, fc_date, dirs)
def run(self, config, config_file, run_parallel, dirs, lane_items):
    """Run the pipeline stages on prepared lane items and return the samples.

    config -- Accepted for interface compatibility; not read in this method
    config_file -- Configuration file path forwarded to organize_samples
    run_parallel -- Callable invoked as run_parallel(stage_name, items[, extra])
    dirs -- Directory mapping forwarded to organize_samples
    lane_items -- Lane-level inputs handed to the "trim_lane" stage

    Returns the samples produced by qcsummary.generate_parallel.
    """
    trimmed_lanes = run_parallel("trim_lane", lane_items)
    aligned_lanes = run_parallel("process_alignment", trimmed_lanes)

    # process samples, potentially multiplexed across multiple lanes
    samples = organize_samples(aligned_lanes, dirs, config_file)
    for stage in ("merge_sample", "prep_recal"):
        samples = run_parallel(stage, samples)
    samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
    samples = parallel_realign_sample(samples, run_parallel)
    samples = parallel_variantcall(samples, run_parallel)
    samples = run_parallel("postprocess_variants", samples)
    samples = combine_multiple_callers(samples)
    samples = ensemble.combine_calls_parallel(samples, run_parallel)
    samples = run_parallel("detect_sv", samples)
    samples = qcsummary.generate_parallel(samples, run_parallel)

    # The bigwig stage's result is discarded; the samples are returned as-is.
    run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})
    return samples
def run(self, config, config_file, run_parallel, parallel, dirs, lane_items):
    """Reject use of the deprecated `variant` pipeline.

    All parameters are accepted only so existing callers keep working; none
    of them is read. The call always fails and points the caller at
    `variant2`.

    Raises NotImplementedError unconditionally.
    """
    # The pipeline statements that used to follow this raise could never
    # execute, so they have been removed. The two message fragments are
    # joined with an explicit ". " so the sentences do not run together.
    raise NotImplementedError(
        "`variant` processing is deprecated: please use `variant2`. "
        "The next version will alias variant to the new variant2 pipeline")
def run_main(config, config_file, work_dir, parallel, fc_dir=None, run_info_yaml=None):
    """Top-level entry point: process lanes, then samples, then write reports.

    config -- Configuration passed to logging setup, run-info lookup and
              resource settings
    config_file -- Main YAML configuration file with system parameters
    work_dir -- Directory stored under the "work" key of the directory map
    parallel -- Parallel settings handed to _set_resources and parallel_runner
    fc_dir -- Directory of fastq files to process
    run_info_yaml -- YAML configuration file specifying inputs to process
    """
    setup_logging(config)
    fc_name, fc_date, run_info = get_run_info(fc_dir, config, run_info_yaml)

    # Without a flowcell directory there is no fastq directory to resolve.
    fastq_input = None
    if fc_dir:
        fastq_input = get_fastq_dir(fc_dir)
    fastq_dir, galaxy_dir, config_dir = _get_full_paths(fastq_input, config, config_file)

    # Re-anchor the configuration file name under the resolved config directory.
    config_name = os.path.basename(config_file)
    anchored_config_file = os.path.join(config_dir, config_name)

    dirs = {
        "fastq": fastq_dir,
        "galaxy": galaxy_dir,
        "work": work_dir,
        "flowcell": fc_dir,
        "config": config_dir,
    }
    run_config = _set_resources(parallel, config)
    run_parallel = parallel_runner(parallel, dirs, run_config, anchored_config_file)

    # process each flowcell lane
    lane_infos = add_multiplex_across_lanes(run_info["details"], dirs["fastq"], fc_name)
    lane_inputs = ((lane, fc_name, fc_date, dirs, run_config) for lane in lane_infos)
    lane_results = run_parallel("process_lane", lane_inputs)
    alignments = run_parallel("process_alignment", lane_results)

    # process samples, potentially multiplexed across multiple lanes
    samples = organize_samples(alignments, dirs, anchored_config_file)
    for step in ("merge_sample", "prep_recal"):
        samples = run_parallel(step, samples)
    samples = recalibrate.parallel_write_recal_bam(samples, run_parallel)
    samples = parallel_realign_sample(samples, run_parallel)
    samples = parallel_variantcall(samples, run_parallel)
    samples = run_parallel("postprocess_variants", samples)
    samples = combine_multiple_callers(samples)
    for step in ("detect_sv", "combine_calls"):
        samples = run_parallel(step, samples)

    # Return values of the final two parallel stages are intentionally unused.
    run_parallel("process_sample", samples)
    run_parallel("generate_bigwig", samples, {"programs": ["ucsc_bigwig"]})

    write_project_summary(samples)
    write_metrics(run_info, fc_name, fc_date, dirs)