def workflow_align(fastq_dir, ref, gff, config_dict, today, local):
    """Run the full alignment workflow: index, align, sort, count, edit counts.

    Args:
        fastq_dir: directory tree searched for fastq files.
        ref: reference sequence used to build the alignment index.
        gff: annotation file passed to the bedtools counting step.
        config_dict: configuration settings forwarded to every job helper.
        today: date string forwarded to the count/edit-count jobs.
        local: if True each step runs synchronously; otherwise each step is
            submitted as a cluster job chained via job-id dependencies.

    Returns:
        [] when local, otherwise a dict mapping each fastq file to the list
        of submitted job ids [align, sam->bam, count, edit-count].
    """
    if local:
        # 1. Build index
        print("Building index...")
        bt2, _ = run_build_index_job(ref, config_dict, local)
        print("Index complete, index name: {}".format(bt2))
        # 2. Find fastq files
        print("Looking for fastq files")
        fastq_files = helpers.find_files_in_a_tree(fastq_dir, file_type='fastq')
        print("Found {} fastq files".format(len(fastq_files)))
        # 3. Iterate over them and align
        for file in fastq_files:
            print("Aligning {}".format(file))
            sam_file, _ = run_alignment_job(file, bt2, config_dict, local)
            print("Sorting {}".format(sam_file))
            sorted_bam, _ = run_sam_to_bam_conversion_and_sorting(
                sam_file, config_dict, local)
            print("Counting {}".format(sorted_bam))
            # FIX: pass `today` before `local` — matches the call signature
            # used by workflow_count; the original call dropped `today`.
            counts_file, _ = run_count_job_bedtools(gff, sorted_bam,
                                                    config_dict, today, local)
            print("Counting complete, count file: {}".format(counts_file))
            print("Editing count file")
            run_edit_count_job_bedtools(counts_file, config_dict, today, local)
        return []
    else:
        job_ids = {}
        # 1. Build index
        bt2, index_jobid = run_build_index_job(ref, config_dict, local)
        # 2. Find fastq files
        fastq_files = helpers.find_files_in_a_tree(fastq_dir, file_type='fastq')
        # 3. Iterate over them and align; each job depends on the previous one
        for file in fastq_files:
            sam_file, samfile_jobid = run_alignment_job(
                file, bt2, config_dict, local, index_jobid)
            sorted_bam, sam2bam_jobid = run_sam_to_bam_conversion_and_sorting(
                sam_file, config_dict, local, samfile_jobid)
            # FIX: pass `today` before `local` — consistent with the local
            # branch above and with workflow_count.
            counts_file, counts_jobid = run_count_job_bedtools(
                gff, sorted_bam, config_dict, today, local, sam2bam_jobid)
            # FIX: the original omitted `today` here although this function's
            # local branch and workflow_count both pass it.
            count_file_edited, ce_jobid = run_edit_count_job_bedtools(
                counts_file, config_dict, today, local, counts_jobid)
            job_ids[file] = [
                samfile_jobid, sam2bam_jobid, counts_jobid, ce_jobid
            ]
        return job_ids
def workflow_trim_and_qc(fastq_dir, config_dict, local=False):
    """Trim every fastq file under fastq_dir, then run fastqc on each result.

    Args:
        fastq_dir: directory tree searched for fastq files.
        config_dict: configuration settings forwarded to the job helpers.
        local: if True the jobs run synchronously; otherwise the fastqc job
            is submitted with a dependency on its trim job.

    Returns:
        A status string confirming job submission.
    """
    # FIX: use 'fastq' (no leading dot) — every other workflow in this file
    # passes the dot-less form to find_files_in_a_tree; '.fastq' was the
    # single inconsistent call site.
    files = helpers.find_files_in_a_tree(fastq_dir, file_type='fastq')
    for file in files:
        # Run trim job
        trimmed_fastq_file, trim_jobid = run_trim_job(file, config_dict, local)
        # Run fastqc job on the trimmed output, chained after the trim job
        run_fastqc_job(trimmed_fastq_file, config_dict, local, trim_jobid)
    return "Jobs for trim and qc submitted"
    # todo test workflow on flux
def flux_fastq_dir_ref(tmpdir, day):
    """Stage flux-scratch fastq reads and the MG1655 reference into tmpdir.

    Copies every fastq file found under the flux scratch reads directory
    into tmpdir, then copies the reference genome alongside them.

    Returns:
        Tuple of (tmpdir as str, path of the copied reference, day).
    """
    reads_dir = "/scratch/hmobley_fluxod/annasint/code/data/reads"
    for source in helpers.find_files_in_a_tree(reads_dir, "fastq"):
        destination = str(tmpdir.join(os.path.basename(source)))
        shutil.copy(source, destination)
    ref = "/scratch/hmobley_fluxod/annasint/code/data/ref/MG1655.fna"
    test_ref = str(tmpdir.join(os.path.basename(ref)))
    shutil.copy(ref, test_ref)
    return str(tmpdir), test_ref, day
def local_fastq_dir_ref(tmpdir, day):
    """Stage local fastq reads and the MG1655 reference into tmpdir.

    Copies every fastq file found under the local reads directory into
    tmpdir, then copies the reference genome alongside them.

    Returns:
        Tuple of (tmpdir as str, path of the copied reference, day).
    """
    reads_dir = "/Users/annasintsova/git_repos/code/data/reads"
    for source in helpers.find_files_in_a_tree(reads_dir, "fastq"):
        destination = str(tmpdir.join(os.path.basename(source)))
        shutil.copy(source, destination)
    ref = "/Users/annasintsova/git_repos/code/data/ref/MG1655.fna"
    test_ref = str(tmpdir.join(os.path.basename(ref)))
    shutil.copy(ref, test_ref)
    return str(tmpdir), test_ref, day
def workflow_count(gff, bam_folder, config_dict, today, local):
    """Count reads per feature for every bam under bam_folder, then edit counts.

    Local mode runs each count/edit step synchronously; otherwise the
    edit-count job is submitted with a dependency on its count job.

    Args:
        gff: annotation file passed to the bedtools count job.
        bam_folder: directory tree searched for bam files.
        config_dict: configuration settings forwarded to the job helpers.
        today: date string forwarded to the count/edit jobs.
        local: run synchronously (True) or submit cluster jobs (False).
    """
    bams = helpers.find_files_in_a_tree(bam_folder, file_type="bam")
    for bam in bams:
        if local:
            count_file, _ = run_count_job_bedtools(gff, bam, config_dict,
                                                   today, local)
            run_edit_count_job_bedtools(count_file, config_dict, today, local)
        else:
            count_file, jobid = run_count_job_bedtools(gff, bam, config_dict,
                                                       today, local)
            run_edit_count_job_bedtools(count_file, config_dict, today, local,
                                        job_dependency=jobid)
def workflow_bam_stats(bam_dir, today, local, job_dependency=""):
    """Collect alignment statistics for every bam file under bam_dir.

    In local mode, computes total/mapped/%-mapped per bam via run_bam_stats
    and writes <today>_alignment_stats.csv into bam_dir (returns None).
    Otherwise, submits a flux job that re-runs this workflow with -local
    and returns the submitted job id.
    """
    if not local:
        # Re-invoke this same workflow on a cluster node with -local set.
        script = "python workflow.py -a bam-stats -i {} -local".format(bam_dir)
        jobid = submit_flux_job(bam_dir, "alignment_stats",
                                "alignment_stats", script, job_dependency)
        return jobid
    labels = ["Name", "Total", "Mapped", "% Mapped"]
    records = []
    for bam in helpers.find_files_in_a_tree(bam_dir, file_type="bam"):
        # Sample name is the bam filename up to its first dot.
        sample = os.path.basename(bam).split(".")[0]
        total, mapped, pcnt_mapped = run_bam_stats(bam)
        records.append((sample, total, mapped, pcnt_mapped))
    stats_df = pd.DataFrame.from_records(records, index="Name", columns=labels)
    stats_df.to_csv(os.path.join(bam_dir, today + "_alignment_stats.csv"))
def workflow_qc(fastq_dir, config_dict, local=False):
    """Run (or submit) a fastqc job for every fastq file under fastq_dir.

    Returns:
        A status string confirming job submission.
    """
    for fastq_file in helpers.find_files_in_a_tree(fastq_dir,
                                                   file_type='fastq'):
        run_fastqc_job(fastq_file, config_dict, local)
    return "Fastqc jobs submitted!"