示例#1
0
def workflow_align(fastq_dir, ref, gff, config_dict, today, local):
    """Index a reference, then align, sort, and count every fastq file.

    Builds a bowtie2 index from *ref*, locates all fastq files under
    *fastq_dir*, aligns each one, converts/sorts the SAM output to bam,
    counts features against *gff* with bedtools, and post-edits each
    count file.

    Args:
        fastq_dir: directory tree searched for fastq files.
        ref: reference sequence to index.
        gff: annotation file used for feature counting.
        config_dict: configuration passed through to every job runner.
        today: date string stamped onto count/edited-count outputs.
        local: if True run each step in-process; otherwise submit cluster
            jobs chained via job-id dependencies.

    Returns:
        An empty list when run locally; otherwise a dict mapping each
        fastq file to the list of job ids submitted for its pipeline.
    """
    if local:
        # 1. Build index
        print("Building index...")
        bt2, _ = run_build_index_job(ref, config_dict, local)
        print("Index complete, index name: {}".format(bt2))
        # 2. Find fastq files
        print("Looking for fastq files")
        fastq_files = helpers.find_files_in_a_tree(fastq_dir,
                                                   file_type='fastq')
        print("Found {} fastq files".format(len(fastq_files)))
        # 3. Iterate over them and align
        for fastq_file in fastq_files:
            print("Aligning {}".format(fastq_file))
            sam_file, _ = run_alignment_job(fastq_file, bt2, config_dict,
                                            local)
            print("Sorting {}".format(sam_file))
            sorted_bam, _ = run_sam_to_bam_conversion_and_sorting(
                sam_file, config_dict, local)
            print("Counting {}".format(sorted_bam))
            # Pass `today` as in workflow_count; it was previously omitted
            # here, shifting `local` into the wrong positional slot.
            counts_file, _ = run_count_job_bedtools(gff, sorted_bam,
                                                    config_dict, today, local)
            print("Counting complete, count file: {}".format(counts_file))
            print("Editing count file")
            run_edit_count_job_bedtools(counts_file, config_dict, today, local)
        return []
    else:
        job_ids = {}
        # 1. Build index
        bt2, index_jobid = run_build_index_job(ref, config_dict, local)
        # 2. Find fastq files
        fastq_files = helpers.find_files_in_a_tree(fastq_dir,
                                                   file_type='fastq')
        # 3. Iterate over them and align; each step depends on the
        # previous step's job id so the scheduler runs them in order.
        for fastq_file in fastq_files:
            sam_file, samfile_jobid = run_alignment_job(
                fastq_file, bt2, config_dict, local, index_jobid)
            sorted_bam, sam2bam_jobid = run_sam_to_bam_conversion_and_sorting(
                sam_file, config_dict, local, samfile_jobid)
            # `today` inserted before `local` to match the signature used
            # by workflow_count and by the local branch above.
            counts_file, counts_jobid = run_count_job_bedtools(
                gff, sorted_bam, config_dict, today, local, sam2bam_jobid)
            count_file_edited, ce_jobid = run_edit_count_job_bedtools(
                counts_file, config_dict, today, local, counts_jobid)
            job_ids[fastq_file] = [
                samfile_jobid, sam2bam_jobid, counts_jobid, ce_jobid
            ]
        return job_ids
示例#2
0
def workflow_trim_and_qc(fastq_dir, config_dict, local=False):
    """Trim every fastq file under *fastq_dir*, then run fastqc on it.

    Args:
        fastq_dir: directory tree searched for fastq files.
        config_dict: configuration passed through to the job runners.
        local: if True run jobs in-process; otherwise submit them with
            the fastqc job depending on the trim job.

    Returns:
        A short status message string.
    """
    # Use 'fastq' (no dot) consistently with every other find_files_in_a_tree
    # call in this module; '.fastq' was a typo.
    files = helpers.find_files_in_a_tree(fastq_dir, file_type='fastq')
    for fastq_file in files:
        # Run trim job
        trimmed_fastq_file, trim_jobid = run_trim_job(fastq_file, config_dict,
                                                      local)
        # Run fastqc job (depends on the trim job when submitted)
        run_fastqc_job(trimmed_fastq_file, config_dict, local, trim_jobid)
    return "Jobs for trim and qc submitted"  # todo test workflow on flux
示例#3
0
def flux_fastq_dir_ref(tmpdir, day):
    """Stage flux test reads and reference into *tmpdir*.

    Copies every fastq file from the flux scratch reads directory and
    the MG1655 reference into *tmpdir*.

    Returns:
        Tuple of (tmpdir path, copied reference path, day).
    """
    data_dir = "/scratch/hmobley_fluxod/annasint/code/data/reads"
    for src in helpers.find_files_in_a_tree(data_dir, "fastq"):
        dest = str(tmpdir.join(os.path.basename(src)))
        shutil.copy(src, dest)
    ref = "/scratch/hmobley_fluxod/annasint/code/data/ref/MG1655.fna"
    test_ref = str(tmpdir.join(os.path.basename(ref)))
    shutil.copy(ref, test_ref)
    return str(tmpdir), test_ref, day
示例#4
0
def local_fastq_dir_ref(tmpdir, day):
    """Stage local test reads and reference into *tmpdir*.

    Copies every fastq file from the local repo's reads directory and
    the MG1655 reference into *tmpdir*.

    Returns:
        Tuple of (tmpdir path, copied reference path, day).
    """
    data_dir = "/Users/annasintsova/git_repos/code/data/reads"
    for src in helpers.find_files_in_a_tree(data_dir, "fastq"):
        dest = str(tmpdir.join(os.path.basename(src)))
        shutil.copy(src, dest)
    ref = "/Users/annasintsova/git_repos/code/data/ref/MG1655.fna"
    test_ref = str(tmpdir.join(os.path.basename(ref)))
    shutil.copy(ref, test_ref)
    return str(tmpdir), test_ref, day
示例#5
0
def workflow_count(gff, bam_folder, config_dict, today, local):
    """Count features in every bam under *bam_folder*, then edit counts.

    Runs bedtools counting against *gff* for each bam file found, then
    post-edits each resulting count file. When not local, the edit job
    is submitted with a dependency on its count job.
    """
    bam_files = helpers.find_files_in_a_tree(bam_folder, file_type="bam")
    if local:
        for bam_file in bam_files:
            raw_counts, _ = run_count_job_bedtools(gff, bam_file, config_dict,
                                                   today, local)
            run_edit_count_job_bedtools(raw_counts, config_dict, today, local)
    else:
        for bam_file in bam_files:
            raw_counts, count_jobid = run_count_job_bedtools(
                gff, bam_file, config_dict, today, local)
            run_edit_count_job_bedtools(raw_counts, config_dict, today, local,
                                        job_dependency=count_jobid)
示例#6
0
def workflow_bam_stats(bam_dir, today, local, job_dependency=""):
    """Summarize alignment statistics for every bam file under *bam_dir*.

    Locally, computes total/mapped/percent-mapped per bam via
    run_bam_stats and writes them to a dated CSV inside *bam_dir*.
    Otherwise, submits a flux job that re-runs this workflow with -local.

    Args:
        bam_dir: directory tree searched for bam files.
        today: date string used to prefix the output CSV filename.
        local: if True run in-process; otherwise submit a flux job.
        job_dependency: optional job id the submitted flux job waits on.

    Returns:
        The stats CSV path when run locally, otherwise the flux job id.
        (Previously the local branch fell through and returned None.)
    """
    if local:
        bam_files = helpers.find_files_in_a_tree(bam_dir, file_type="bam")
        stats = []
        labels = ["Name", "Total", "Mapped", "% Mapped"]
        for bam_file in bam_files:
            # Sample name is the basename up to the first dot.
            sample_name = os.path.basename(bam_file).split(".")[0]
            total, mapped, pcnt_mapped = run_bam_stats(bam_file)
            stats.append((sample_name, total, mapped, pcnt_mapped))
        df = pd.DataFrame.from_records(stats, index="Name", columns=labels)
        filename = os.path.join(bam_dir, today + "_alignment_stats.csv")
        df.to_csv(filename)
        return filename
    else:
        script = "python workflow.py -a bam-stats -i {} -local".format(bam_dir)
        jobid = submit_flux_job(bam_dir, "alignment_stats", "alignment_stats",
                                script, job_dependency)
        return jobid
示例#7
0
def workflow_qc(fastq_dir, config_dict, local=False):
    """Submit a fastqc job for every fastq file found under *fastq_dir*."""
    for fastq_file in helpers.find_files_in_a_tree(fastq_dir,
                                                   file_type='fastq'):
        run_fastqc_job(fastq_file, config_dict, local)
    return "Fastqc jobs submitted!"