Пример #1
0
def sam_to_index_bam(args):
    """Convert the sample's SAM file to BAM and pass it on to ``add_SG``.

    The sample name is obtained from the absolute R1/R2 paths through
    ``extract_sample``.  The SAM file is expected at
    ``<output_dir>/<sample>.sam``, where ``output_dir`` is whatever
    ``obtain_output_dir(args, "Bam")`` returns.

    Steps, in order:
      1. ``samtools view -Sb <sam> -o <bam> --threads <args.threads>``
      2. ``check_remove_file`` on the SAM file
      3. ``add_SG(args, <bam>, <sample>.rg.sorted.bam)``
      4. ``check_remove_file`` on the intermediate BAM file

    Args:
        args: object exposing ``r1_file``, ``r2_file`` and ``threads``
            (it is also forwarded to ``obtain_output_dir`` and ``add_SG``).
    """
    fastq_r1 = os.path.abspath(args.r1_file)
    fastq_r2 = os.path.abspath(args.r2_file)

    sample = extract_sample(fastq_r1, fastq_r2)
    output_dir = obtain_output_dir(args, "Bam")

    sam_path = os.path.join(output_dir, sample + ".sam")

    # Strip only the last extension (".sam"); dots inside the sample name
    # are kept.
    stem = os.path.basename(sam_path).rsplit(".", 1)[0]

    bam_path = os.path.join(output_dir, stem + ".bam")
    rg_sorted_bam_path = os.path.join(output_dir, stem + ".rg.sorted.bam")

    check_create_dir(output_dir)

    # SAM -> BAM
    view_cmd = [
        "samtools", "view", "-Sb", sam_path, "-o", bam_path,
        "--threads", str(args.threads),
    ]
    execute_subprocess(view_cmd)
    check_remove_file(sam_path)

    # Produces the ".rg.sorted.bam" file from the plain BAM.
    add_SG(args, bam_path, rg_sorted_bam_path)
    check_remove_file(bam_path)
    """
Пример #2
0
def picard_markdup(args):
    """Run Picard MarkDuplicates on a BAM file, then sort and index the result.

    Equivalent Picard invocation::

        java -jar picard.jar MarkDuplicates I=input.bam O=marked_duplicates.bam M=marked_dup_metrics.txt

    Output files are written next to the input BAM and are named after the
    part of the input *file name* that precedes its first dot:

      * ``<name>.rg.markdup.bam``         (intermediate, cleaned up at the end)
      * ``<name>.rg.markdup.sorted.bam``  (final, indexed with samtools)

    The metrics file ``<name>.markdup.metrics.txt`` goes to the directory
    returned by ``obtain_output_dir(args, "Stats")``.

    Args:
        args: object exposing ``input_bam`` (it is also forwarded to
            ``obtain_output_dir``).

    Raises:
        subprocess.CalledProcessError: if ``samtools index`` exits with a
            non-zero status.
    """
    picard_jar = get_picard_path()

    input_bam = os.path.abspath(args.input_bam)
    in_param = "I=" + input_bam

    # Cut at the first dot of the *file name* only.  Splitting the whole
    # absolute path would truncate it at the first dotted directory
    # (e.g. "/data/run.1/sample.bam" -> "/data/run").
    input_dir, input_base = os.path.split(input_bam)
    file_name = input_base.split(".")[0]
    path_file_name = os.path.join(input_dir, file_name)

    output_markdup = path_file_name + ".rg.markdup.bam"
    output_markdup_sorted = path_file_name + ".rg.markdup.sorted.bam"
    out_param = "O=" + output_markdup

    stat_output_dir = obtain_output_dir(args, "Stats")
    stat_output_file = file_name + ".markdup.metrics.txt"
    stat_output_full = os.path.join(stat_output_dir, stat_output_file)
    stats_param = "M=" + stat_output_full

    check_create_dir(stat_output_dir)

    cmd_markdup = [
        "java", "-jar", picard_jar, "MarkDuplicates", in_param, out_param,
        stats_param
    ]
    execute_subprocess(cmd_markdup)

    # samtools sort <markdup.bam> -o <markdup.sorted.bam>
    cmd_sort = [
        "samtools", "sort", output_markdup, "-o", output_markdup_sorted
    ]
    execute_subprocess(cmd_sort)

    # samtools index <markdup.sorted.bam>
    # Run directly (not through execute_subprocess) so a failure raises
    # CalledProcessError; output is captured and discarded.
    subprocess.run(["samtools", "index", output_markdup_sorted],
                   stdout=subprocess.PIPE,
                   stderr=subprocess.PIPE,
                   check=True)

    # Clean up the input and the unsorted intermediate once the sorted,
    # indexed BAM exists.
    check_remove_file(input_bam)
    check_remove_file(output_markdup)
Пример #3
0
def ivar_trim(input_bam,
              primers_file,
              sample,
              min_length=30,
              min_quality=20,
              sliding_window_width=4):
    """Trim primers with ``ivar trim``, then sort and index the trimmed BAM.

    iVar usage, for reference::

        ivar trim -i <input.bam> -b <primers.bed> -p <prefix> [-m <min-length>] [-q <min-quality>] [-s <sliding-window-width>]
            Input Options    Description
               -i    (Required) Sorted bam file, with aligned reads, to trim primers and quality
               -b    (Required) BED file with primer sequences and positions
               -m    Minimum length of read to retain after trimming (Default: 30)
               -q    Minimum quality threshold for sliding window to pass (Default: 20)
               -s    Width of sliding window (Default: 4)
               -e    Include reads with no primers. By default, reads with no primers are excluded
            Output Options   Description
               -p    (Required) Prefix for the output BAM file

    Outputs are written next to the input BAM and are named after the part
    of the input *file name* that precedes its first dot:

      * ``<name>.rg.markdup.trimmed.bam``         (intermediate)
      * ``<name>.rg.markdup.trimmed.sorted.bam``  (final, indexed)

    The input BAM, its ``.bai`` index and the unsorted trimmed BAM are passed
    to ``check_remove_file`` along the way.

    Args:
        input_bam: path to the BAM file to trim.
        primers_file: path to the primer BED file.
        sample: not used by this function; kept so existing callers keep
            working.
        min_length: value for ``-m``.
        min_quality: value for ``-q``.
        sliding_window_width: value for ``-s``.
    """
    input_bam = os.path.abspath(input_bam)
    input_bai = input_bam + ".bai"
    primers_file = os.path.abspath(primers_file)

    # Cut at the first dot of the *file name* only.  Splitting the whole
    # absolute path would truncate it at the first dotted directory
    # (e.g. "/data/run.1/sample.bam" -> "/data/run").
    input_dir, input_base = os.path.split(input_bam)
    base_path = os.path.join(input_dir, input_base.split(".")[0])

    prefix = base_path + ".rg.markdup.trimmed"
    output_trimmed_bam = prefix + ".bam"
    output_trimmed_sorted_bam = base_path + ".rg.markdup.trimmed.sorted.bam"

    # "-e" keeps reads that carry no primer.
    cmd = [
        "ivar", "trim", "-i", input_bam, "-b", primers_file, "-p", prefix,
        "-m",
        str(min_length), "-q",
        str(min_quality), "-s",
        str(sliding_window_width), "-e"
    ]
    execute_subprocess(cmd)

    check_remove_file(input_bam)

    cmd_sort = [
        "samtools", "sort", output_trimmed_bam, "-o", output_trimmed_sorted_bam
    ]
    execute_subprocess(cmd_sort)

    check_remove_file(output_trimmed_bam)

    cmd_index = ["samtools", "index", output_trimmed_sorted_bam]
    execute_subprocess(cmd_index)

    # The old index belongs to the input BAM that was already cleaned up.
    check_remove_file(input_bai)
Пример #4
0
def sam_to_index_bam(sample, output_dir, r1, threads):
    """Convert ``<output_dir>/<sample>.sam`` to a sorted BAM and run ``add_SG``.

    Steps, in order:
      1. ``samtools view -Sb <sam> --threads <threads> -o <bam>``
      2. ``check_remove_file`` on the SAM file
      3. ``samtools sort <bam> -o <sorted.bam>``
      4. ``check_remove_file`` on the unsorted BAM
      5. ``add_SG(sample, <sorted.bam>, <rg.sorted.bam>, r1)``
      6. ``check_remove_file`` on the sorted BAM

    Args:
        sample: sample name; the SAM file name is ``sample + ".sam"``.
        output_dir: directory holding the SAM file and receiving all outputs.
        r1: forwarded unchanged to ``add_SG``.
        threads: thread count given to ``samtools view``.
    """
    sam_path = os.path.join(output_dir, sample + ".sam")

    # Strip only the last extension (".sam"); dots inside the sample name
    # are kept.
    stem = os.path.basename(sam_path).rsplit(".", 1)[0]

    def in_output_dir(suffix):
        # Build "<output_dir>/<stem><suffix>".
        return os.path.join(output_dir, stem + suffix)

    bam_path = in_output_dir(".bam")
    sorted_bam_path = in_output_dir(".sorted.bam")
    rg_sorted_bam_path = in_output_dir(".rg.sorted.bam")

    # SAM -> BAM
    execute_subprocess([
        "samtools", "view", "-Sb", sam_path,
        "--threads", str(threads),
        "-o", bam_path,
    ])
    check_remove_file(sam_path)

    # BAM -> sorted BAM
    execute_subprocess(["samtools", "sort", bam_path, "-o", sorted_bam_path])
    check_remove_file(bam_path)

    # sorted BAM -> ".rg.sorted.bam"
    add_SG(sample, sorted_bam_path, rg_sorted_bam_path, r1)
    check_remove_file(sorted_bam_path)
    """
Пример #5
0
######################################################################
#####################START PIPELINE###################################
######################################################################
# Absolute form of the requested output directory.
# NOTE(review): out_vcf_dir below is built from the raw args.output rather
# than from this absolute path -- confirm that is intended.
output = os.path.abspath(args.output)
# Group name = last "/"-separated component of the input path, cut at its
# first dot.
group_name = args.input.split("/")[-1].split(".")[0]
out_vcf_dir = os.path.join(args.output, "VCF")
check_create_dir(out_vcf_dir)

# Despite its name, this is the absolute path of the *input* file; it is the
# target of the symlink created below.
output_vcf_file = os.path.abspath(args.input)

base_input = os.path.basename(args.input)

# The input is exposed inside the VCF directory under its original file name.
linked_file = os.path.join(out_vcf_dir, base_input)

# Presumably clears a link left by a previous run so os.symlink does not fail
# on an existing path -- verify against check_remove_file.
check_remove_file(linked_file)

os.symlink(output_vcf_file, linked_file)

print("\n\n" + BLUE + BOLD +
      "STARTING COHORT GVCF TO SPLIT SAMPLE VCF IN GROUP: " + group_name +
      END_FORMATTING)

#SELECT VARIANTS 2/2 FOR HARD FILTERING AND RECALIBRATION
#########################################################
# Expected location of the cohort SNP VCF: <out_vcf_dir>/<group>.cohort.snp.vcf
out_vcfsnp_name = group_name + ".cohort.snp.vcf"
output_vcfsnp_file = os.path.join(out_vcf_dir, out_vcfsnp_name)

if os.path.isfile(output_vcfsnp_file):
    print(YELLOW + DIM + output_vcfsnp_file +
          " EXIST\nOmmiting Variant Selection (Group) for group " +