def sam_to_index_bam(args):
    """Convert the sample's SAM file to BAM and hand it to ``add_SG``.

    The sample name comes from ``extract_sample`` applied to the absolute
    paths of ``args.r1_file`` and ``args.r2_file``. The SAM file is expected
    as ``<sample>.sam`` inside the directory returned by
    ``obtain_output_dir(args, "Bam")``.

    Steps, in order:
      1. ``samtools view -Sb <sam> -o <bam> --threads <args.threads>``
      2. ``check_remove_file`` on the SAM file
      3. ``add_SG(args, <bam>, <name>.rg.sorted.bam)``
      4. ``check_remove_file`` on the intermediate BAM file

    Args:
        args: object exposing ``r1_file``, ``r2_file`` and ``threads``; it is
            also forwarded unchanged to ``obtain_output_dir`` and ``add_SG``.
    """
    fastq_r1 = os.path.abspath(args.r1_file)
    fastq_r2 = os.path.abspath(args.r2_file)
    sample = extract_sample(fastq_r1, fastq_r2)

    bam_dir = obtain_output_dir(args, "Bam")
    sam_path = os.path.join(bam_dir, sample + ".sam")

    # File name with only its last extension removed.
    stem = os.path.basename(sam_path).rsplit(".", 1)[0]
    bam_path = os.path.join(bam_dir, stem + ".bam")
    rg_sorted_path = os.path.join(bam_dir, stem + ".rg.sorted.bam")

    check_create_dir(bam_dir)

    # sam to bam: samtools view -Sb <sam> -o <bam>
    view_cmd = ["samtools", "view", "-Sb", sam_path, "-o", bam_path]
    view_cmd.extend(["--threads", str(args.threads)])
    execute_subprocess(view_cmd)
    check_remove_file(sam_path)

    add_SG(args, bam_path, rg_sorted_path)
    check_remove_file(bam_path)
def picard_markdup(args):
    """Mark duplicates in ``args.input_bam`` with Picard, then sort and index.

    Equivalent Picard call::

        java -jar picard.jar MarkDuplicates
            I=input.bam O=marked_duplicates.bam M=marked_dup_metrics.txt

    Output names are built from the input file name truncated at its first
    dot (``sample.rg.sorted.bam`` -> ``sample``):

    * ``<dir>/<sample>.rg.markdup.bam``         (intermediate)
    * ``<dir>/<sample>.rg.markdup.sorted.bam``  (sorted, then indexed)
    * ``<Stats dir>/<sample>.markdup.metrics.txt``

    Both the input BAM and the unsorted marked BAM are passed to
    ``check_remove_file`` once the sorted BAM has been indexed.

    Args:
        args: object exposing ``input_bam``; it is also forwarded to
            ``obtain_output_dir(args, "Stats")``.

    Raises:
        subprocess.CalledProcessError: if ``samtools index`` exits non-zero.
    """
    picard_jar = get_picard_path()

    input_bam = os.path.abspath(args.input_bam)
    in_param = "I=" + input_bam

    # Take the stem from the file name only. Splitting the whole absolute
    # path at the first "." would truncate it as soon as any parent
    # directory contains a dot (e.g. /data/run.1/sample.bam).
    bam_dir, bam_name = os.path.split(input_bam)
    file_name = bam_name.split(".")[0]
    path_file_name = os.path.join(bam_dir, file_name)

    output_markdup = path_file_name + ".rg.markdup.bam"
    output_markdup_sorted = path_file_name + ".rg.markdup.sorted.bam"
    out_param = "O=" + output_markdup

    stat_output_dir = obtain_output_dir(args, "Stats")
    stat_output_file = file_name + ".markdup.metrics.txt"
    stat_output_full = os.path.join(stat_output_dir, stat_output_file)
    stats_param = "M=" + stat_output_full

    check_create_dir(stat_output_dir)

    cmd_markdup = [
        "java", "-jar", picard_jar, "MarkDuplicates",
        in_param, out_param, stats_param,
    ]
    execute_subprocess(cmd_markdup)

    # samtools sort <markdup.bam> -o <markdup.sorted.bam>
    cmd_sort = ["samtools", "sort", output_markdup, "-o", output_markdup_sorted]
    execute_subprocess(cmd_sort)

    # Original note: "Handled in Haplotype Caller function".
    # samtools index <markdup.sorted.bam>
    subprocess.run(
        ["samtools", "index", output_markdup_sorted],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )

    check_remove_file(input_bam)
    check_remove_file(output_markdup)
def ivar_trim(input_bam, primers_file, sample, min_length=30, min_quality=20,
              sliding_window_width=4):
    """Trim primers and low-quality bases with ``ivar trim``, then sort and index.

    Usage: ivar trim -i <input.bam> -b <primers.bed> -p <prefix>
                     [-m <min-length>] [-q <min-quality>]
                     [-s <sliding-window-width>]

    Input Options    Description
        -i    (Required) Sorted bam file, with aligned reads, to trim
              primers and quality
        -b    (Required) BED file with primer sequences and positions
        -m    Minimum length of read to retain after trimming (Default: 30)
        -q    Minimum quality threshold for sliding window to pass
              (Default: 20)
        -s    Width of sliding window (Default: 4)
        -e    Include reads with no primers. By default, reads with no
              primers are excluded
    Output Options   Description
        -p    (Required) Prefix for the output BAM file

    This wrapper always passes ``-e``. Output names are built from the input
    file name truncated at its first dot, in the input file's directory:

    * ``<sample>.rg.markdup.trimmed.bam``         (intermediate)
    * ``<sample>.rg.markdup.trimmed.sorted.bam``  (sorted, then indexed)

    The input BAM, the unsorted trimmed BAM and ``<input_bam>.bai`` are
    passed to ``check_remove_file`` along the way.

    Args:
        input_bam: path to the BAM file to trim.
        primers_file: path to the primer BED file.
        sample: not used by this function; kept for caller compatibility.
        min_length: value for ``-m``.
        min_quality: value for ``-q``.
        sliding_window_width: value for ``-s``.
    """
    input_bam = os.path.abspath(input_bam)
    input_bai = input_bam + ".bai"
    primers_file = os.path.abspath(primers_file)

    # Take the stem from the file name only. Splitting the whole absolute
    # path at the first "." would truncate it as soon as any parent
    # directory contains a dot (e.g. /data/run.1/sample.bam).
    bam_dir, bam_name = os.path.split(input_bam)
    base_path = os.path.join(bam_dir, bam_name.split(".")[0])

    prefix = base_path + ".rg.markdup.trimmed"
    output_trimmed_bam = prefix + ".bam"
    output_trimmed_sorted_bam = base_path + ".rg.markdup.trimmed.sorted.bam"

    cmd = [
        "ivar", "trim",
        "-i", input_bam,
        "-b", primers_file,
        "-p", prefix,
        "-m", str(min_length),
        "-q", str(min_quality),
        "-s", str(sliding_window_width),
        "-e",
    ]
    execute_subprocess(cmd)
    check_remove_file(input_bam)

    cmd_sort = [
        "samtools", "sort", output_trimmed_bam, "-o", output_trimmed_sorted_bam
    ]
    execute_subprocess(cmd_sort)
    check_remove_file(output_trimmed_bam)

    cmd_index = ["samtools", "index", output_trimmed_sorted_bam]
    execute_subprocess(cmd_index)
    # The old index belongs to the input BAM that was removed above.
    check_remove_file(input_bai)
def sam_to_index_bam(sample, output_dir, r1, threads):
    """Convert ``<output_dir>/<sample>.sam`` to a sorted BAM and hand it to ``add_SG``.

    Steps, in order:
      1. ``samtools view -Sb <sam> --threads <threads> -o <bam>``
      2. ``check_remove_file`` on the SAM file
      3. ``samtools sort <bam> -o <name>.sorted.bam``
      4. ``check_remove_file`` on the unsorted BAM file
      5. ``add_SG(sample, <name>.sorted.bam, <name>.rg.sorted.bam, r1)``
      6. ``check_remove_file`` on the sorted BAM file

    Args:
        sample: sample name; the SAM file is looked up as ``<sample>.sam``.
        output_dir: directory holding the SAM file and receiving all outputs.
        r1: forwarded unchanged to ``add_SG``.
        threads: thread count given to ``samtools view``.
    """
    sam_path = os.path.join(output_dir, sample + ".sam")

    # File name with only its last extension removed.
    stem = os.path.basename(sam_path).rsplit(".", 1)[0]

    def in_output_dir(suffix):
        # Build <output_dir>/<stem><suffix>.
        return os.path.join(output_dir, stem + suffix)

    bam_path = in_output_dir(".bam")
    sorted_path = in_output_dir(".sorted.bam")
    rg_sorted_path = in_output_dir(".rg.sorted.bam")

    view_cmd = ["samtools", "view", "-Sb", sam_path]
    view_cmd.extend(["--threads", str(threads)])
    view_cmd.extend(["-o", bam_path])
    execute_subprocess(view_cmd)
    check_remove_file(sam_path)

    execute_subprocess(["samtools", "sort", bam_path, "-o", sorted_path])
    check_remove_file(bam_path)

    add_SG(sample, sorted_path, rg_sorted_path, r1)
    check_remove_file(sorted_path)
###################################################################### #####################START PIPELINE################################### ###################################################################### output = os.path.abspath(args.output) group_name = args.input.split("/")[-1].split(".")[0] out_vcf_dir = os.path.join(args.output, "VCF") check_create_dir(out_vcf_dir) output_vcf_file = os.path.abspath(args.input) base_input = os.path.basename(args.input) linked_file = os.path.join(out_vcf_dir, base_input) check_remove_file(linked_file) os.symlink(output_vcf_file, linked_file) print("\n\n" + BLUE + BOLD + "STARTING COHORT GVCF TO SPLIT SAMPLE VCF IN GROUP: " + group_name + END_FORMATTING) #SELECT VARIANTS 2/2 FOR HARD FILTERING AND RECALIBRATION ######################################################### out_vcfsnp_name = group_name + ".cohort.snp.vcf" output_vcfsnp_file = os.path.join(out_vcf_dir, out_vcfsnp_name) if os.path.isfile(output_vcfsnp_file): print(YELLOW + DIM + output_vcfsnp_file + " EXIST\nOmmiting Variant Selection (Group) for group " +