def create_data_dir(args, fasta_path, bam_path):
    """Stage the reference FASTA and BAM under ``<args.output_dir>/data``.

    The ``data`` directory is created when it does not exist yet.  Each
    input is copied (``shutil.copy2``) to a fixed file name inside it and
    then handed to the matching ``samtools`` call:

    * ``fasta_path`` -> ``reference.fasta``, followed by ``samtools.faidx``
    * ``bam_path``   -> ``reference.bam``,   followed by ``samtools.index``

    A target that already exists is left alone: it is neither copied over
    nor passed to ``samtools`` again.

    Args:
        args: Object exposing an ``output_dir`` attribute.
        fasta_path: Path of the reference FASTA file to stage.
        bam_path: Path of the alignment BAM file to stage.

    Returns:
        None.
    """
    print("++Creating data directory for bam2aln processing.")

    staging_dir = os.path.join(args.output_dir, "data")
    if not os.path.exists(staging_dir):
        os.makedirs(staging_dir)

    # (source file, name inside the data directory, call applied to the copy)
    staged_files = (
        (fasta_path, "reference.fasta", samtools.faidx),
        (bam_path, "reference.bam", samtools.index),
    )
    for source, file_name, build_index in staged_files:
        destination = os.path.join(staging_dir, file_name)
        if os.path.exists(destination):
            # Already staged by an earlier run; keep it untouched.
            continue
        shutil.copy2(source, destination)
        build_index(destination)
def ssaha2_alignment(args, step_1="01_reference_conversion", step_2="02_reference_alignment"):
    """Align the reads against the reference with ssaha2 and return the result.

    The reference FASTA is obtained from ``prepare_reference(args, step_1)``.
    The alignment itself is skipped when the marker file
    ``<args.output_dir>/<step_2>/create_alignment.done`` exists; in that case
    the BAM path stored in the marker is loaded instead.  Otherwise the step
    directory is created, the reference is passed to ``samtools.faidx``,
    ``ssaha2Build`` and ``ssaha2`` are run through the shell, the resulting
    SAM files are handed to ``prepare_alignment`` and the BAM path it returns
    is written to the marker file.

    Args:
        args: Object exposing ``output_dir`` and ``read_paths`` (an iterable
            of read file paths).
        step_1: Step name forwarded to ``prepare_reference``.
        step_2: Name of the sub-directory of ``args.output_dir`` used for
            the alignment output and the marker file.

    Returns:
        A ``(fasta_path, bam_path)`` tuple.

    Raises:
        AssertionError: If the BAM path does not exist on disk or does not
            end with ``.bam``.
    """
    fasta_path = prepare_reference(args, step_1)

    step_2_dir = os.path.join(args.output_dir, step_2)
    step_2_done_file = os.path.join(step_2_dir, "create_alignment.done")

    if not os.path.exists(step_2_done_file):
        if not os.path.exists(step_2_dir):
            os.makedirs(step_2_dir)

        samtools.faidx(fasta_path)

        # The built reference shares the FASTA path minus the ".fasta" text.
        built_reference_path = fasta_path.replace(".fasta", "")
        cmd = "ssaha2Build -rtype solexa -skip 1 -save {} {}".format(built_reference_path, fasta_path)
        print(cmd)
        # NOTE(review): the exit status of os.system is discarded here and
        # below, so a failing external command does not stop the step.
        os.system(cmd)

        cmd_fmt = "ssaha2 -disk 2 -save {} -kmer 13 -skip 1 -seeds 1 -score 12 -cmatch 9 -ckmer 1 -output sam_soft -outfile {} {}"

        # One SAM file per read file, placed in the step directory.  A real
        # list is built on purpose: it is iterated below and then passed on
        # to prepare_alignment, so it must survive more than one pass.
        sam_paths = [
            os.path.join(step_2_dir, os.path.basename(read_path.replace(".fastq", ".sam")))
            for read_path in args.read_paths
        ]

        for sam_path, read_path in zip(sam_paths, args.read_paths):
            cmd = cmd_fmt.format(built_reference_path, sam_path, read_path)
            print(cmd)
            # An existing SAM file is reused instead of re-running ssaha2.
            if not os.path.exists(sam_path):
                os.system(cmd)

        bam_path = prepare_alignment(fasta_path, sam_paths, step_2_dir, True)

        # Step: Mark step as completed.
        # The marker is written inside a ``with`` block so the handle is
        # closed (and the content flushed) before the function continues.
        with open(step_2_done_file, "w") as done_file:
            p.dump(bam_path, done_file)
    else:
        print("++Reference alignment file has already been completed.")
        # NOTE(review): ``p`` is imported outside this block; if it is a
        # pickle-style module, loading is only safe as long as the marker
        # file is produced by this pipeline and not by an untrusted party.
        with open(step_2_done_file, "r") as done_file:
            bam_path = p.load(done_file)

    # Kept as an assert so the exception type seen by callers is unchanged.
    assert os.path.exists(bam_path) and bam_path.endswith(".bam")

    # Same output as the two-item print statement: both paths, one space.
    print(" ".join((fasta_path, bam_path)))
    return fasta_path, bam_path