def mauve_pw_align(ref, query, dirs): """Set up and perform a pairwise alignment with Mauve.""" # set outputs mauve_outfile = dirs['mauve']+ref.name+"_"+query.name+".mauve" segfile = dirs['aln_segs']+ref.name+"_"+query.name+"_segs.txt" # check for existing alignment if path.exists(segfile): print "already done" else: # prep segments file open(segfile, 'w').write('') # purge any pre-existing sslist files sslist_files = from_dir(dirs['seqfiles'], re.compile(r'.*\.sslist.*')) for sslist in sslist_files: try: os.remove(dirs['seqfiles']+sslist) except Exception: raise # do Mauve alignment file_list = [ref.gbk, query.gbk] align_mauve(file_list, mauve_outfile) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile+".backbone", 0) print "\nSegment results:", len(coords), '->', # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode) print len(chop_array), 'segments <', max_size, 'bp' # make detailed pairwise alignments of the segments print "Aligning segments ..." ref_rec = load_genbank(ref.gbk) query_rec = load_genbank(query.gbk) id = iter_align(chop_array, ref_rec, query_rec, dirs['aln_segs'], segfile) print "Results:", id, "% id. overall" except IOError: print "\nERROR: Mauve alignment failed"
def mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run, max_size, chop_mode, mauve_exec, mtype): """Set up and perform a pairwise alignment with Mauve.""" aln_dir = r_root_dir + run + dirs['aln_segs'] mauve_dir = r_root_dir + run + dirs['mauve'] # set outputs mauve_outfile = mauve_dir + ref.name + "_" + query.name + ".mauve" segfile = aln_dir + ref.name + "_" + query.name + "_segs.txt" # check for existing alignment if path.exists(segfile): print "already done" else: # prep segments file open(segfile, 'w').write('') # purge any pre-existing sslist files sslist_files = from_dir(g_root_dir, re.compile(r'.*\.sslist.*')) for sslist in sslist_files: try: os.remove(g_root_dir + sslist) except Exception: raise # do Mauve alignment file_list = [ref.gbk, query.gbk] align_mauve(file_list, mauve_outfile, mauve_exec) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile + ".backbone", 0, mtype) print "\nSegment results:", len(coords), '->', # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode, mtype) print len(chop_array), 'segments <', max_size, 'bp' # make detailed pairwise alignments of the segments print "Aligning segments ..." ref_rec = load_genbank(ref.gbk) query_rec = load_genbank(query.gbk) id = iter_align(chop_array, ref_rec, query_rec, aln_dir, segfile) print "Results:", id, "% id. overall" except IOError: print "\nERROR: Mauve alignment failed" raise
def align_ctg2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes, mauve_exec, max_size, chop_mode, mtype): """Align contigs pairwise to the reference contig.""" # set inputs and outputs ref_n = run_ref.name run_root = r_root_dir + run_id + "/" ref_ctg_file = run_ref.file mauve_root = run_root + run_dirs['mauve_out_dir'] + ref_n + "/contigs/" segments_root = run_root + run_dirs['aln_seg_dir'] + ref_n + "/contigs/" q_ctgs_root = run_root + run_dirs['match_out_dir'] + ref_n + "/" ensure_dir([segments_root]) print " ", ref_n # log logstring = "".join(["\n\n# Align contigs to ref @", timestamp, "\n"]) run_ref.log(logstring) # cycle through genomes for genome in genomes: # set inputs and outputs g_name = genome['name'] ctgs_fas_dir = q_ctgs_root + g_name + "/" mauve_dir = mauve_root + g_name + "/" aln_segs_root = segments_root + g_name + "/" ensure_dir([mauve_dir]) print "\t", g_name, "...", # log logstring = "".join(["\n", g_name]) run_ref.log(logstring) # list genbank files in matches directory dir_contents = listdir(ctgs_fas_dir) for item in dir_contents: pattern = re.compile(r'.*_(\d*)\.fas$') match = pattern.match(item) if match: ctg_num = match.group(1) print ctg_num, logstring = "".join(["\t", ctg_num]) run_ref.log(logstring) # set inputs and outputs q_contig = ctgs_fas_dir + item file_list = (ref_ctg_file, q_contig) mauve_outfile = mauve_dir + ctg_num + ".mauve" aln_segs_dir = aln_segs_root + ctg_num + "/" ensure_dir([aln_segs_dir]) segfile = aln_segs_dir + ctg_num + "_" + ref_n + "_segs.txt" open(segfile, 'w').write('') # do Mauve alignment try: open(ref_ctg_file, 'r') open(q_contig, 'r') except IOError: msg = "\nERROR: File missing, cannot align\n\t\t\t" run_ref.log(msg) print msg else: align_mauve(file_list, mauve_outfile, mauve_exec) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile + ".backbone", 0, mtype) # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode, mtype) # make detailed pairwise alignments of the segments ref_rec = load_genbank(ref_ctg_file) query_rec = load_fasta(q_contig) iter_align(chop_array, ref_rec, query_rec, aln_segs_dir, segfile) except IOError: msg = "\nERROR: Mauve alignment failed\n\t\t\t" run_ref.log(msg) print msg except Exception: msg = "\nERROR: Iteration failed\n\t\t\t" run_ref.log(msg) print msg print ""
def align_cstrct2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes, max_size, chop_mode, mtype, mauve_exec): """Align constructs pairwise to the reference contig.""" # set inputs and outputs ref_n = run_ref.name run_root = r_root_dir + run_id + "/" ref_ctg_file = run_ref.file mauve_root = run_root + run_dirs['mauve_out_dir'] + ref_n + "/constructs/" segments_root = run_root + run_dirs['aln_seg_dir'] + ref_n + "/constructs/" scaff_root = run_root + run_dirs['scaffolds_dir'] + ref_n + "/" ensure_dir([segments_root]) print " ", ref_n # log logstring = "".join( ["\n\n# Align scaffold constructs to reference @", timestamp, "\n"]) run_ref.log(logstring) # cycle through genomes for genome in genomes: # set inputs g_name = genome['name'] scaff_gbk = scaff_root + g_name + "_" + ref_n + "_scaffold.gbk" file_list = (ref_ctg_file, scaff_gbk) print "\t", g_name, "...", # log logstring = "".join(["\n", g_name]) run_ref.log(logstring) # set outputs mauve_dir = mauve_root + g_name + "/" aln_segs_dir = segments_root + g_name + "/" ensure_dir([mauve_dir, aln_segs_dir]) mauve_outfile = mauve_dir + g_name + "_" + ref_n + ".mauve" segfile = aln_segs_dir + g_name + "_" + ref_n + "_segs.txt" # abort if the reference file is not found try: open(ref_ctg_file, 'r') except IOError: msg = "ERROR: Reference file not found" print msg run_ref.log(msg) raise # abort if there is no scaffold construct try: open(scaff_gbk, 'r') except IOError: msg = "WARNING: No scaffold construct to align" print msg run_ref.log(msg) else: # prep segments file open(segfile, 'w').write('') # purge any pre-existing sslist file sslist_file = scaff_gbk + ".sslist" if os.path.isfile(sslist_file): try: os.remove(sslist_file) except Exception: raise # do Mauve alignment align_mauve(file_list, mauve_outfile, mauve_exec) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile + ".backbone", 0, mtype) print len(coords), '->', logstring = "".join(["\t", str(len(coords))]) run_ref.log(logstring) # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode, mtype) print len(chop_array), 'segments <', max_size, 'bp', logstring = "".join(["\t", str(len(chop_array))]) run_ref.log(logstring) # make detailed pairwise alignments of the segments ref_rec = load_genbank(ref_ctg_file) query_rec = load_genbank(scaff_gbk) id = iter_align(chop_array, ref_rec, query_rec, aln_segs_dir, segfile) print "@", id, "% id. overall" logstring = "".join(["\t", str(id)]) run_ref.log(logstring) except IOError: msg = "\nERROR: Mauve alignment failed" run_ref.log(msg) print msg
def align_ctg2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes, mauve_exec, max_size, chop_mode, mtype): """Align contigs pairwise to the reference contig.""" # set inputs and outputs ref_n = run_ref.name run_root = r_root_dir+run_id+"/" ref_ctg_file = run_ref.file mauve_root = run_root+run_dirs['mauve_out_dir']+ref_n+"/contigs/" segments_root = run_root+run_dirs['aln_seg_dir']+ref_n+"/contigs/" q_ctgs_root = run_root+run_dirs['match_out_dir']+ref_n+"/" ensure_dir([segments_root]) print " ", ref_n # log logstring = "".join(["\n\n# Align contigs to ref @", timestamp, "\n"]) run_ref.log(logstring) # cycle through genomes for genome in genomes: # set inputs and outputs g_name = genome['name'] ctgs_fas_dir = q_ctgs_root+g_name+"/" mauve_dir = mauve_root+g_name+"/" aln_segs_root = segments_root+g_name+"/" ensure_dir([mauve_dir]) print "\t", g_name, "...", # log logstring = "".join(["\n", g_name]) run_ref.log(logstring) # list genbank files in matches directory dir_contents = listdir(ctgs_fas_dir) for item in dir_contents: pattern = re.compile(r'.*_(\d*)\.fas$') match = pattern.match(item) if match: ctg_num = match.group(1) print ctg_num, logstring = "".join(["\t", ctg_num]) run_ref.log(logstring) # set inputs and outputs q_contig = ctgs_fas_dir+item file_list = (ref_ctg_file, q_contig) mauve_outfile = mauve_dir+ctg_num+".mauve" aln_segs_dir = aln_segs_root+ctg_num+"/" ensure_dir([aln_segs_dir]) segfile = aln_segs_dir+ctg_num+"_"+ref_n+"_segs.txt" open(segfile, 'w').write('') # do Mauve alignment try: open(ref_ctg_file, 'r') open(q_contig, 'r') except IOError: msg = "\nERROR: File missing, cannot align\n\t\t\t" run_ref.log(msg) print msg else: align_mauve(file_list, mauve_outfile, mauve_exec) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile+".backbone", 0, mtype) # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode, mtype) # make detailed pairwise alignments of the segments ref_rec = load_genbank(ref_ctg_file) query_rec = load_fasta(q_contig) iter_align(chop_array, ref_rec, query_rec, aln_segs_dir, segfile) except IOError: msg = "\nERROR: Mauve alignment failed\n\t\t\t" run_ref.log(msg) print msg except Exception: msg = "\nERROR: Iteration failed\n\t\t\t" run_ref.log(msg) print msg print ""
def align_cstrct2ref(run_ref, run_id, timestamp, r_root_dir, run_dirs, genomes, max_size, chop_mode, mtype, mauve_exec): """Align constructs pairwise to the reference contig.""" # set inputs and outputs ref_n = run_ref.name run_root = r_root_dir+run_id+"/" ref_ctg_file = run_ref.file mauve_root = run_root+run_dirs['mauve_out_dir']+ref_n+"/constructs/" segments_root = run_root+run_dirs['aln_seg_dir']+ref_n+"/constructs/" scaff_root = run_root+run_dirs['scaffolds_dir']+ref_n+"/" ensure_dir([segments_root]) print " ", ref_n # log logstring = "".join(["\n\n# Align scaffold constructs to reference @", timestamp, "\n"]) run_ref.log(logstring) # cycle through genomes for genome in genomes: # set inputs g_name = genome['name'] scaff_gbk = scaff_root+g_name+"_"+ref_n+"_scaffold.gbk" file_list = (ref_ctg_file, scaff_gbk) print "\t", g_name, "...", # log logstring = "".join(["\n", g_name]) run_ref.log(logstring) # set outputs mauve_dir = mauve_root+g_name+"/" aln_segs_dir = segments_root+g_name+"/" ensure_dir([mauve_dir, aln_segs_dir]) mauve_outfile = mauve_dir+g_name+"_"+ref_n+".mauve" segfile = aln_segs_dir+g_name+"_"+ref_n+"_segs.txt" # abort if the reference file is not found try: open(ref_ctg_file, 'r') except IOError: msg = "ERROR: Reference file not found" print msg run_ref.log(msg) raise # abort if there is no scaffold construct try: open(scaff_gbk, 'r') except IOError: msg = "WARNING: No scaffold construct to align" print msg run_ref.log(msg) else: # prep segments file open(segfile, 'w').write('') # purge any pre-existing sslist file sslist_file = scaff_gbk+".sslist" if os.path.isfile(sslist_file): try: os.remove(sslist_file) except Exception: raise # do Mauve alignment align_mauve(file_list, mauve_outfile, mauve_exec) try: # parse Mauve output (without initial clumping) coords = mauver_load2_k0(mauve_outfile+".backbone", 0, mtype) print len(coords), '->', logstring = "".join(["\t", str(len(coords))]) run_ref.log(logstring) # chop segments that are too long chop_array = chop_rows(coords, max_size, chop_mode, mtype) print len(chop_array), 'segments <', max_size, 'bp', logstring = "".join(["\t", str(len(chop_array))]) run_ref.log(logstring) # make detailed pairwise alignments of the segments ref_rec = load_genbank(ref_ctg_file) query_rec = load_genbank(scaff_gbk) id = iter_align(chop_array, ref_rec, query_rec, aln_segs_dir, segfile) print "@", id, "% id. overall" logstring = "".join(["\t", str(id)]) run_ref.log(logstring) except IOError: msg = "\nERROR: Mauve alignment failed" run_ref.log(msg) print msg