def do_job(nucmer_coords, debug_dir, circular, only_predicted):
    """
    Draws the phylogeny picture, writes the true adjacencies and outputs
    the breakpoint graph using the files located in debug_dir.

    If nucmer_coords is "-", no alignment is read and the list of
    true adjacencies stays empty.
    """
    def debug_path(file_name):
        return os.path.join(debug_dir, file_name)

    contigs_file = debug_path("filtered_contigs.txt")
    true_edges_file = debug_path("true_edges.dot")
    graph_file = debug_path("breakpoint_graph.dot")
    overlap_file = debug_path("../contigs_overlap.dot")
    predicted_file = debug_path("predicted_edges.dot")

    draw_phylogeny(debug_path("phylogeny.txt"), debug_path("phylogeny.png"))

    contigs = get_contig_permutations(contigs_file)

    true_adj = []
    if nucmer_coords != "-":
        hits = parse_nucmer_coords(nucmer_coords)
        #keep only the entries whose query sequence id is in contigs
        hits = [hit for hit in hits if hit.qry.seq_id in contigs]
        #coverage filter goes first, collinear entries are joined afterwards
        hits = join_collinear(filter_by_coverage(hits, 0.7))
        broken = verify_alignment(hits, contigs)
        true_adj = get_true_adjacencies(hits, contigs, broken, circular)

    output_edges(true_adj, true_edges_file)
    graph = compose_breakpoint_graph(graph_file, predicted_file, true_adj)
    #overlap edges are added only if the overlap file exists
    if os.path.exists(overlap_file):
        add_overlap_edges(graph, overlap_file, contigs_file)
    output_graph(graph, debug_dir, only_predicted)
def do_job(nucmer_coords, debug_dir, circular, only_predicted):
    """
    Draws the phylogeny picture, writes the true adjacencies and outputs
    the breakpoint graph using the files located in debug_dir.

    nucmer_coords is the file passed to parse_nucmer_coords; the special
    value "-" skips the alignment step and leaves the true adjacencies empty.
    circular is forwarded to get_true_adjacencies and only_predicted
    to output_graph.
    """
    used_contigs = os.path.join(debug_dir, "filtered_contigs.txt")
    true_adj_out = os.path.join(debug_dir, "true_edges.dot")
    base_dot = os.path.join(debug_dir, "breakpoint_graph.dot")
    overlap_dot = os.path.join(debug_dir, "../contigs_overlap.dot")
    predicted_dot = os.path.join(debug_dir, "predicted_edges.dot")
    phylogeny_in = os.path.join(debug_dir, "phylogeny.txt")
    phylogeny_out = os.path.join(debug_dir, "phylogeny.png")

    draw_phylogeny(phylogeny_in, phylogeny_out)

    contigs = get_contig_permutations(used_contigs)
    if nucmer_coords != "-":
        alignment = parse_nucmer_coords(nucmer_coords)
        #keep only the entries whose query sequence id is in contigs;
        #the comprehension already builds a list, no extra list() is needed
        alignment = [e for e in alignment if e.qry.seq_id in contigs]
        alignment = filter_by_coverage(alignment, 0.7)
        alignment = join_collinear(alignment)
        break_contigs = verify_alignment(alignment, contigs)
        true_adj = get_true_adjacencies(alignment, contigs,
                                        break_contigs, circular)
    else:
        true_adj = []

    output_edges(true_adj, true_adj_out)
    g = compose_breakpoint_graph(base_dot, predicted_dot, true_adj)
    #overlap edges are added only if the overlap file exists
    if os.path.exists(overlap_dot):
        add_overlap_edges(g, overlap_dot, used_contigs)
    output_graph(g, debug_dir, only_predicted)
def do_job(nucmer_coords, scaffolds_ord):
    """
    Compares the contig order of every scaffold from scaffolds_ord with
    the alignment read from nucmer_coords and prints a report to stdout:
    one line per contig (marked with <<<order / <<<strand when
    a disagreement is detected), followed by the total counts.
    """
    hits = join_collinear(parse_nucmer_coords(nucmer_coords))
    hits = filter_by_coverage(hits, 0.45)
    entry_ord, chr_len, contig_len = get_order(hits)
    scaffolds = parse_contigs_order(scaffolds_ord)

    total_breaks = total_gaps = total_contigs = 0
    for scf in scaffolds:
        print("\n>" + scf.name)

        #state carried from the last contig that had alignments
        prev_aln = []
        prev_strand = None
        increasing = None
        breaks = []
        for contig in scf.contigs:
            cur_aln = entry_ord[contig.name]
            miss_ord = False
            miss_strand = False

            #checking order
            if prev_aln and increasing is not None:
                if not agreement_ord(increasing, prev_aln, cur_aln, chr_len):
                    increasing = None
                    breaks.append(contig.name)
                    total_breaks += 1
                    miss_ord = True
            elif prev_aln and len(cur_aln) == 1 and len(prev_aln) == 1:
                #direction is set only from a pair of single entries
                increasing = cur_aln[0].index > prev_aln[0].index

            #checking strand
            cur_strand = [hit.sign * contig.sign for hit in cur_aln]
            if (not miss_ord and prev_strand and cur_strand
                    and not agreement_strands(prev_strand, cur_strand,
                                              increasing)):
                breaks.append(contig.name)
                total_breaks += 1
                miss_strand = True
                increasing = None

            if gap_count(prev_aln, cur_aln) > 0:
                total_gaps += 1

            #only if this contig has alignments
            if cur_aln:
                prev_aln = cur_aln
                prev_strand = cur_strand

            #output
            if contig.sign > 0:
                sign = "+"
            else:
                sign = "-"
            pos_list = [str(hit) for hit in cur_aln]
            if len(pos_list) < 5:
                pos_list_str = str(pos_list)
            else:
                pos_list_str = str(pos_list[:5]) + "..."
            line = "{0}{1}\t{2}\t{3}".format(sign, contig.name,
                                             contig_len[contig.name],
                                             pos_list_str)
            if miss_ord:
                line += "\t<<<order"
            if miss_strand:
                line += "\t<<<strand"
            print(line)
            total_contigs += 1

        print("\tmiss-ordered: ", len(breaks))

    print("\nTotal miss-ordered:", total_breaks)
    print("Total gaps:", total_gaps)
    print("Total contigs:", total_contigs)
    print("Total scaffolds:", len(scaffolds))
def do_job(nucmer_coords, scaffolds_ord):
    """
    Compares the contig order of every scaffold from scaffolds_ord with
    the alignment read from nucmer_coords and prints a report to stdout:
    one line per contig (marked with <<<order / <<<strand when
    a disagreement is detected), followed by the total counts.
    """
    hits = join_collinear(parse_nucmer_coords(nucmer_coords))
    hits = filter_by_coverage(hits, 0.45)
    entry_ord, chr_len, contig_len = get_order(hits)
    scaffolds = parse_contigs_order(scaffolds_ord)

    total_breaks = total_gaps = total_contigs = 0
    for scf in scaffolds:
        print("\n>" + scf.name)

        #state carried from the last contig that had alignments
        #NOTE(review): prev_aln is still None when gap_count is called
        #before any contig with alignments was seen -- confirm that
        #gap_count accepts None
        prev_aln = None
        prev_strand = None
        increasing = None
        breaks = []
        for contig in scf.contigs:
            cur_aln = entry_ord[contig.name]
            miss_ord = False
            miss_strand = False

            #checking order
            if prev_aln and increasing is not None:
                if not agreement_ord(increasing, prev_aln, cur_aln, chr_len):
                    increasing = None
                    breaks.append(contig.name)
                    total_breaks += 1
                    miss_ord = True
            elif prev_aln and len(cur_aln) == 1 and len(prev_aln) == 1:
                #direction is set only from a pair of single entries
                increasing = cur_aln[0].index > prev_aln[0].index

            #checking strand
            cur_strand = [hit.sign * contig.sign for hit in cur_aln]
            if (not miss_ord and prev_strand and cur_strand
                    and not agreement_strands(prev_strand, cur_strand,
                                              increasing)):
                breaks.append(contig.name)
                total_breaks += 1
                miss_strand = True
                increasing = None

            if gap_count(prev_aln, cur_aln) > 0:
                total_gaps += 1

            #only if this contig has alignments
            if cur_aln:
                prev_aln = cur_aln
                prev_strand = cur_strand

            #output
            if contig.sign > 0:
                sign = "+"
            else:
                sign = "-"
            pos_list = [str(hit) for hit in cur_aln]
            if len(pos_list) < 5:
                pos_list_str = str(pos_list)
            else:
                pos_list_str = str(pos_list[:5]) + "..."
            marks = ""
            if miss_ord:
                marks += "\t<<<order"
            if miss_strand:
                marks += "\t<<<strand"
            print("{0}{1}\t{2}\t{3}".format(sign, contig.name,
                                            contig_len[contig.name],
                                            pos_list_str) + marks)
            total_contigs += 1

        print("\tmiss-ordered: ", len(breaks))

    print("\nTotal miss-ordered:", total_breaks)
    print("Total gaps:", total_gaps)
    print("Total contigs:", total_contigs)
    print("Total scaffolds:", len(scaffolds))