def build_scaffolds(adjacencies, perm_container, debug_output, correct_distances):
    """
    Builds scaffolds from the contigs of ``perm_container`` using the
    inferred ``adjacencies``.

    ``correct_distances`` is handed over to ``_extend_scaffolds`` as is.
    ``debug_output`` enables the "Building scaffolds" log message and,
    when the debugger is active as well, the dump of links, target
    permutations and scaffold permutations into the debug directory.

    Returns the scaffolds produced by ``_extend_scaffolds``.
    """
    def _debug_file(name):
        # Path of a dump file inside the current debug directory
        return os.path.join(debugger.debug_dir, name)

    if debug_output:
        logger.debug("Building scaffolds")

    contigs, contig_index = _make_contigs(perm_container)
    scaffolds = _extend_scaffolds(adjacencies, contigs, contig_index,
                                  correct_distances)

    #counting contigs over all scaffolds for the summary message
    joined = 0
    for scf in scaffolds:
        joined += len(scf.contigs)
    logger.debug("%d contigs were joined into %d scaffolds",
                 joined, len(scaffolds))

    #nothing to dump unless debugging is on and the caller asked for it
    if not (debugger.debugging and debug_output):
        return scaffolds

    output_links(scaffolds, _debug_file("scaffolder.links"))
    output_permutations(perm_container.target_perms,
                        _debug_file("scaffolder_contigs.txt"))
    output_scaffolds_premutations(scaffolds,
                                  _debug_file("scaffolder_scaffolds.txt"))
    return scaffolds
def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container, rearrange):
    """
    Combines the scaffold sets of two iterations into one.

    Both sets are first re-synchronized against ``perm_container``. When
    ``rearrange`` is set, some small-scale rearrangements of the weaker
    (small) set are projected onto the big one and the big set is rebuilt
    from the resulting adjacencies before merging.

    Returns the merged scaffolds with consecutive contigs merged.
    """
    logger.info("Merging two iterations")

    #bringing both scaffold sets to the same permutations
    big_synced = _update_scaffolds(big_scaffolds, perm_container)
    small_synced = _update_scaffolds(small_scaffolds, perm_container)

    base_scaffolds = big_synced
    if rearrange:
        adjacencies = RearrangementProjector(big_synced, small_synced,
                                             True).project()
        #rebuilt without debug output and without distance correction
        base_scaffolds = build_scaffolds(adjacencies, perm_container,
                                         False, False)

    merged_scf = _merge_consecutive_contigs(
        _merge_scaffolds(base_scaffolds, small_synced))

    if debugger.debugging:
        output_links(merged_scf,
                     os.path.join(debugger.debug_dir, "merged.links"))
        output_scaffolds_premutations(
            merged_scf,
            os.path.join(debugger.debug_dir, "merged_scaffolds.txt"))

    return merged_scf
def build_scaffolds(adjacencies, perm_container, debug_output=True,
                    correct_distances=True, ancestral=False):
    """
    Builds scaffolds from the contigs of ``perm_container`` using the
    inferred ``adjacencies``.

    ``correct_distances`` is forwarded to ``_extend_scaffolds``.
    ``ancestral`` is forwarded to ``_make_contigs`` only when it is
    truthy. ``debug_output`` enables the "Building scaffolds" log message
    and, if the debugger is active too, the dump of links, target
    permutations and scaffold permutations into the debug directory.

    Returns the scaffolds produced by ``_extend_scaffolds``.
    """
    if debug_output:
        logger.info("Building scaffolds")

    if ancestral:
        contigs, contig_index = _make_contigs(perm_container,
                                              ancestral=ancestral)
    else:
        contigs, contig_index = _make_contigs(perm_container)

    scaffolds = _extend_scaffolds(adjacencies, contigs, contig_index,
                                  correct_distances)

    num_contigs = sum(len(scf.contigs) for scf in scaffolds)
    summary = "{0} contigs were joined into {1} scaffolds".format(
        num_contigs, len(scaffolds))
    logger.debug(summary)

    if debugger.debugging and debug_output:
        links_path = os.path.join(debugger.debug_dir, "scaffolder.links")
        output_links(scaffolds, links_path)

        contigs_path = os.path.join(debugger.debug_dir,
                                    "scaffolder_contigs.txt")
        output_permutations(perm_container.target_perms, contigs_path)

        scaffolds_path = os.path.join(debugger.debug_dir,
                                      "scaffolder_scaffolds.txt")
        output_scaffolds_premutations(scaffolds, scaffolds_path)

    return scaffolds
def merge_scaffolds(big_scaffolds, small_scaffolds, perm_container,
                    rearrange, ancestral=False):
    """
    Combines the scaffold sets of two iterations into one.

    Both sets are first re-synchronized against ``perm_container``
    (``ancestral`` is forwarded to that step). When ``rearrange`` is set,
    some small-scale rearrangements of the weaker (small) set are
    projected onto the big one, and the big set is rebuilt from the
    resulting adjacencies before merging.

    Returns the merged scaffolds with consecutive contigs merged.
    """
    def _debug_file(name):
        # Path of a dump file inside the current debug directory
        return os.path.join(debugger.debug_dir, name)

    logger.info("Merging two iterations")

    #bringing both scaffold sets to the same permutations
    synced_big = _update_scaffolds(big_scaffolds, perm_container,
                                   ancestral=ancestral)
    synced_small = _update_scaffolds(small_scaffolds, perm_container,
                                     ancestral=ancestral)

    if not rearrange:
        primary = synced_big
    else:
        projector = RearrangementProjector(synced_big, synced_small, True)
        projected_adj = projector.project()
        #rebuilt without debug output and without distance correction
        primary = build_scaffolds(projected_adj, perm_container, False,
                                  False, ancestral=ancestral)

    merged_scf = _merge_scaffolds(primary, synced_small)
    merged_scf = _merge_consecutive_contigs(merged_scf)

    if debugger.debugging:
        output_links(merged_scf, _debug_file("merged.links"))
        output_scaffolds_premutations(merged_scf,
                                      _debug_file("merged_scaffolds.txt"))

    return merged_scf
def build_scaffolds(adjacencies, perm_container, debug_output=True,
                    correct_distances=True, ancestral=False):
    """
    Assembles scaffolds with respect to the inferred adjacencies.

    Contigs come from ``_make_contigs(perm_container)``; the ``ancestral``
    keyword is passed to it only when ``ancestral`` is truthy.
    ``correct_distances`` is forwarded to ``_extend_scaffolds``. With
    ``debug_output`` set, a progress message is logged and, when the
    debugger is active, links, target permutations and scaffold
    permutations are written into the debug directory.

    Returns the scaffolds produced by ``_extend_scaffolds``.
    """
    if debug_output:
        logger.info("Building scaffolds")

    #forward the keyword only when set, otherwise call without it
    contig_options = {"ancestral": ancestral} if ancestral else {}
    contigs, contig_index = _make_contigs(perm_container, **contig_options)

    scaffolds = _extend_scaffolds(adjacencies, contigs, contig_index,
                                  correct_distances)

    contig_counts = [len(scf.contigs) for scf in scaffolds]
    logger.debug("{0} contigs were joined into {1} scaffolds".format(
        sum(contig_counts), len(scaffolds)))

    #debug dumps are written only if both switches are on
    if not (debugger.debugging and debug_output):
        return scaffolds

    output_links(scaffolds,
                 os.path.join(debugger.debug_dir, "scaffolder.links"))
    output_permutations(
        perm_container.target_perms,
        os.path.join(debugger.debug_dir, "scaffolder_contigs.txt"))
    output_scaffolds_premutations(
        scaffolds,
        os.path.join(debugger.debug_dir, "scaffolder_scaffolds.txt"))
    return scaffolds
def do_job(recipe_file, out_dir, backend, assembly_refine, overwrite, debug):
    """
    Top-level logic of program

    Prepares the output directory and logging, parses the recipe, reads
    the target FASTA and builds permutations with the selected synteny
    backend. Then, for every block size listed in the recipe, infers
    adjacencies from the breakpoint graph, builds scaffolds and merges
    them with the scaffolds of the previous block sizes. The final
    scaffolds are written as links and FASTA; if ``assembly_refine`` is
    set, they are additionally refined with an overlap graph.

    recipe_file     -- passed to ``parse_ragout_recipe``
    out_dir         -- directory for all outputs, created if missing
    backend         -- key into ``SyntenyBackend.get_available_backends()``
    assembly_refine -- if truthy, run the overlap-graph refinement step
    overwrite       -- forwarded to the backend's ``make_permutations``
    debug           -- if truthy, debug files go under ``<out_dir>/debug``;
                       also forwarded to ``enable_logging``

    Returns 1 on every failure handled here. No explicit return value is
    visible on the success path (i.e. ``None``).
    """
    #all output locations are derived from out_dir
    out_log = os.path.join(out_dir, "ragout.log")
    out_links = os.path.join(out_dir, "scaffolds.links")
    out_scaffolds = os.path.join(out_dir, "scaffolds.fasta")
    out_overlap = os.path.join(out_dir, "contigs_overlap.dot")
    out_refined_links = os.path.join(out_dir, "scaffolds_refined.links")
    out_refined_scaffolds = os.path.join(out_dir, "scaffolds_refined.fasta")
    debug_root = os.path.join(out_dir, "debug")

    #NOTE(review): os.mkdir creates a single level only, so the parent
    #of out_dir has to exist already
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    if debug:
        debugger.set_debug_dir(debug_root)
        debugger.clear_debug_dir()

    #the log file lives in out_dir, so logging starts after it exists
    enable_logging(out_log, debug)
    logger.info("Cooking Ragout...")

    if not check_extern_modules(backend):
        return 1

    #reading inputs; the listed exceptions are logged and turned
    #into the error code
    try:
        recipe = parse_ragout_recipe(recipe_file)
        phylogeny = Phylogeny(recipe)
        target_fasta_file = recipe["genomes"][recipe["target"]]["fasta"]
        logger.info("Reading FASTA with contigs")
        target_fasta_dict = read_fasta_dict(target_fasta_file)
    except (RecipeException, FastaError, PhyloException) as e:
        logger.error(e)
        return 1

    #perm_files is indexed by block size below; a falsy result
    #is treated as a backend failure
    backends = SyntenyBackend.get_available_backends()
    perm_files = backends[backend].make_permutations(recipe, out_dir, overwrite)
    if not perm_files:
        logger.error("There were problems with synteny backend, exiting.")
        return 1

    #one iteration per block size, in the order given by the recipe
    last_scaffolds = None
    for block_size in recipe["blocks"]:
        logger.info("Running Ragout with the block size {0}".format(block_size))

        #each block size gets its own debug subdirectory
        if debug:
            debug_dir = os.path.join(debug_root, str(block_size))
            debugger.set_debug_dir(debug_dir)

        try:
            perm_container = PermutationContainer(perm_files[block_size], recipe)
        except PermException as e:
            logger.error(e)
            return 1

        graph = bg.BreakpointGraph()
        graph.build_from(perm_container, recipe)

        adjacencies = graph.find_adjacencies(phylogeny)
        scaffolds = scfldr.get_scaffolds(adjacencies, perm_container)

        #the first iteration (or a falsy previous result) just takes
        #the new scaffolds; afterwards results are merged
        if last_scaffolds:
            last_scaffolds = merge.merge(last_scaffolds, scaffolds)
        else:
            last_scaffolds = scaffolds

    #switching back from the per-block-size directory to the debug root
    #NOTE(review): called regardless of `debug`, unlike the calls above --
    #confirm this is intended
    debugger.set_debug_dir(debug_root)

    out_gen.output_links(last_scaffolds, out_links)
    out_gen.output_fasta(target_fasta_dict, last_scaffolds, out_scaffolds)

    #optional refinement of the final scaffolds with the overlap graph
    if assembly_refine:
        if not ovlp.make_overlap_graph(target_fasta_file, out_overlap):
            logger.error("Error in overlap graph reconstruction, exiting")
            return 1
        refined_scaffolds = asref.refine_scaffolds(out_overlap, last_scaffolds,
                                                   target_fasta_dict)
        out_gen.output_links(refined_scaffolds, out_refined_links)
        out_gen.output_fasta(target_fasta_dict, refined_scaffolds,
                             out_refined_scaffolds)
        if debug:
            #keeping a copy of the overlap graph before it is removed below
            shutil.copy(out_overlap, debugger.debug_dir)
            out_colored_overlap = os.path.join(debugger.debug_dir,
                                               "colored_overlap.dot")
            asgraph.save_colored_insert_overlap_graph(out_overlap, last_scaffolds,
                                                      refined_scaffolds,
                                                      out_colored_overlap)
        #the overlap graph file is not kept in out_dir
        os.remove(out_overlap)

    logger.info("Your Ragout is ready!")