def __init__(self, vertex1=None, vertex2=None, fragment1=None, fragment2=None, sign1=None, sign2=None,
             additional_information=None, cc_id=None):
    """Create an assembly point joining ``vertex1`` and ``vertex2``.

    Fragment names and fragment signs are derived exclusively from
    ``additional_information`` (its ``iedge1`` / ``iedge2`` edge data); when
    ``additional_information`` is ``None`` all of them are set to ``None``.

    :param vertex1: first vertex of the assembly point
    :param vertex2: second vertex of the assembly point
    :param fragment1: accepted for interface compatibility; not used here
    :param fragment2: accepted for interface compatibility; not used here
    :param sign1: accepted for interface compatibility; not used here
    :param sign2: accepted for interface compatibility; not used here
    :param additional_information: object exposing ``target_multicolor``,
        ``iedge1`` and ``iedge2`` (each edge with a ``data`` mapping), or ``None``
    :param cc_id: value stored as-is in ``self.cc_id``
    """
    self.vertex1 = vertex1
    self.vertex2 = vertex2

    details = additional_information
    if details is None:
        self.target_multicolor = None
        self.info = None
        self.fragment1 = None
        self.fragment2 = None
        self.fragment1_sign = None
        self.fragment2_sign = None
    else:
        self.target_multicolor = details.target_multicolor
        self.info = details
        self.fragment1 = get_from_dict_with_path(details.iedge1.data, key="name", path=["fragment"])
        self.fragment2 = get_from_dict_with_path(details.iedge2.data, key="name", path=["fragment"])

        # The first fragment's sign is decided against element 0 of its
        # forward orientation ...
        orientation1 = get_from_dict_with_path(details.iedge1.data, key="forward_orientation", path=["fragment"])
        if orientation1 is None:
            self.fragment1_sign = None
        elif vertex1 == orientation1[0]:
            self.fragment1_sign = "+"
        else:
            self.fragment1_sign = "-"

        # ... while the second fragment's sign is decided against element 1.
        orientation2 = get_from_dict_with_path(details.iedge2.data, key="forward_orientation", path=["fragment"])
        if orientation2 is None:
            self.fragment2_sign = None
        elif vertex2 == orientation2[1]:
            self.fragment2_sign = "+"
        else:
            self.fragment2_sign = "-"

    # Ids come from a class-level counter that is advanced on every construction.
    self.id = AssemblyPoint.id_cnt
    self.cc_id = cc_id
    AssemblyPoint.id_cnt += 1
def run(self, manager):
    """Run MGRA on the current breakpoint graph and store its guidance graph.

    The MGRA executable is looked up with ``get_from_dict_with_path`` (key
    ``"executable_path"``, path ``["mgra"]``) in ``manager.configuration``.
    If that lookup yields ``None`` the task logs a message and returns
    without doing anything else.

    Otherwise the method:

    1. creates ``<gos-asm output dir>/tmp_mgra`` if needed;
    2. writes the block orders (``blocks.txt``) and a JSON MGRA configuration
       (``config.cfg``) into it;
    3. launches MGRA with ``-c <config> -o <tmp_mgra>/output/``;
    4. reads every ``*.gen`` file from ``output/genomes`` into a single
       ``BreakpointGraph`` (edges are not merged);
    5. stores the result in ``manager.data["mgra"]["guidance_graph"]``.

    :param manager: object providing ``configuration``, ``data`` and ``logger``
    :raises RuntimeError: if MGRA exits with a non-zero status
    :raises OSError: if the temporary directory cannot be created or the
        executable cannot be launched
    """
    # Imported locally so the block does not depend on a module-level import.
    import subprocess

    mgra_ex_path = get_from_dict_with_path(manager.configuration, key="executable_path", path=["mgra"])
    manager.logger.info("=" * 80)
    if mgra_ex_path is None:
        manager.logger.info("MGRA executable path is not supplied, skipping the MGRA based tasks")
        return
    manager.logger.info("Preparing data to communicate with MGRA and obtain guidance graph")

    temp_dir = os.path.join(manager.configuration["gos-asm"]["output"]["dir"], "tmp_mgra")
    # Create-then-check avoids the race of "exists? then mkdir" and also
    # creates missing parent directories.
    try:
        os.makedirs(temp_dir)
    except OSError:
        if not os.path.isdir(temp_dir):
            raise

    blocks_file_name = os.path.join(temp_dir, "blocks.txt")
    config_file_name = os.path.join(temp_dir, "config.cfg")
    mgra_output_dir_name = os.path.join(temp_dir, "output/")

    manager.logger.debug("Writing blocks orders in GRIMM format to {file_name}".format(file_name=blocks_file_name))
    GRIMMWriter.print_genomes_as_grimm_blocks_orders(bg=manager.data["gos-asm"]["bg"], file_name=blocks_file_name)

    manager.logger.debug("Writing configuration file for MGRA run to {file_name}".format(file_name=config_file_name))
    config = self.create_mgra_config(blocks_file_name=blocks_file_name, manager=manager)
    with open(config_file_name, "wt") as destination:
        json.dump(obj=config, fp=destination)

    manager.logger.info("Running MGRA on prepared configuration")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.  "~" and "$VAR" in the configured executable are
    # expanded explicitly because the shell previously did that for us.
    executable = os.path.expandvars(os.path.expanduser(mgra_ex_path))
    exit_code = subprocess.call([executable, "-c", config_file_name, "-o", mgra_output_dir_name])
    if exit_code != 0:
        # Do not continue: the output directory may hold results of an
        # earlier run, which would be silently picked up below.
        raise RuntimeError("MGRA exited with non-zero status {code}".format(code=exit_code))
    manager.logger.debug("MGRA has successfully finished")

    manager.logger.info("Reading MGRA produced guidance graph")
    genomes_dir = os.path.join(mgra_output_dir_name, "genomes")
    # os.listdir order is arbitrary; sorting makes graph construction reproducible.
    genome_files = sorted(name for name in os.listdir(genomes_dir) if name.endswith(".gen"))
    guidance_bg = BreakpointGraph()
    for name in genome_files:
        with open(os.path.join(genomes_dir, name), "rt") as source:
            guidance_bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False),
                               merge_edges=False)

    if "mgra" not in manager.data:
        manager.data["mgra"] = {}
    manager.data["mgra"]["guidance_graph"] = guidance_bg
    manager.logger.info("Obtained MGRA produced guidance graph")
def __init__(self, vertex1=None, vertex2=None, fragment1=None, fragment2=None, sign1=None, sign2=None,
             additional_information=None, cc_id=None):
    """Initialise an assembly point between ``vertex1`` and ``vertex2``.

    ``fragment1``, ``fragment2``, ``sign1`` and ``sign2`` are part of the
    signature but are not read: fragment names and signs are taken from the
    edge data carried by ``additional_information`` and are ``None`` when no
    such information is supplied.

    :param vertex1: first vertex of the assembly point
    :param vertex2: second vertex of the assembly point
    :param additional_information: object with ``target_multicolor``,
        ``iedge1`` and ``iedge2`` attributes, or ``None``
    :param cc_id: stored unchanged as ``self.cc_id``
    """
    has_info = additional_information is not None

    def fragment_entry(iedge, key):
        # Single place for the "fragment" sub-dictionary lookup of an edge.
        return get_from_dict_with_path(iedge.data, key=key, path=["fragment"])

    self.vertex1 = vertex1
    self.vertex2 = vertex2
    self.target_multicolor = additional_information.target_multicolor if has_info else None
    self.info = additional_information
    self.fragment1 = fragment_entry(additional_information.iedge1, "name") if has_info else None
    self.fragment2 = fragment_entry(additional_information.iedge2, "name") if has_info else None

    # Sign of the first fragment: compared with element 0 of its forward orientation.
    forward1 = fragment_entry(additional_information.iedge1, "forward_orientation") if has_info else None
    if forward1 is not None:
        self.fragment1_sign = "+" if vertex1 == forward1[0] else "-"
    else:
        self.fragment1_sign = None

    # Sign of the second fragment: compared with element 1 of its forward orientation.
    forward2 = fragment_entry(additional_information.iedge2, "forward_orientation") if has_info else None
    if forward2 is not None:
        self.fragment2_sign = "+" if vertex2 == forward2[1] else "-"
    else:
        self.fragment2_sign = None

    # Take the next id from the class-wide counter, then advance the counter.
    self.id = AssemblyPoint.id_cnt
    self.cc_id = cc_id
    AssemblyPoint.id_cnt += 1
def run(self, manager):
    """Invoke MGRA and load the guidance graph it produces.

    Looks up the MGRA executable via ``get_from_dict_with_path`` (key
    ``"executable_path"``, path ``["mgra"]``) in ``manager.configuration``;
    when the lookup returns ``None`` the task is skipped after logging.

    On a normal run the block orders and a JSON configuration are written to
    ``<gos-asm output dir>/tmp_mgra``, MGRA is executed with
    ``-c <config> -o <tmp_mgra>/output/``, and all ``*.gen`` files found in
    ``output/genomes`` are combined (without merging edges) into one
    ``BreakpointGraph`` saved as ``manager.data["mgra"]["guidance_graph"]``.

    :param manager: object providing ``configuration``, ``data`` and ``logger``
    :raises RuntimeError: if MGRA exits with a non-zero status
    :raises OSError: if the temporary directory cannot be created or the
        executable cannot be launched
    """
    # Local import keeps this block self-sufficient.
    import subprocess

    mgra_ex_path = get_from_dict_with_path(manager.configuration, key="executable_path", path=["mgra"])
    manager.logger.info("=" * 80)
    if mgra_ex_path is None:
        manager.logger.info("MGRA executable path is not supplied, skipping the MGRA based tasks")
        return
    manager.logger.info("Preparing data to communicate with MGRA and obtain guidance graph")

    temp_dir = os.path.join(manager.configuration["gos-asm"]["output"]["dir"], "tmp_mgra")
    try:
        # Attempt creation first; a pre-check with os.path.exists is racy
        # and os.mkdir fails when parent directories are missing.
        os.makedirs(temp_dir)
    except OSError:
        if not os.path.isdir(temp_dir):
            raise

    blocks_file_name = os.path.join(temp_dir, "blocks.txt")
    config_file_name = os.path.join(temp_dir, "config.cfg")
    mgra_output_dir_name = os.path.join(temp_dir, "output/")

    manager.logger.debug("Writing blocks orders in GRIMM format to {file_name}".format(file_name=blocks_file_name))
    GRIMMWriter.print_genomes_as_grimm_blocks_orders(bg=manager.data["gos-asm"]["bg"], file_name=blocks_file_name)

    manager.logger.debug("Writing configuration file for MGRA run to {file_name}".format(file_name=config_file_name))
    config = self.create_mgra_config(blocks_file_name=blocks_file_name, manager=manager)
    with open(config_file_name, "wt") as destination:
        json.dump(obj=config, fp=destination)

    manager.logger.info("Running MGRA on prepared configuration")
    # Pass arguments as a list instead of a shell string so that paths with
    # spaces or metacharacters survive; expand "~" / "$VAR" ourselves since
    # no shell is involved any more.
    mgra_command = [
        os.path.expandvars(os.path.expanduser(mgra_ex_path)),
        "-c", config_file_name,
        "-o", mgra_output_dir_name,
    ]
    status = subprocess.call(mgra_command)
    if status != 0:
        # Stop here: leftovers of a previous run in the output directory
        # must not be mistaken for the result of this (failed) run.
        raise RuntimeError("MGRA exited with non-zero status {code}".format(code=status))
    manager.logger.debug("MGRA has successfully finished")

    manager.logger.info("Reading MGRA produced guidance graph")
    genomes_dir = os.path.join(mgra_output_dir_name, "genomes")
    guidance_bg = BreakpointGraph()
    # Sorted, because os.listdir returns entries in arbitrary order.
    for genome_file in sorted(os.listdir(genomes_dir)):
        if not genome_file.endswith(".gen"):
            continue
        with open(os.path.join(genomes_dir, genome_file), "rt") as source:
            genome_bg = GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False)
            guidance_bg.update(breakpoint_graph=genome_bg, merge_edges=False)

    if "mgra" not in manager.data:
        manager.data["mgra"] = {}
    manager.data["mgra"]["guidance_graph"] = guidance_bg
    manager.logger.info("Obtained MGRA produced guidance graph")
def run(self, manager):
    """Find assembly points per guidance-graph connected component and apply them.

    For every connected component of the balanced MGRA guidance graph
    (``get_balance_graph`` applied to ``manager.data["mgra"]["guidance_graph"]``):

    * regular vertices accepted by ``suitable_for_assembly_fragment_ends_vertex``
      become nodes of a helper graph; components yielding more than 1000 such
      nodes are skipped;
    * node pairs accepted by ``assembly_is_allowed`` are connected;
    * helper-graph components consisting of exactly two nodes are assembly
      candidates; pairs sharing a ``block_name`` are dropped;
    * for every color of the target multicolor whose two irregular edges
      belong to differently named fragments, an ``AssemblyPoint`` is built,
      logged, appended to ``manager.data["gos-asm"]["assembly_points"]`` and
      turned into a k-break.

    All collected k-breaks are applied to the breakpoint graph only after
    every component has been scanned.

    :param manager: object providing ``data`` and ``logger``
    :raises Exception: when ``get_repeat_guidance`` finds no path
        (``NetworkXNoPath``) for a candidate pair
    """
    logger = manager.logger
    logger.info("=" * 80)
    logger.info("Assembling with Connected Components Assembly Strategy Using MGRA guidance graph")
    logger.info("Strategy version 2")

    bg = manager.data["gos-asm"]["bg"]
    guidance_graph = get_balance_graph(breakpoint_graph=manager.data["mgra"]["guidance_graph"])
    log_bg_stats(bg=bg, logger=logger)
    log_bg_stats(bg=guidance_graph, logger=logger)
    target_multicolor = manager.data["gos-asm"]["target_multicolor"]

    def table_rule():
        # Horizontal rule as wide as the 32-column prefix plus the header line.
        return "-"*32 + "-"*len(AssemblyPoint.logger_file_header_string())

    header_logged = False
    pending_kbreaks = []

    for component_index, component in enumerate(guidance_graph.connected_components_subgraphs(copy=False)):
        # Helper graph: nodes are candidate fragment ends, edges are allowed assemblies.
        candidates = Graph()
        for vertex in component.nodes():
            if vertex.is_regular_vertex and suitable_for_assembly_fragment_ends_vertex(
                    graph=bg, reg_vertex=vertex, target_multicolor=target_multicolor):
                candidates.add_node(vertex)

        candidate_vertices = list(candidates.nodes())
        if len(candidate_vertices) > 1000:
            # Too many candidates in one component: skip it entirely.
            continue

        for first, second in itertools.combinations(candidate_vertices, 2):
            if assembly_is_allowed(graph=bg, vertex1=first, vertex2=second,
                                   target_multicolor=target_multicolor, data=manager.data):
                candidates.add_edge(first, second)

        # Only helper-graph components made of exactly two vertices are used.
        vertex_pairs = set()
        for candidate_component in nx.connected_component_subgraphs(candidates, copy=False):
            members = tuple(candidate_component.nodes())
            if len(members) == 2:
                vertex_pairs.add(members)

        for v1, v2 in vertex_pairs:
            if v1.block_name == v2.block_name:
                continue
            for color in target_multicolor.colors:
                iedge1 = get_irregular_edge_by_vertex_color(graph=bg, vertex=v1, color=color)
                iedge2 = get_irregular_edge_by_vertex_color(graph=bg, vertex=v2, color=color)
                fragment1_name = get_from_dict_with_path(iedge1.data, key="name", path=["fragment"])
                fragment2_name = get_from_dict_with_path(iedge2.data, key='name', path=["fragment"])
                if fragment1_name == fragment2_name:
                    continue

                if bg.has_edge(vertex1=v1, vertex2=v2):
                    support_edge = bg.get_condensed_edge(vertex1=v1, vertex2=v2)
                else:
                    support_edge = None

                repeat1_name, repeat1_dir = get_repeat_info(iedge1)
                repeat2_name, repeat2_dir = get_repeat_info(iedge2)
                repeat1_entry = get_repeat_entry(repeat_name=repeat1_name, repeat_direction=repeat1_dir)
                repeat2_entry = get_repeat_entry(repeat_name=repeat2_name, repeat_direction=repeat2_dir)
                try:
                    repeat_guidance = get_repeat_guidance(genome=color, repeat1_entry=repeat1_entry,
                                                          repeat2_entry=repeat2_entry, data=manager.data)
                except nx.networkx.exception.NetworkXNoPath:
                    raise Exception("Pair of edges must suitable for assembly with repeats guidance")
                repeat_info = {
                    "repeat_name_1": repeat1_name,
                    "repeat_name_2": repeat2_name,
                    "repeat_dir_1": repeat1_dir,
                    "repeat_dir_2": repeat2_dir,
                    "repeat_guidance": repeat_guidance,
                }

                full_ie1_multicolor = get_full_irregular_multicolor(vertex=v1, data=manager.data, graph=bg)
                full_ie2_multicolor = get_full_irregular_multicolor(vertex=v2, data=manager.data, graph=bg)
                if support_edge is None:
                    support_multicolor = Multicolor()
                else:
                    support_multicolor = support_edge.multicolor

                # One score per non-conflicting evolutionary scenario.
                scenario_scores = {
                    scenario_name: get_assembly_score(full_ie1_multicolor=full_ie1_multicolor,
                                                      full_ie2_multicolor=full_ie2_multicolor,
                                                      sedge_multicolor=support_multicolor,
                                                      target_multicolor=target_multicolor,
                                                      evolutionary_scenario=scenario,
                                                      data=manager.data)
                    for scenario_name, scenario in non_conflicting_evolutionary_scenarios
                }

                point_info = AssemblyPointInfo(support_edge=support_edge, iedge1=iedge1, iedge2=iedge2,
                                               evolutionary_scenarios=scenario_scores, allowed=True,
                                               repeat_info=repeat_info, target_multicolor=target_multicolor,
                                               target_color=Multicolor(color))
                point = AssemblyPoint(vertex1=v1, vertex2=v2, additional_information=point_info)
                point.cc_id = component_index

                if not header_logged:
                    # The table header is emitted once, right before the first entry.
                    header_logged = True
                    logger.debug(table_rule())
                    logger.debug(" "*32 + AssemblyPoint.logger_file_header_string())
                    logger.debug(table_rule())
                logger.debug("Identified an assembly point :: {ap}".format(ap=point.as_logger_entry()))

                manager.data["gos-asm"]["assembly_points"].append(point)
                pending_kbreaks.append(create_k_break_from_assembly_point(assembly_point=point))

    # Closing rule is logged regardless of whether any entry was printed.
    logger.debug(table_rule())

    for k_break in pending_kbreaks:
        bg.apply_kbreak(kbreak=k_break, merge=False)
    logger.info("Identified and performed {gluing_cnt} assemblies with Connected Components Assembly strategy"
                "".format(gluing_cnt=len(pending_kbreaks)))
    log_bg_stats(bg=bg, logger=logger)