def generate_read_to_ctg_map(self):
    """Write a table mapping each pread found in a contig graph to that contig.

    Reads the two id tables named by ``self.pread_id_file`` and
    ``self.rawread_id_file`` (one entry per line, looked up by line index),
    builds an ``AsmGraph`` from ``self.sg_edges_list``, ``self.utg_data`` and
    ``self.ctg_paths``, and writes one line per (pread, contig) pair to the
    file named by ``self.read_to_contig_map``.

    Each output line has the form ``"%09d %09d %s %s"``: the pread id, the
    derived raw-read index, the entry of the raw-read id table at that index,
    and the contig id.

    All ``self.*`` attributes are passed through ``fn()`` before use;
    presumably this resolves a task file object to a path -- confirm against
    the ``fn`` helper.
    """
    rawread_id_file = fn(self.rawread_id_file)
    pread_id_file = fn(self.pread_id_file)
    read_to_contig_map = fn(self.read_to_contig_map)

    # Load both lookup tables up front and close the handles immediately
    # instead of leaving them for the garbage collector.
    with open(pread_id_file) as id_stream:
        pread_did_to_rid = id_stream.read().split("\n")
    with open(rawread_id_file) as id_stream:
        rid_to_oid = id_stream.read().split("\n")

    asm_G = AsmGraph(fn(self.sg_edges_list),
                     fn(self.utg_data),
                     fn(self.ctg_paths))

    # (pid, rid, oid) -> set of contig ids whose graph contains that pread.
    pread_to_contigs = {}

    with open(read_to_contig_map, "w") as f:
        for ctg in asm_G.ctg_data:
            # Contig ids ending in "R" are skipped (presumably the
            # reverse-complement twins -- confirm against AsmGraph).
            if ctg[-1] == "R":
                continue
            ctg_g = asm_G.get_sg_for_ctg(ctg)
            for n in ctg_g.nodes():
                # Node names look like "<pread id>:<suffix>"; only the id
                # part is needed here.
                pid = int(n.split(":")[0])

                # The second "/"-separated field of the pread name, divided
                # by 10, is the index into the raw-read id table.  Floor
                # division keeps the result an exact int on Python 2 and 3.
                rid = int(pread_did_to_rid[pid].split("/")[1]) // 10
                oid = rid_to_oid[rid]
                k = (pid, rid, oid)
                pread_to_contigs.setdefault(k, set()).add(ctg)

        for k, ctgs in pread_to_contigs.items():
            pid, rid, oid = k
            for ctg in ctgs:
                # f.write() + "\n" emits exactly what ``print >> f`` did on
                # Python 2, and also works on Python 3.
                f.write("%09d %09d %s %s\n" % (pid, rid, oid, ctg))
def generate_read_to_ctg_map(self):
    """Write a table mapping each pread found in a contig graph to that contig.

    Reads the two id tables named by ``self.pread_id_file`` and
    ``self.rawread_id_file`` (one entry per line, looked up by line index),
    builds an ``AsmGraph`` from ``self.sg_edges_list``, ``self.utg_data`` and
    ``self.ctg_paths``, and writes one line per (pread, contig) pair to the
    file named by ``self.read_to_contig_map``.

    Each output line has the form ``'%09d %09d %s %s'``: the pread id, the
    derived raw-read index, the entry of the raw-read id table at that index,
    and the contig id.

    All ``self.*`` attributes are passed through ``fn()`` before use;
    presumably this resolves a task file object to a path -- confirm against
    the ``fn`` helper.
    """
    rawread_id_file = fn(self.rawread_id_file)
    pread_id_file = fn(self.pread_id_file)
    read_to_contig_map = fn(self.read_to_contig_map)

    # Load both lookup tables up front and close the handles immediately
    # instead of leaving them for the garbage collector.
    with open(pread_id_file) as id_stream:
        pread_did_to_rid = id_stream.read().split('\n')
    with open(rawread_id_file) as id_stream:
        rid_to_oid = id_stream.read().split('\n')

    asm_G = AsmGraph(fn(self.sg_edges_list),
                     fn(self.utg_data),
                     fn(self.ctg_paths))

    # (pid, rid, oid) -> set of contig ids whose graph contains that pread.
    pread_to_contigs = {}

    with open(read_to_contig_map, 'w') as f:
        for ctg in asm_G.ctg_data:
            # Contig ids ending in 'R' are skipped (presumably the
            # reverse-complement twins -- confirm against AsmGraph).
            if ctg[-1] == 'R':
                continue
            ctg_g = asm_G.get_sg_for_ctg(ctg)
            for n in ctg_g.nodes():
                # Node names look like '<pread id>:<suffix>'; only the id
                # part is needed here.
                pid = int(n.split(':')[0])

                # The second '/'-separated field of the pread name, divided
                # by 10, is the index into the raw-read id table.  Floor
                # division keeps the result an exact int on Python 2 and 3.
                rid = int(pread_did_to_rid[pid].split('/')[1]) // 10
                oid = rid_to_oid[rid]
                k = (pid, rid, oid)
                pread_to_contigs.setdefault(k, set()).add(ctg)

        for k, ctgs in pread_to_contigs.items():
            pid, rid, oid = k
            for ctg in ctgs:
                # f.write() + '\n' emits exactly what ``print >> f`` did on
                # Python 2, and also works on Python 3.
                f.write('%09d %09d %s %s\n' % (pid, rid, oid, ctg))
def test_add_nx_string_graph():
    """Smoke-test ``GFAGraph.add_nx_string_graph`` using the 'gfa-1' test data.

    The test makes no assertions; it passes when every step runs without
    raising.

    NOTE(review): the string graph assembled in the loop below is replaced by
    the graph read from ``sg.gexf`` just before it is handed to the GFA
    graph, so only the GEXF graph reaches ``add_nx_string_graph``.  Confirm
    which of the two inputs is intended.
    """
    def gfa1_path(name):
        # Locate a file inside the 'gfa-1' test-data directory.
        return os.path.join(helpers.get_test_data_dir(), 'gfa-1', name)

    # Load the assembly graph.
    asm_graph = AsmGraph(gfa1_path('sg_edges_list'),
                         gfa1_path('utg_data'),
                         gfa1_path('ctg_paths'))

    # The following block is taken from Unzip, graphs_to_h_tigs.py.
    string_graph = nx.DiGraph()
    # Nothing is ever stored in this mapping within the test, so every
    # lookup below falls back to the default phase.
    phase_by_read = {}
    default_phase = (-1, 0)

    for contig_id in asm_graph.ctg_data.keys():
        contig_sg = asm_graph.get_sg_for_ctg(contig_id)
        # Not used further down; kept so the same calls are made as before.
        contig_nodes = set(contig_sg.nodes())

        for tail, head in contig_sg.edges():
            tail_read = tail[:9]
            head_read = head[:9]

            # Only edges whose last data field is "G" are copied over.
            if asm_graph.sg_edges[(tail, head)][-1] != "G":
                continue

            tail_phase = phase_by_read.get(tail_read, default_phase)
            head_phase = phase_by_read.get(head_read, default_phase)

            # An edge crosses phases when both ends share the first phase
            # component but differ in the second.
            crosses = (tail_phase[0] == head_phase[0]
                       and tail_phase[1] != head_phase[1])
            cross_phase = "Y" if crosses else "N"

            tail_tag = "%d_%d" % tail_phase
            head_tag = "%d_%d" % head_phase
            tail_attrs = dict(label=tail_tag, phase=tail_tag, src="P")
            head_attrs = dict(label=head_tag, phase=head_tag, src="P")
            edge_attrs = dict(src="OP", cross_phase=cross_phase)

            string_graph.add_node(tail, **tail_attrs)
            string_graph.add_node(head, **head_attrs)
            string_graph.add_edge(tail, head, **edge_attrs)

            # The contig graph does not contain the dual edges, so the
            # complementary edge is added explicitly.
            tail_rev = reverse_end(tail)
            head_rev = reverse_end(head)
            string_graph.add_node(tail_rev, **tail_attrs)
            string_graph.add_node(head_rev, **head_attrs)
            string_graph.add_edge(head_rev, tail_rev, **edge_attrs)

    # Add the string graph to the GFA.
    gfa_graph = mod.GFAGraph()
    gexf_file = gfa1_path('sg.gexf')
    string_graph = nx.read_gexf(gexf_file)
    gfa_graph.add_nx_string_graph(string_graph)