Exemplo n.º 1
0
    def generate_read_to_ctg_map(self):
        """Write a table mapping each pread found in a contig graph to its contigs.

        Reads the pread ID file and the raw-read ID file as line-indexed
        lookup tables, builds an ``AsmGraph`` from the string-graph edge list,
        unitig data and contig paths, and then walks the string-graph nodes of
        every contig whose ID does not end in ``"R"``.

        For each node, the text before the first ``":"`` is parsed as the
        integer pread ID (``pid``).  The second ``"/"``-separated field of line
        ``pid`` of the pread ID file, integer-divided by 10, is the raw-read ID
        (``rid``); line ``rid`` of the raw-read ID file is the original read
        name (``oid``).

        One line per (pread, contig) pair is written to the output file in the
        format ``"%09d %09d %s %s"`` (pid, rid, oid, contig ID).

        All paths are obtained by passing the corresponding ``self`` attribute
        through ``fn`` (defined outside this block).

        Raises:
            IndexError: if a node refers to a pread or raw-read index that is
                not present in the respective ID file, or a pread ID line has
                no ``"/"`` separator.
            ValueError: if a node name or ID field is not an integer.
        """
        rawread_id_file = fn(self.rawread_id_file)
        pread_id_file = fn(self.pread_id_file)
        read_to_contig_map = fn(self.read_to_contig_map)

        # Load both ID tables up front and close the handles right away; the
        # lists are indexed by numeric ID below.
        with open(pread_id_file) as pread_ids:
            pread_did_to_rid = pread_ids.read().split("\n")
        with open(rawread_id_file) as rawread_ids:
            rid_to_oid = rawread_ids.read().split("\n")

        asm_G = AsmGraph(fn(self.sg_edges_list), fn(self.utg_data), fn(self.ctg_paths))

        # (pid, rid, oid) -> set of contig IDs whose graph contains that pread.
        pread_to_contigs = {}
        for ctg in asm_G.ctg_data:
            # Contigs with an ID ending in "R" are skipped (presumably the
            # reverse-complement duplicates -- confirm against AsmGraph).
            if ctg[-1] == "R":
                continue
            ctg_g = asm_G.get_sg_for_ctg(ctg)
            for n in ctg_g.nodes():
                pid = int(n.split(":")[0])
                # Floor division keeps this integer-only on both Python 2 and
                # Python 3 (no round trip through float).
                rid = int(pread_did_to_rid[pid].split("/")[1]) // 10
                oid = rid_to_oid[rid]
                pread_to_contigs.setdefault((pid, rid, oid), set()).add(ctg)

        # Open the output only once the mapping is complete, so a failure
        # during graph traversal does not leave a truncated file behind.
        with open(read_to_contig_map, "w") as f:
            for (pid, rid, oid), ctgs in pread_to_contigs.items():
                for ctg in ctgs:
                    # f.write works on Python 2 and 3, unlike `print >> f`.
                    f.write("%09d %09d %s %s\n" % (pid, rid, oid, ctg))
Exemplo n.º 2
0
def generate_read_to_ctg_map(self):
    """Write a table mapping each pread found in a contig graph to its contigs.

    Reads the pread ID file and the raw-read ID file as line-indexed lookup
    tables, builds an ``AsmGraph`` from the string-graph edge list, unitig
    data and contig paths, and then walks the string-graph nodes of every
    contig whose ID does not end in ``'R'``.

    For each node, the text before the first ``':'`` is parsed as the integer
    pread ID (``pid``).  The second ``'/'``-separated field of line ``pid`` of
    the pread ID file, integer-divided by 10, is the raw-read ID (``rid``);
    line ``rid`` of the raw-read ID file is the original read name (``oid``).

    One line per (pread, contig) pair is written to the output file in the
    format ``'%09d %09d %s %s'`` (pid, rid, oid, contig ID).

    All paths are obtained by passing the corresponding ``self`` attribute
    through ``fn`` (defined outside this block).

    Raises:
        IndexError: if a node refers to a pread or raw-read index that is not
            present in the respective ID file, or a pread ID line has no
            ``'/'`` separator.
        ValueError: if a node name or ID field is not an integer.
    """
    rawread_id_file = fn(self.rawread_id_file)
    pread_id_file = fn(self.pread_id_file)
    read_to_contig_map = fn(self.read_to_contig_map)

    # Load both ID tables up front and close the handles right away; the
    # lists are indexed by numeric ID below.
    with open(pread_id_file) as pread_ids:
        pread_did_to_rid = pread_ids.read().split('\n')
    with open(rawread_id_file) as rawread_ids:
        rid_to_oid = rawread_ids.read().split('\n')

    asm_G = AsmGraph(fn(self.sg_edges_list), fn(self.utg_data),
                     fn(self.ctg_paths))

    # (pid, rid, oid) -> set of contig IDs whose graph contains that pread.
    pread_to_contigs = {}
    for ctg in asm_G.ctg_data:
        # Contigs with an ID ending in 'R' are skipped (presumably the
        # reverse-complement duplicates -- confirm against AsmGraph).
        if ctg[-1] == 'R':
            continue
        ctg_g = asm_G.get_sg_for_ctg(ctg)
        for n in ctg_g.nodes():
            pid = int(n.split(':')[0])
            # Floor division keeps this integer-only on both Python 2 and
            # Python 3 (no round trip through float).
            rid = int(pread_did_to_rid[pid].split('/')[1]) // 10
            oid = rid_to_oid[rid]
            pread_to_contigs.setdefault((pid, rid, oid), set()).add(ctg)

    # Open the output only once the mapping is complete, so a failure during
    # graph traversal does not leave a truncated file behind.
    with open(read_to_contig_map, 'w') as f:
        for (pid, rid, oid), ctgs in pread_to_contigs.items():
            for ctg in ctgs:
                # f.write works on Python 2 and 3, unlike `print >> f`.
                f.write('%09d %09d %s %s\n' % (pid, rid, oid, ctg))
Exemplo n.º 3
0
def test_add_nx_string_graph():
    """Exercise ``GFAGraph.add_nx_string_graph`` using the 'gfa-1' fixtures.

    The test first loads an ``AsmGraph`` from the fixture files and rebuilds a
    NetworkX string graph from it, edge by edge.  It then creates a fresh
    ``GFAGraph``, reads ``sg.gexf`` from the same fixture directory and passes
    that graph to ``add_nx_string_graph``.  There are no assertions; the test
    passes if nothing raises.
    """

    def gfa1_path(basename):
        # Resolve a fixture inside the 'gfa-1' test-data directory.
        return os.path.join(helpers.get_test_data_dir(), 'gfa-1', basename)

    # Load the assembly graph.
    asm_graph = AsmGraph(gfa1_path('sg_edges_list'),
                         gfa1_path('utg_data'),
                         gfa1_path('ctg_paths'))

    # The following block is taken from Unzip, graphs_to_h_tigs.py.
    string_graph = nx.DiGraph()
    arid_to_phase = {}  # never populated here, so every lookup hits the default
    for ctg_id in asm_graph.ctg_data.keys():
        ctg_graph = asm_graph.get_sg_for_ctg(ctg_id)
        ctg_nodes = set(ctg_graph.nodes())  # carried over; not used below
        for tail, head in ctg_graph.edges():
            # The first nine characters of a node name are used as the phase
            # lookup key.
            tail_rid, head_rid = tail[:9], head[:9]

            # Only edges whose last data field is "G" are kept.
            if asm_graph.sg_edges[(tail, head)][-1] != "G":
                continue

            tail_phase = arid_to_phase.get(tail_rid, (-1, 0))
            head_phase = arid_to_phase.get(head_rid, (-1, 0))
            tail_tag = "%d_%d" % tail_phase
            head_tag = "%d_%d" % head_phase

            # An edge is flagged "Y" when both ends have the same first phase
            # field but a different second one.
            differs_within_block = (tail_phase[0] == head_phase[0]
                                    and tail_phase[1] != head_phase[1])
            cross_phase = "Y" if differs_within_block else "N"

            string_graph.add_node(tail, label=tail_tag, phase=tail_tag, src="P")
            string_graph.add_node(head, label=head_tag, phase=head_tag, src="P")
            string_graph.add_edge(tail, head, src="OP", cross_phase=cross_phase)

            # The contig graph does not contain the dual edges, so the
            # complementary edge (with reversed direction) is added as well.
            tail_rev = reverse_end(tail)
            head_rev = reverse_end(head)
            string_graph.add_node(tail_rev, label=tail_tag, phase=tail_tag, src="P")
            string_graph.add_node(head_rev, label=head_tag, phase=head_tag, src="P")
            string_graph.add_edge(head_rev, tail_rev, src="OP",
                                  cross_phase=cross_phase)

    # Add the string graph to the GFA.
    # NOTE(review): the graph built above is replaced here by the one read
    # from sg.gexf, so only the GEXF graph reaches add_nx_string_graph --
    # confirm this is intended.
    gfa_graph = mod.GFAGraph()
    string_graph = nx.read_gexf(gfa1_path('sg.gexf'))
    gfa_graph.add_nx_string_graph(string_graph)