def main():
    """Print the de Bruijn graph for fragments read from stdin, plus a Eulerian cycle.

    Input is read line by line from stdin until EOF; lines are stripped and
    blank lines are dropped. The first remaining line is the command and
    selects how the other lines are parsed:

    * ``reads``      -- each line is passed to ``Read`` as-is.
    * ``read-pairs`` -- each line holds three ``|``-separated fields; the
      first and third are handed to ``Kdmer`` as its first two arguments and
      the second, parsed as an int, as its third argument.

    A line that appears ``c`` times yields ``c`` fragments, numbered
    ``0 .. c-1`` through the second constructor argument.

    The opening ``<div>`` / ``{bm-disable-all}`` markers are printed first and
    the matching closing markers are always printed, even when an error
    propagates.

    Raises:
        RuntimeError: if the command is neither ``reads`` nor ``read-pairs``.
        IndexError: if stdin contains no non-blank lines.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        # Gather every non-blank, whitespace-trimmed line until stdin is exhausted.
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)
        command = lines[0]
        lines = lines[1:]
        # Count duplicates so that repeated fragments get distinct instance numbers.
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of once per field
                first, last, gap = fields[0], fields[2], int(fields[1])
                frags.extend(ReadPair(Kdmer(first, last, gap), i) for i in range(c))
        else:
            # A bare `raise` here has no active exception and only produces the
            # opaque "No active exception to reraise"; keep the RuntimeError
            # type but say what actually went wrong.
            raise RuntimeError(
                f"Unrecognized command {command!r}: expected 'reads' or 'read-pairs'"
            )
        graph = to_debruijn_graph(frags)
        print(f'Given the fragments {lines}, the de Bruijn graph is...', end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print('... and a Eulerian cycle is ...', end="\n\n")
        # The walk starts from the prefix node of the first fragment.
        path = walk_eulerian_cycle(graph, frags[0].prefix())
        print(" -> ".join([str(p) for p in path]))
    finally:
        # Always close the wrapper so the surrounding document stays well-formed.
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
def main():
    """Print the artificially balanced de Bruijn graph for fragments read from stdin.

    Input is read line by line from stdin until EOF; lines are stripped and
    blank lines are dropped. The first remaining line is the command and
    selects how the other lines are parsed:

    * ``reads``      -- each line is passed to ``Read`` as-is.
    * ``read-pairs`` -- each line holds three ``|``-separated fields; the
      first and third are handed to ``Kdmer`` as its first two arguments and
      the second, parsed as an int, as its third argument.

    A line that appears ``c`` times yields ``c`` fragments, numbered
    ``0 .. c-1`` through the second constructor argument.

    The graph built from the fragments is passed through ``balance_graph``,
    and the resulting graph is printed together with the head and tail nodes
    that ``balance_graph`` reports.

    The opening ``<div>`` / ``{bm-disable-all}`` markers are printed first and
    the matching closing markers are always printed, even when an error
    propagates.

    Raises:
        RuntimeError: if the command is neither ``reads`` nor ``read-pairs``.
        IndexError: if stdin contains no non-blank lines.
    """
    print("<div style=\"border:1px solid black;\">", end="\n\n")
    print("`{bm-disable-all}`", end="\n\n")
    try:
        # Gather every non-blank, whitespace-trimmed line until stdin is exhausted.
        lines = []
        while True:
            try:
                line = input().strip()
            except EOFError:
                break
            if line:
                lines.append(line)
        command = lines[0]
        lines = lines[1:]
        # Count duplicates so that repeated fragments get distinct instance numbers.
        counter = Counter(lines)
        if command == 'reads':
            frags = [Read(r, i) for r, c in counter.items() for i in range(c)]
        elif command == 'read-pairs':
            frags = []
            for r, c in counter.items():
                fields = r.split('|')  # split once instead of once per field
                first, last, gap = fields[0], fields[2], int(fields[1])
                frags.extend(ReadPair(Kdmer(first, last, gap), i) for i in range(c))
        else:
            # A bare `raise` here has no active exception and only produces the
            # opaque "No active exception to reraise"; keep the RuntimeError
            # type but say what actually went wrong.
            raise RuntimeError(
                f"Unrecognized command {command!r}: expected 'reads' or 'read-pairs'"
            )
        graph = to_debruijn_graph(frags)
        graph, head_nodes, tail_nodes = balance_graph(graph)
        print(
            f'Given the fragments {lines}, the artificially balanced de Bruijn graph is...',
            end="\n\n")
        print(f'```{{dot}}\n{to_graphviz(graph)}\n```\n\n')
        print(
            f'... with original head nodes at {head_nodes} and tail nodes at {tail_nodes}.'
        )
    finally:
        # Always close the wrapper so the surrounding document stays well-formed.
        print("</div>", end="\n\n")
        print("`{bm-enable-all}`", end="\n\n")
from Read import Read
from ToDeBruijnGraph import to_debruijn_graph
from Utils import slide_window

# Script: load a dataset file whose first line is an integer k and whose
# second line is a DNA string, build a de Bruijn graph from the windows that
# slide_window(dna, k) yields, and print every node that has at least one
# output as "node -> out1,out2,...".

with open('/home/user/Downloads/dataset_240257_6(1).txt', mode='r', encoding='utf-8') as f:
    data = f.read()

lines = data.split('\n')
k, dna = int(lines[0].strip()), lines[1].strip()

# Wrap the first element of each window in a Read.
reads = []
for kmer, _ in slide_window(dna, k):
    reads.append(Read(kmer))

graph = to_debruijn_graph(reads)
for node, outputs in graph.get_all_outputs():
    targets = [str(x) for x in outputs]
    if targets:  # nodes without outputs are not printed
        print(f'{node} -> {",".join(targets)}')
from ToDeBruijnGraph import to_debruijn_graph

# Script: read '|'-separated read-pairs from an xz-compressed text file,
# shatter them, build a de Bruijn graph from the distinct pieces, and print
# the stitched result of every maximal non-branching path, shortest first.

reads_filepath = 'FinalChallengeReads.txt.xz'
with lzma.open(reads_filepath, mode='rt', encoding='utf-8') as f:
    raw_lines = f.read().splitlines()
# Trim surrounding whitespace, then drop lines that end up empty.
stripped_lines = [raw.strip() for raw in raw_lines]
lines = [text for text in stripped_lines if text]

# Each line must split into exactly two fields, or the unpacking below fails.
lines_split = [tuple(text.split('|', maxsplit=2)) for text in lines]
# NOTE(review): 1000 is passed as Kdmer's third argument -- presumably the gap d; confirm.
kdmers = [Kdmer(k1, k2, 1000) for k1, k2 in lines_split]
rps = [ReadPair(kdmer) for kdmer in kdmers]

# NOTE(review): shatter(40) presumably breaks a read-pair into smaller ones; confirm.
broken_rps = []
for rp in rps:
    broken_rps.extend(rp.shatter(40))
broken_rps = list(set(broken_rps))  # keep each distinct piece once

graph = to_debruijn_graph(broken_rps)
contig_paths = find_maximal_non_branching_paths(graph)
contig_paths.sort(key=len)
for path in contig_paths:
    first = path[0]
    if len(path) >= first.d:
        # Path has at least d elements: stitch the read-pairs directly.
        out = first.stitch(path)
        print(f'{len(path)} kd-mers = {out}')
        continue
    # Shorter path: stitch the heads and the tails separately as plain reads.
    heads = [Read(p.data.head) for p in path]
    heads_out = heads[0].stitch(heads)
    tails = [Read(p.data.tail) for p in path]
    tails_out = tails[0].stitch(tails)
    print(f'{len(heads)} k-mers = {heads_out}')
    print(f'{len(tails)} k-mers = {tails_out}')