def test_create_from_patterns():
    """Check the adjacency mapping built by ``debruijn.create_from_patterns``.

    The expected mapping pairs the first three characters of every 4-mer
    with its last three characters; the duplicated pattern 'CAGG' shows up
    as a repeated entry under 'CAG'.
    """
    kmers = ['GAGG', 'CAGG', 'GGGG', 'GGGA', 'CAGG', 'AGGG', 'GGAG']
    wanted = {
        'AGG': ['GGG'],
        'CAG': ['AGG', 'AGG'],
        'GAG': ['AGG'],
        'GGA': ['GAG'],
        'GGG': ['GGG', 'GGA'],
    }

    built = debruijn.create_from_patterns(kmers)

    assert wanted == built
def create_circular_string(k):
    """Build a text from a cycle over all k-bit binary strings.

    Steps, in order:
      1. Enumerate every integer in ``range(2 ** k)`` as a zero-padded,
         k-digit binary string.
      2. Build a graph from those strings with
         ``debruijn.create_from_patterns``.
      3. Find a cycle with ``eulerian.cycle.find_cycle`` and drop its last
         ``k - 1`` elements.
      4. Hand the trimmed cycle to ``kmer_reconstruction.reconstruct`` and
         return the result.

    Presumably the result is a k-universal circular binary string -- this
    depends on the helper modules, which are not visible here.

    NOTE: for ``k == 1`` the slice bound is ``-0``, so the trimmed cycle is
    an empty list.
    """
    template = '{0:0' + str(k) + 'b}'
    binary_kmers = list(map(template.format, range(2 ** k)))

    graph = debruijn.create_from_patterns(binary_kmers)
    full_cycle = eulerian.cycle.find_cycle(graph)

    # Drop the trailing k-1 elements of the cycle before reconstruction.
    trimmed_cycle = full_cycle[:-(k - 1)]
    return kmer_reconstruction.reconstruct(trimmed_cycle)
def reconstruct(kmers):
    """Reconstruct a text from ``kmers``.

    Builds a graph with ``debruijn.create_from_patterns``, finds a path
    through it with ``eulerian.path.find_path``, and returns whatever
    ``kmer_reconstruction.reconstruct`` produces for that path.
    """
    walk = eulerian.path.find_path(debruijn.create_from_patterns(kmers))
    return kmer_reconstruction.reconstruct(walk)
import contigs
import kmer_reconstruction
import sys
import debruijn

# Script: read patterns (one per line) from the file named by the first
# command-line argument, build a graph from them, and print one
# reconstructed string per maximal non-branching path, one per line.

filename = sys.argv[1]

with open(filename, 'r') as f:
    # Keep only lines that are non-empty after stripping whitespace.
    stripped = (raw.strip() for raw in f)
    lines = [entry for entry in stripped if entry]

graph = debruijn.create_from_patterns(lines)
res = contigs.maximal_non_branching_paths(graph)

cgs = []
for ks in res:
    cgs.append(kmer_reconstruction.reconstruct(ks))

output = '\n'.join(cgs)
print(output)
import debruijn
import sys

# Script: build a graph from a file and print its string form.
#
# Usage: <script> MODE FILENAME
#   MODE == 'patterns': FILENAME holds one pattern per line (blank lines
#       are ignored); the graph is built with debruijn.create_from_patterns.
#   any other MODE: the first line of FILENAME is the integer k and the
#       second line is the text; the graph is built with debruijn.create.

mode = sys.argv[1]
filename = sys.argv[2]

with open(filename, 'r') as f:
    if mode == 'patterns':
        stripped = (line.strip() for line in f)
        kmers = [kmer for kmer in stripped if kmer]
        graph = debruijn.create_from_patterns(kmers)
    else:
        k = int(f.readline())
        # readline() keeps the trailing newline; strip it so the line
        # terminator is not treated as part of the text (the 'patterns'
        # branch strips its lines the same way).
        text = f.readline().strip()
        graph = debruijn.create(text, k)

print(debruijn.to_string(graph))