def string_spelled_by_gapped_patterns(gapped_patterns, k, d):
    """Spell the string described by an ordered sequence of gapped patterns.

    Each element of ``gapped_patterns`` is indexed at ``[0]`` and ``[1]`` to
    obtain its first and second pattern.  The first patterns and the second
    patterns are each passed to ``kmer_reconstruction.reconstruct`` to get a
    prefix string and a suffix string (presumably the strings spelled by
    consecutive overlapping k-mers -- confirm against that module).

    The suffix string is expected to start ``k + d`` characters after the
    prefix string, so ``prefix_string[i]`` must equal
    ``suffix_string[i - (k + d)]`` for every ``i`` from ``k + d`` up to the
    end of the prefix string.

    Args:
        gapped_patterns: Sequence of pairs ``(first_pattern, second_pattern)``.
        k: Length of each pattern.
        d: Gap between the first and second pattern of a pair.

    Returns:
        The prefix string followed by the last ``k + d`` characters of the
        suffix string, or ``None`` when the two strings disagree somewhere in
        their overlap.
    """
    first_patterns = [gp[0] for gp in gapped_patterns]
    second_patterns = [gp[1] for gp in gapped_patterns]
    prefix_string = kmer_reconstruction.reconstruct(first_patterns)
    suffix_string = kmer_reconstruction.reconstruct(second_patterns)

    offset = k + d
    # The overlap begins at 0-based index ``offset`` of the prefix string
    # (aligned with index 0 of the suffix string).  Starting one position
    # later would leave that first overlapping character unchecked.
    for i in range(offset, len(prefix_string)):
        if prefix_string[i] != suffix_string[i - offset]:
            return None
    return prefix_string + suffix_string[-offset:]
def create_circular_string(k):
    """Build a text from an Eulerian cycle over all k-digit binary strings.

    Every integer in ``range(0, 2 ** k)`` is rendered as a zero-padded binary
    string of width ``k``; those strings are handed to
    ``debruijn.create_from_patterns``; a cycle is obtained from
    ``eulerian.cycle.find_cycle``; the last ``k - 1`` entries of that cycle
    are dropped; and the remainder is spelled with
    ``kmer_reconstruction.reconstruct``.

    Presumably the result is a k-universal circular binary string -- confirm
    against the helper modules.

    Args:
        k: Width, in binary digits, of the generated patterns.

    Returns:
        Whatever ``kmer_reconstruction.reconstruct`` returns for the trimmed
        cycle.
    """
    spec = '0' + str(k) + 'b'  # zero-padded binary, width k
    binary_kmers = [format(value, spec) for value in range(0, 2 ** k)]

    full_cycle = eulerian.cycle.find_cycle(
        debruijn.create_from_patterns(binary_kmers)
    )
    # NOTE(review): for k == 1 this slice is ``[:-0]``, which is empty.
    trimmed_cycle = full_cycle[:-(k - 1)]
    return kmer_reconstruction.reconstruct(trimmed_cycle)
def reconstruct(kmers):
    """Reconstruct a text from a collection of k-mers.

    The k-mers are turned into a graph with
    ``debruijn.create_from_patterns``, a path through that graph is found
    with ``eulerian.path.find_path``, and the path is spelled out with
    ``kmer_reconstruction.reconstruct``.

    Args:
        kmers: Patterns accepted by ``debruijn.create_from_patterns``.

    Returns:
        The value returned by ``kmer_reconstruction.reconstruct`` for the
        path that was found.
    """
    debruijn_graph = debruijn.create_from_patterns(kmers)
    eulerian_path = eulerian.path.find_path(debruijn_graph)
    return kmer_reconstruction.reconstruct(eulerian_path)
import contigs
import kmer_reconstruction
import sys
import debruijn

# Command-line script: read patterns (one per non-blank line) from the file
# named by the first argument, build a graph from them, and print the text
# spelled by each maximal non-branching path, one per line.
input_path = sys.argv[1]

with open(input_path, 'r') as handle:
    # Strip every line and keep only the ones that are not empty afterwards.
    stripped = (raw.strip() for raw in handle)
    patterns = [text for text in stripped if text != '']

pattern_graph = debruijn.create_from_patterns(patterns)
paths = contigs.maximal_non_branching_paths(pattern_graph)
contig_texts = [kmer_reconstruction.reconstruct(path) for path in paths]
print('\n'.join(contig_texts))
import kmer_reconstruction
import sys

# Command-line script: read k-mers (one per non-blank line) from the file
# named by the first argument and print the text that
# kmer_reconstruction.reconstruct produces for them.
input_path = sys.argv[1]

with open(input_path, 'r') as handle:
    # Strip every line and keep only the ones that are not empty afterwards.
    stripped = (raw.strip() for raw in handle)
    kmers = [text for text in stripped if text != '']

print(kmer_reconstruction.reconstruct(kmers))
def test_single_kmer():
    """A list holding one k-mer must reconstruct to that k-mer itself."""
    only_kmer = 'ACCGA'
    spelled = reconstruct([only_kmer])
    assert 'ACCGA' == spelled
def test_normal_case():
    """Five 5-mers, each shifted by one character, must give 'ACCGAAGCT'."""
    spelled = reconstruct([
        'ACCGA',
        'CCGAA',
        'CGAAG',
        'GAAGC',
        'AAGCT',
    ])
    assert 'ACCGAAGCT' == spelled
def test_empty_kmers():
    """An empty list of k-mers must reconstruct to the empty string."""
    spelled = reconstruct([])
    assert '' == spelled
def test_single__char_kmers():
    """Single-character k-mers must be concatenated in the order given."""
    letters = ['A', 'C', 'C', 'G', 'A']
    spelled = reconstruct(letters)
    assert 'ACCGA' == spelled