def _tandem_repeats_gt_ksize():
    """Build a tandem repeat from a unit requested with length ``ksize * 2``.

    The unit comes from ``_get_random_sequence(ksize * 2)`` and is repeated
    ``request.param`` times.  The repeated sequence is registered with
    ``consume_collector``; a decision-node counter paired with an empty dict
    is registered with ``check_fp_collector`` (presumably "no decision nodes
    expected" -- confirm against the collector's consumer).

    Returns:
        ``((unit, repeated_sequence), request.param)``.
    """
    unit = _get_random_sequence(ksize * 2)
    n_copies = request.param
    repeated = unit * n_copies
    consume_collector(repeated)

    def decision_nodes(G):
        # ``repeated`` and ``ksize`` are read when the collector calls this.
        return count_decision_nodes(repeated, G, ksize)

    check_fp_collector((decision_nodes, {}))
    return (unit, repeated), n_copies
def _suffix_circular():
    """Extend a linear path with its own first ``ksize - 1`` bases.

    With that prefix appended, the k-mers spanning the end of the original
    path run on into its beginning.  The extended sequence is registered with
    ``consume_collector`` and a decision-node counter paired with an empty
    dict is registered with ``check_fp_collector``.

    Returns:
        The extended sequence.
    """
    path = linear_path()
    overlap = path[:ksize - 1]
    circular = path + overlap
    consume_collector(circular)

    def decision_nodes(G):
        return count_decision_nodes(circular, G, ksize)

    check_fp_collector((decision_nodes, {}))
    return circular
def _linear_path():
    """Generate a random unitig via ``sequence_generator.random_unitig(length)``.

    If generation raises ``ValueError`` the test is marked as an expected
    failure through ``pytest.xfail``.  The unitig is registered with
    ``consume_collector`` and a decision-node counter paired with an empty
    dict is registered with ``check_fp_collector``.

    Returns:
        The generated unitig.
    """
    try:
        unitig = sequence_generator.random_unitig(length)
    except ValueError:
        # The generator could not produce a sequence; give up on this test.
        pytest.xfail('Greedy dead-end in sequence generation.')

    consume_collector(unitig)

    def decision_nodes(G):
        return count_decision_nodes(unitig, G, ksize)

    check_fp_collector((decision_nodes, {}))
    return unitig
def _right_sea():
    """Build ``n_branches`` sequences branching from one random seed.

    The seed comes from ``sequence_generator.random_seed()`` and the branches
    from ``sequence_generator.random_branches`` (with ``n_tail_kmers`` set to
    ``tip_length``).  All branches are registered with ``consume_collector``;
    for each branch a decision-node counter paired with
    ``{(0, n_branches): 1}`` is registered with ``check_fp_collector``
    (presumably keyed by (left degree, right degree) -- confirm).

    Returns:
        Tuple of the branch sequences.
    """
    root = sequence_generator.random_seed()
    seqs = tuple(
        sequence_generator.random_branches(root,
                                           n_branches=n_branches,
                                           n_tail_kmers=tip_length))
    consume_collector(*seqs)

    def make_counter(sequence):
        # A factory gives every callable its own ``sequence`` binding.
        # Closing over the loop variable directly would make all callables
        # count the last sequence, since they run after the loop has finished.
        return lambda G: count_decision_nodes(sequence, G, ksize)

    check_fp_collector(*[(make_counter(s), {(0, n_branches): 1})
                         for s in seqs])
    return seqs
def _right_fork():
    """Extend the tip produced by ``right_tip`` into a full branch sequence.

    The branch is the tip followed by the first ``length - pivot - ksize``
    bases of a fresh random sequence.  Core and branch are registered with
    ``consume_collector``; ``check_fp_collector`` receives a counter for the
    core paired with ``{(1, 2): 1}`` and a counter for the branch paired with
    an empty dict.

    Returns:
        ``((core_sequence, branch_sequence), pivot)``.
    """
    (core_sequence, tip), pivot = right_tip()
    # A single random sequence is drawn: the previous version drew one extra
    # sequence whose value was immediately overwritten and never used.
    branch_sequence = tip + random_sequence()[:length - pivot - ksize]
    consume_collector(core_sequence, branch_sequence)
    check_fp_collector(
        (lambda G: count_decision_nodes(core_sequence, G, ksize),
         {(1, 2): 1}),
        (lambda G: count_decision_nodes(branch_sequence, G, ksize), {}))
    return (core_sequence, branch_sequence), pivot
def _right_tip():
    """Create a random sequence plus a one-k-mer tip derived from it.

    The tip is the ``ksize``-long window of the sequence starting one base
    after ``internal_pivot``, passed through ``mutate_position(..., -1)``
    (presumably changing its last base -- confirm).  Sequence and tip are
    registered with ``consume_collector``; ``check_fp_collector`` receives a
    counter for the sequence paired with ``{(1, 2): 1}`` and a counter for
    the tip paired with an empty dict.

    Returns:
        ``((sequence, tip), pivot)``.

    Raises:
        ValueError: if ``internal_pivot`` is less than 1.
    """
    core = random_sequence()
    pivot = internal_pivot
    if pivot < 1:
        raise ValueError("ksize too large for length")

    # The branch k-mer: the window right after the pivot, mutated at the end.
    start = pivot + 1
    branch_kmer = core[start:start + ksize]
    tip = mutate_position(branch_kmer, -1)

    consume_collector(core, tip)
    core_expectation = (lambda G: count_decision_nodes(core, G, ksize),
                        {(1, 2): 1})
    tip_expectation = (lambda G: count_decision_nodes(tip, G, ksize), {})
    check_fp_collector(core_expectation, tip_expectation)
    return (core, tip), pivot
def _tandem_quad_forks():
    """Attach branches to two adjacent k-mers in the middle of a linear path.

    ``S_l`` is ``len(core) // 2 - ksize`` and ``S_r`` is ``S_l + 1``.  For the
    k-mer at each of those offsets, every k-mer yielded by ``right_kmers``
    that is not a substring of the core is extended with a random sequence
    (generated with ``exclude=kmer``) to form a branch.  Core and branches
    are registered with ``consume_collector``; ``check_fp_collector`` receives
    a counter for the core paired with ``{(1, 4): 2}`` and, for every branch,
    a counter paired with an empty dict.

    Returns:
        ``((core, left_branches, right_branches), S_l, S_r)``.
    """
    core = linear_path()
    S_l = (len(core) // 2) - ksize
    S_r = S_l + 1
    left_branches = [kmer + random_sequence(exclude=kmer)
                     for kmer in right_kmers(core[S_l:S_l + ksize])
                     if kmer not in core]
    right_branches = [kmer + random_sequence(exclude=kmer)
                      for kmer in right_kmers(core[S_r:S_r + ksize])
                      if kmer not in core]

    def make_counter(sequence):
        # A factory gives every callable its own ``sequence`` binding.
        # Closing over the comprehension variable would make every branch
        # counter inspect the last branch when the collector calls it.
        return lambda G: count_decision_nodes(sequence, G, ksize)

    consume_collector(core, *left_branches, *right_branches)
    check_fp_collector(
        (make_counter(core), {(1, 4): 2}),
        *[(make_counter(branch), {})
          for branch in left_branches + right_branches])
    return (core, left_branches, right_branches), S_l, S_r
def _left_hairpin():
    """Append a copy of a mid-path k-mer to the end of a linear path.

    ``pos`` is the midpoint of the path.  If the base just before ``pos``
    equals the final base, the final base is passed through
    ``mutate_position`` first, so that (assuming the mutation changes the
    base) the appended copy is preceded by a different base than the original
    occurrence.  The most recent entry is popped from both collectors --
    presumably the one added by ``linear_path``; confirm -- before the result
    is registered with the expectation ``{(2, 1): 2}``.

    Returns:
        ``(result, pos)``.
    """
    path = linear_path()
    pos = len(path) // 2
    if path[pos - 1] == path[-1]:
        path = mutate_position(path, -1)

    hairpin_kmer = path[pos:pos + ksize]
    result = path + hairpin_kmer

    # Drop the latest consume entry, then register the hairpin sequence.
    consume_collector().pop()
    consume_collector(result)

    # Same for the decision-node expectations.
    check_fp_collector().pop()
    check_fp_collector(
        (lambda G: count_decision_nodes(result, G, ksize), {(2, 1): 2}))
    return result, pos
def _bowtie_tangle():
    """Build two sequences that share the interior of a mutated segment.

    A ``ksize + 2`` long window of the top path starting at ``middle_pivot``
    is passed through ``mutate_position`` at its first and last positions.
    The bottom sequence is ``middle_pivot`` random bases, then that segment,
    then random bases up to the remaining ``length``.  Both sequences are
    registered with ``consume_collector`` and each is paired with
    ``{(2, 2): 1}`` in ``check_fp_collector``.

    Returns:
        ``((top, bottom), middle_pivot)``.
    """
    top = linear_path()
    window_end = middle_pivot + ksize + 2
    segment = mutate_position(
        mutate_position(top[middle_pivot:window_end], 0), -1)

    prefix = random_sequence(exclude=segment)[:middle_pivot]
    bottom = prefix + segment
    # The suffix must be generated after ``bottom`` holds prefix + segment,
    # because that intermediate value is what gets excluded.
    suffix_len = length - middle_pivot - len(segment)
    bottom += random_sequence(exclude=bottom)[:suffix_len]

    consume_collector(top, bottom)
    top_expectation = (lambda G: count_decision_nodes(top, G, ksize),
                       {(2, 2): 1})
    bottom_expectation = (lambda G: count_decision_nodes(bottom, G, ksize),
                          {(2, 2): 1})
    check_fp_collector(top_expectation, bottom_expectation)
    return (top, bottom), middle_pivot
def _right_triple_fork():
    """Add a third branch to the fork produced by ``right_fork``.

    The bottom branch reuses the first ``ksize - 1`` bases of the top branch,
    then a base differing from both the core base at ``pivot + ksize`` and
    the top branch base at ``ksize - 1``, then ``length - pivot - ksize``
    random bases.  All three sequences are registered with
    ``consume_collector``; ``check_fp_collector`` receives a counter for the
    core paired with ``{(1, 3): 1}`` and one for the bottom branch paired
    with an empty dict.

    NOTE(review): unlike ``_left_fork``, the entries already registered by
    ``right_fork`` are not popped here -- confirm that is intended.

    Returns:
        ``((core_sequence, top_branch, bottom_branch), pivot)``.
    """
    (core, top), pivot = right_fork()
    random_tail = random_sequence()[:length - pivot - ksize]

    # Pick a base not already used at the branching position by either the
    # core or the top branch.
    bases = {'A', 'C', 'G', 'T'}
    taken = {core[pivot + ksize], top[ksize - 1]}
    candidates = list(bases - taken)
    new_base = random.choice(candidates)

    shared_prefix = top[:ksize - 1]
    bottom = shared_prefix + new_base + random_tail

    consume_collector(core, bottom, top)
    core_expectation = (lambda G: count_decision_nodes(core, G, ksize),
                        {(1, 3): 1})
    bottom_expectation = (lambda G: count_decision_nodes(bottom, G, ksize),
                          {})
    check_fp_collector(core_expectation, bottom_expectation)
    return (core, top, bottom), pivot
def _left_fork():
    """Derive a left-facing fork by reverse-complementing ``right_fork``.

    Both sequences are passed through ``revcomp`` and the pivot becomes
    ``length - pivot - ksize``.  The last two entries are popped from each
    collector (those added by the fixtures that compose ``right_fork``)
    before the new sequences and expectations are registered: the core is
    paired with ``{(2, 1): 1}`` and the branch with an empty dict.

    Returns:
        ``((core_sequence, branch), pivot)``.
    """
    (fwd_core, fwd_branch), fwd_pivot = right_fork()
    core = revcomp(fwd_core)
    branch = revcomp(fwd_branch)
    pivot = length - fwd_pivot - ksize

    # Remove the previous two fixtures that compose right_fork.
    consumed = consume_collector()
    for _ in range(2):
        consumed.pop()
    consume_collector(core, branch)

    expectations = check_fp_collector()
    for _ in range(2):
        expectations.pop()
    check_fp_collector(
        (lambda G: count_decision_nodes(core, G, ksize), {(2, 1): 1}),
        (lambda G: count_decision_nodes(branch, G, ksize), {}))
    return (core, branch), pivot
def _snp_bubble():
    """Create a wild-type path and a copy mutated at one position.

    The copy is ``mutate_position(wildtype, middle_pivot + ksize)``.  Both
    sequences are registered with ``consume_collector`` and each is paired
    with ``{(1, 2): 1, (2, 1): 1}`` in ``check_fp_collector``.

    Returns:
        ``((wildtype_sequence, snp_sequence), decision_L, decision_R)`` where
        ``decision_L`` is ``middle_pivot`` and ``decision_R`` is
        ``decision_L + ksize + 1``.

    Raises:
        ValueError: if ``middle_pivot`` is less than 1.
    """
    wildtype = linear_path()
    left = middle_pivot
    if left < 1:
        raise ValueError("ksize too long for length")
    right = left + ksize + 1

    variant = mutate_position(wildtype, left + ksize)
    consume_collector(wildtype, variant)

    # Each sequence gets its own expectation dict, as in the registrations
    # elsewhere in this file.
    wildtype_expectation = (
        lambda G: count_decision_nodes(wildtype, G, ksize),
        {(1, 2): 1, (2, 1): 1})
    variant_expectation = (
        lambda G: count_decision_nodes(variant, G, ksize),
        {(1, 2): 1, (2, 1): 1})
    check_fp_collector(wildtype_expectation, variant_expectation)
    return (wildtype, variant), left, right
def _circular_key():
    """Build a loop (path plus its first ``ksize - 1`` bases) with a tail.

    When ``pivot`` equals ``length - ksize`` or ``length - ksize - 1`` the
    effective pivot is shifted forward by ``ksize - 1``; otherwise it is
    ``pivot``.  The tail is assembled from the first base of each of the
    ``ksize`` k-mers following the effective pivot (indices wrap around the
    k-mer list), and is then passed through ``mutate_position(..., -1)``.
    Loop and tail are registered with ``consume_collector``; only the loop
    gets an expectation, ``{(1, 2): 1}``.

    Returns:
        ``((loop, tail), effective_pivot)``.
    """
    path = linear_path()
    loop = path + path[:ksize - 1]

    near_end = pivot in (length - ksize, length - ksize - 1)
    _pivot = pivot + ksize - 1 if near_end else pivot

    loop_kmers = list(kmers(loop, ksize))
    n_kmers = len(loop_kmers)
    first = _pivot + 1
    # First base of each successive k-mer, wrapping around the k-mer list.
    unmutated_tail = ''.join(
        loop_kmers[i % n_kmers][0] for i in range(first, first + ksize))
    tail = mutate_position(unmutated_tail, -1)

    consume_collector(loop, tail)
    check_fp_collector(
        (lambda G: count_decision_nodes(loop, G, ksize), {(1, 2): 1}))
    return (loop, tail), _pivot
def _suffix_circular_tangle():
    """Build a second sequence sharing a mutated segment of ``suffix_circular``.

    A ``ksize + 1`` long window of the base sequence starting at
    ``middle_pivot`` is passed through ``mutate_position`` at its first and
    last positions.  The inducer is ``middle_pivot`` random bases, then that
    segment, then random bases up to the remaining ``length``.  Both
    sequences are registered with ``consume_collector`` and each is paired
    with ``{(1, 2): 1, (2, 1): 1}`` in ``check_fp_collector``.

    Returns:
        ``((base, inducer), middle_pivot)``.
    """
    base = suffix_circular()
    L = middle_pivot
    segment = mutate_position(
        mutate_position(base[L:L + ksize + 1], 0), -1)

    head = random_sequence(exclude=segment)[:L]
    inducer = head + segment
    # The trailing random bases must exclude the inducer built so far.
    tail_len = length - L - len(segment)
    inducer += random_sequence(exclude=inducer)[:tail_len]

    consume_collector(base, inducer)
    base_expectation = (lambda G: count_decision_nodes(base, G, ksize),
                        {(1, 2): 1, (2, 1): 1})
    inducer_expectation = (
        lambda G: count_decision_nodes(inducer, G, ksize),
        {(1, 2): 1, (2, 1): 1})
    check_fp_collector(base_expectation, inducer_expectation)
    return (base, inducer), L
def _hourglass_tangle():
    """Build two sequences that share the interior of a mutated segment.

    A ``ksize + 1`` long window of the top path starting at ``middle_pivot``
    is passed through ``mutate_position`` at its first and last positions.
    The bottom sequence is ``middle_pivot`` random bases, then that segment,
    then random bases up to the remaining ``length``.  Both sequences are
    registered with ``consume_collector`` and each is paired with
    ``{(1, 2): 1, (2, 1): 1}`` in ``check_fp_collector``.

    Returns:
        ``((top, bottom), middle_pivot)``.
    """
    top = linear_path()
    L = middle_pivot
    segment = mutate_position(
        mutate_position(top[L:L + ksize + 1], 0), -1)

    head = random_sequence(exclude=segment)[:L]
    bottom = head + segment
    # The trailing random bases must exclude the bottom built so far.
    tail_len = length - L - len(segment)
    bottom += random_sequence(exclude=bottom)[:tail_len]

    consume_collector(top, bottom)
    top_expectation = (lambda G: count_decision_nodes(top, G, ksize),
                       {(1, 2): 1, (2, 1): 1})
    bottom_expectation = (lambda G: count_decision_nodes(bottom, G, ksize),
                          {(1, 2): 1, (2, 1): 1})
    check_fp_collector(top_expectation, bottom_expectation)
    return (top, bottom), L