def _FindPos_AllKMers(sequence, k):
    """Map every k-mer of ``sequence`` to the positions where it starts.

    Args:
        sequence: Indexable sequence of symbols (e.g. a string or a list).
        k: Window length of the k-mers to collect.

    Returns:
        dict: ``{kmer: [start_positions]}``. Each key is the result of
        ``utils.seq2str`` applied to the list of symbols in the window, and
        each position list is in increasing order. The dictionary is empty
        when ``sequence`` is shorter than ``k``.
    """
    # A window of k symbols does not fit, so there is nothing to report.
    # (Building the first window used to raise IndexError in this case.)
    if len(sequence) < k:
        return {}

    window = [sequence[i] for i in range(k)]
    positions = {utils.seq2str(window): [0]}
    for start in range(1, len(sequence) - k + 1):
        # Slide the window one symbol to the right.
        window = window[1:] + [sequence[start + k - 1]]
        # Convert the window once and let setdefault create missing entries.
        positions.setdefault(utils.seq2str(window), []).append(start)
    return positions
def solve_simple(self, simple_seq: Seq, state: State) -> typing.List[int]:
    """Solve one simple sequence and move the agent to its last node.

    The agent must currently stand on ``simple_seq[0]``. A path with exactly
    ``len(simple_seq)`` nodes is searched from the first to the last node of
    the sequence, then the agent is moved until it stands on the last node.
    Every node of the found path is recorded in ``self.visited_nodes``.

    Args:
        simple_seq: The sequence to solve; its first and last items are used
            as start and goal of the search.
        state: State object handed through to the search, move and
            task-stack helpers.

    Returns:
        The path returned by ``self.search_path`` for the sequence.
    """
    label = seq2str(simple_seq)
    self.log.info('solving simple sequence %s', label)
    self.push_task_stack(state, f'solve {label}')

    # search for the path
    start = simple_seq[0]
    goal = simple_seq[-1]
    assert self.pos == start
    wanted_len = len(simple_seq)
    path = self.search_path(start, goal, wanted_len, wanted_len, True, state)

    # move to the end point
    # this may involve additional searching
    self.push_task_stack(state, f'move to {goal}')
    # The first entry is a sentinel holding only the goal; real sub-paths are
    # stacked on top of it and walked in reverse order afterwards.
    legs = [[goal]]
    while self.pos != legs[-1][0]:
        legs.append(self.search_path(self.pos, goal, 0, 1024, False, state))
    for leg in reversed(legs[1:]):
        self.move_along_path(leg, state)
    self.pop_task_stack(state)

    self.log.info('solved simple sequence %s: %s', simple_seq, path)
    self.pop_task_stack(state)

    for node in path:
        self.visited_nodes.add(node)
    return path
def _kmersWithAlterations_at_index(inp, curr_index, prev_combs, length,
                                   max_hamming, alphabet='ATCG'):
    """Return the altered k-mers ending at ``curr_index`` with their distances.

    The result is derived incrementally from ``prev_combs`` (the result for
    the previous index) instead of being rebuilt from scratch.

    Args:
        inp: The input sequence being scanned.
        curr_index: Index in ``inp`` of the symbol appended in this step.
        prev_combs: Mapping ``{candidate: hamming_distance}`` from the
            previous step; an empty mapping triggers initialisation.
        length: Target k-mer length, forwarded to ``_update_prev_list``.
        max_hamming: Maximum number of mismatches allowed.
        alphabet: Symbols that may be substituted at a position.

    Returns:
        dict: ``{candidate: hamming_distance}`` for the current index.
    """
    # Initialize: every single symbol is a candidate at distance 1 (when
    # mismatches are allowed at all); the real first symbol is at distance 0.
    if len(prev_combs) == 0:
        seeds = {}
        if max_hamming > 0:
            seeds = {symbol: 1 for symbol in alphabet}
        seeds[inp[0]] = 0
        return seeds

    # First, drop the leading character of each candidate collected so far.
    shifted = _update_prev_list(inp, curr_index, prev_combs, length,
                                max_hamming)

    # Then, build the new batch by appending one character to each candidate.
    extended = {}
    for prefix, distance in shifted.items():
        symbols = list(prefix)
        if distance == max_hamming:
            # Mismatch budget is used up: only the true symbol may follow,
            # and an already stored distance for that candidate is kept.
            key = utils.seq2str(symbols + [inp[curr_index]])
            extended.setdefault(key, max_hamming)
            continue
        for symbol in alphabet:
            if symbol == inp[curr_index]:
                candidate_distance = distance
            else:
                candidate_distance = distance + 1
            key = utils.seq2str(symbols + [symbol])
            extended[key] = min(extended.get(key, max_hamming),
                                candidate_distance)
    return extended
def solve(graph: nx.DiGraph, i: int, seq: Seq, correct: SolvedSeq, savedir: str):
    """Run one solve on ``graph``, check the result and write a progress file.

    Args:
        graph: Graph handed to the ``State`` constructor.
        i: Zero-based index of the run; the names and the output file use
            ``i + 1``.
        seq: Sequence the agent has to solve.
        correct: Expected result of the solve.
        savedir: Base directory; output goes into its ``simulations``
            subdirectory, which is created if missing.

    Raises:
        ValueError: If the agent's result differs from ``correct``.
    """
    run_no = i + 1
    out_dir = os.path.join(savedir, 'simulations')
    os.makedirs(out_dir, exist_ok=True)

    agent = Agent(seq, f'Agent-{run_no}')
    state = State(graph, f'State-{run_no}')
    result = agent.solve(state)
    if result != correct:
        raise ValueError(f'{run_no}: incorrect solve')

    report_path = os.path.join(out_dir, f'solve_{run_no}.pdf')
    state.generate_progress(report_path, result, seq2str(seq))
def _update_prev_list(inp, curr_index, prev_combs, length, max_hamming):
    """Slide every candidate in ``prev_combs`` one position to the right.

    Candidates that have already reached ``length`` symbols lose their first
    symbol; candidates that become equal are merged, keeping the smallest
    hamming distance. While the candidates are still shorter than ``length``
    the mapping is propagated unchanged.

    Args:
        inp: The input sequence being scanned.
        curr_index: Index in ``inp`` of the symbol that will be appended next.
        prev_combs: Mapping ``{candidate: hamming_distance}``.
        length: Target k-mer length.
        max_hamming: Maximum number of mismatches allowed; stored distances
            never exceed this value.

    Returns:
        dict: ``prev_combs`` itself when nothing has to be dropped (including
        when it is empty), otherwise a new ``{candidate: hamming_distance}``
        mapping of the shortened candidates.
    """
    # Nothing to slide. (Inspecting the first key of an empty mapping used to
    # raise IndexError.)
    if len(prev_combs) == 0:
        return prev_combs

    # Propagate result: the window is not full yet. Peek at a single key
    # instead of copying every key into a list.
    if len(next(iter(prev_combs))) != length:
        return prev_combs

    # Symbol of the input that leaves the window in this step.
    leaving_symbol = inp[max(curr_index - length, 0)]

    current_combs = {}
    for now_comb, now_ham in prev_combs.items():
        if now_comb[0] == leaving_symbol:
            # The dropped character matched: hamming distance does not change.
            new_distance = now_ham
        else:
            # The dropped character was a mismatch: one difference fewer.
            new_distance = now_ham - 1
        # New combinations drop the first character (slide window).
        key = utils.seq2str(now_comb[1:])
        current_combs[key] = min(current_combs.get(key, max_hamming),
                                 new_distance)
    return current_combs
def draw(g: nx.DiGraph, sqs: typing.List[typing.Tuple[Seq, SolvedSeq]], title: str = ''):
    """Draw the graph and a numbered list of sequences side by side.

    Figure 0 is (re)used and cleared. The left subplot shows ``g`` laid out
    by the nodes' ``'pos'`` attribute; the right subplot lists the first
    element of every pair in ``sqs`` as monospace text.

    Args:
        g: Graph to draw.
        sqs: Pairs whose first element is rendered with ``seq2str``; the
            second element is not used here.
        title: Optional figure title; skipped when empty.
    """
    plt.figure(0, figsize=(10, 5))
    plt.clf()
    if title:
        plt.suptitle(title)

    # Left panel: the graph itself.
    plt.subplot(1, 2, 1)
    node_positions = dict(g.nodes('pos'))
    nx.draw_networkx(g, pos=node_positions)

    # Right panel: one text row per sequence, spread over the [0, 1) range.
    plt.subplot(1, 2, 2)
    for row, (sequence, _solved) in enumerate(sqs):
        y = row / len(sqs)
        label = '{: >2}: {}'.format(
            row, seq2str(sequence, aligned=True, align_size=2))
        plt.text(0, y, label, fontfamily='monospace')
    plt.ylim((0, 1))
    plt.axis('off')
def solve(self, state: State):
    """Solve the agent's master sequence piece by piece.

    The agent must stand on the first node of ``self.seq``. The sequence is
    split with ``split_seq``; each piece is solved with ``solve_simple`` and
    the partial results are combined with ``join``.

    Args:
        state: State object passed on to the helpers that do the work.

    Returns:
        The joined result of all solved pieces (an empty list when
        ``split_seq`` yields nothing).
    """
    label = seq2str(self.seq)
    self.log.info('solving master sequence %s', label)
    # NOTE(review): this task is pushed but never popped within this
    # method -- confirm whether that is intended.
    self.push_task_stack(state, f'solve {label}')
    assert self.seq[0] == self.pos

    # always peek node type at the current position
    state.peek_node_type(self)

    # split sequence into subsequences that start and end with known nodes
    # and have only unknown nodes in between, then solve each of them in turn
    combined = []
    for piece in split_seq(self.seq):
        solved_piece = self.solve_simple(piece, state)
        combined = join(combined, solved_piece)
    return combined
def _run_or_rebot_from_cli(method, cliargs, usage, **argparser_config):
    """Parse command line arguments, run `method` and report via `_exit`.

    `cliargs` are parsed with a `utils.ArgumentParser` built from `usage`
    and the full version string; extra keyword arguments are forwarded to
    `parse_args`. `method` is then called with the parsed data sources as
    positional arguments and the parsed options as keyword arguments.

    Every outcome ends in a call to `_exit`:
    - `Information` while parsing -> `INFO_PRINTED` with the message
    - `DataError` while parsing or running -> `DATA_ERROR` with the error
    - `KeyboardInterrupt` / `SystemExit` while running -> `STOPPED_BY_USER`
    - any other exception while running -> `FRAMEWORK_ERROR` with details
    - success -> the value of `_failed_critical_test_count(suite)`

    NOTE(review): `_exit` is defined outside this block; presumably it does
    not return, otherwise `datasources` would be unbound after a parsing
    failure -- confirm.
    """
    LOGGER.register_file_logger()
    ap = utils.ArgumentParser(usage, get_full_version())
    try:
        options, datasources = \
            ap.parse_args(cliargs, argfile='argumentfile', unescape='escape',
                          help='help', version='version', check_args=True,
                          **argparser_config)
    except Information, msg:
        _exit(INFO_PRINTED, utils.unic(msg))
    except DataError, err:
        _exit(DATA_ERROR, utils.unic(err))

    LOGGER.info('Data sources: %s' % utils.seq2str(datasources))
    try:
        suite = method(*datasources, **options)
    except DataError, err:
        _exit(DATA_ERROR, unicode(err))
    except (KeyboardInterrupt, SystemExit):
        _exit(STOPPED_BY_USER, 'Execution stopped by user.')
    except:
        # Anything not handled above is reported as a framework error.
        error, details = utils.get_error_details()
        _exit(FRAMEWORK_ERROR, 'Unexpected error: %s' % error, details)
    else:
        # Only reached when `method` returned without raising.
        _exit(_failed_critical_test_count(suite))


def _failed_critical_test_count(suite):
    rc = suite.critical_stats.failed
def rebot_from_cli(args, usage):
    """Command line entry point for Rebot.

    Logs the full version string for 'Rebot' and delegates to
    `_run_or_rebot_from_cli` with `run_rebot` as the method to execute.
    Returns whatever that call returns.
    """
    LOGGER.info(get_full_version('Rebot'))
    return _run_or_rebot_from_cli(run_rebot, args, usage)


def _run_or_rebot_from_cli(method, cliargs, usage, **argparser_config):
    """Parse `cliargs` and execute `method` with the result.

    Returns `INFO_PRINTED` after printing the message when parsing raises
    `Information`, `DATA_ERROR` after reporting the error (with help) when
    parsing raises `DataError`, and otherwise the return value of
    `_execute(method, datasources, options)`.
    """
    LOGGER.register_file_logger()
    try:
        options, datasources = _parse_arguments(cliargs, usage,
                                                **argparser_config)
    except Information, msg:
        # Informational output is printed, not treated as an error.
        print utils.encode_output(unicode(msg))
        return INFO_PRINTED
    except DataError, err:
        _report_error(unicode(err), help=True)
        return DATA_ERROR
    LOGGER.info('Data sources: %s' % utils.seq2str(datasources))
    return _execute(method, datasources, options)


def _parse_arguments(cliargs, usage, **argparser_config):
    """Parse `cliargs` with a `utils.ArgumentParser` built from `usage`.

    The parser is created with `usage` and the full version string; extra
    keyword arguments are forwarded to `parse_args`. Returns the result of
    `parse_args`, which the caller unpacks as `(options, datasources)`.
    """
    ap = utils.ArgumentParser(usage, get_full_version())
    return ap.parse_args(cliargs, argfile='argumentfile', unescape='escape',
                         help='help', version='version', check_args=True,
                         **argparser_config)


def _execute(method, datasources, options):
    try:
        suite = method(*datasources, **options)
    except DataError, err:
        _report_error(unicode(err), help=True)
        return DATA_ERROR
    except (KeyboardInterrupt, SystemExit):